255 lines
10 KiB
C
255 lines
10 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ This program is free software; you can redistribute it and/or modify │
|
|
│ it under the terms of the GNU General Public License as published by │
|
|
│ the Free Software Foundation; version 2 of the License. │
|
|
│ │
|
|
│ This program is distributed in the hope that it will be useful, but │
|
|
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
|
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
|
│ General Public License for more details. │
|
|
│ │
|
|
│ You should have received a copy of the GNU General Public License │
|
|
│ along with this program; if not, write to the Free Software │
|
|
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
|
│ 02110-1301 USA │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/intrin/pabsb.h"
|
|
#include "libc/intrin/pabsd.h"
|
|
#include "libc/intrin/pabsw.h"
|
|
#include "libc/intrin/packssdw.h"
|
|
#include "libc/intrin/packsswb.h"
|
|
#include "libc/intrin/packuswb.h"
|
|
#include "libc/intrin/paddb.h"
|
|
#include "libc/intrin/paddd.h"
|
|
#include "libc/intrin/paddq.h"
|
|
#include "libc/intrin/paddsb.h"
|
|
#include "libc/intrin/paddsw.h"
|
|
#include "libc/intrin/paddusb.h"
|
|
#include "libc/intrin/paddusw.h"
|
|
#include "libc/intrin/paddw.h"
|
|
#include "libc/intrin/palignr.h"
|
|
#include "libc/intrin/pand.h"
|
|
#include "libc/intrin/pandn.h"
|
|
#include "libc/intrin/pavgb.h"
|
|
#include "libc/intrin/pavgw.h"
|
|
#include "libc/intrin/pcmpeqb.h"
|
|
#include "libc/intrin/pcmpeqd.h"
|
|
#include "libc/intrin/pcmpeqw.h"
|
|
#include "libc/intrin/pcmpgtb.h"
|
|
#include "libc/intrin/pcmpgtd.h"
|
|
#include "libc/intrin/pcmpgtw.h"
|
|
#include "libc/intrin/phaddd.h"
|
|
#include "libc/intrin/phaddsw.h"
|
|
#include "libc/intrin/phaddw.h"
|
|
#include "libc/intrin/phsubd.h"
|
|
#include "libc/intrin/phsubsw.h"
|
|
#include "libc/intrin/phsubw.h"
|
|
#include "libc/intrin/pmaddubsw.h"
|
|
#include "libc/intrin/pmaddwd.h"
|
|
#include "libc/intrin/pmaxsw.h"
|
|
#include "libc/intrin/pmaxub.h"
|
|
#include "libc/intrin/pminsw.h"
|
|
#include "libc/intrin/pminub.h"
|
|
#include "libc/intrin/pmulhrsw.h"
|
|
#include "libc/intrin/pmulhuw.h"
|
|
#include "libc/intrin/pmulhw.h"
|
|
#include "libc/intrin/pmulld.h"
|
|
#include "libc/intrin/pmullw.h"
|
|
#include "libc/intrin/pmuludq.h"
|
|
#include "libc/intrin/por.h"
|
|
#include "libc/intrin/psadbw.h"
|
|
#include "libc/intrin/pshufb.h"
|
|
#include "libc/intrin/psignb.h"
|
|
#include "libc/intrin/psignd.h"
|
|
#include "libc/intrin/psignw.h"
|
|
#include "libc/intrin/pslld.h"
|
|
#include "libc/intrin/pslldq.h"
|
|
#include "libc/intrin/psllq.h"
|
|
#include "libc/intrin/psllw.h"
|
|
#include "libc/intrin/psrad.h"
|
|
#include "libc/intrin/psraw.h"
|
|
#include "libc/intrin/psrld.h"
|
|
#include "libc/intrin/psrldq.h"
|
|
#include "libc/intrin/psrlq.h"
|
|
#include "libc/intrin/psrlw.h"
|
|
#include "libc/intrin/psubb.h"
|
|
#include "libc/intrin/psubd.h"
|
|
#include "libc/intrin/psubq.h"
|
|
#include "libc/intrin/psubsb.h"
|
|
#include "libc/intrin/psubsw.h"
|
|
#include "libc/intrin/psubusb.h"
|
|
#include "libc/intrin/psubusw.h"
|
|
#include "libc/intrin/psubw.h"
|
|
#include "libc/intrin/punpckhbw.h"
|
|
#include "libc/intrin/punpckhdq.h"
|
|
#include "libc/intrin/punpckhqdq.h"
|
|
#include "libc/intrin/punpckhwd.h"
|
|
#include "libc/intrin/punpcklbw.h"
|
|
#include "libc/intrin/punpckldq.h"
|
|
#include "libc/intrin/punpcklqdq.h"
|
|
#include "libc/intrin/punpcklwd.h"
|
|
#include "libc/intrin/pxor.h"
|
|
#include "libc/macros.h"
|
|
#include "libc/str/str.h"
|
|
#include "tool/build/lib/case.h"
|
|
#include "tool/build/lib/machine.h"
|
|
#include "tool/build/lib/memory.h"
|
|
#include "tool/build/lib/modrm.h"
|
|
#include "tool/build/lib/sse.h"
|
|
|
|
/**
 * Sixteen bytes of vector state, viewable as lanes of any element type.
 *
 * Every member overlays the same 16 bytes, so a value copied in with
 * memcpy() can be handed to a kernel as whichever lane type it expects.
 */
union MachineVector {
  /* floating-point lanes */
  float f32[4];
  double f64[2];
  /* 8-bit lanes */
  int8_t i8[16];
  uint8_t u8[16];
  /* 16-bit lanes */
  int16_t i16[8];
  uint16_t u16[8];
  /* 32-bit lanes */
  int32_t i32[4];
  uint32_t u32[4];
  /* 64-bit lanes */
  int64_t i64[2];
  uint64_t u64[2];
};
|
|
|
|
void OpSse(struct Machine *m, uint32_t rde, enum OpSseKernel kernel) {
|
|
int i;
|
|
uint8_t *p;
|
|
union MachineVector x, y;
|
|
p = GetModrmRegisterXmmPointerRead16(m, rde);
|
|
if (Osz(rde)) {
|
|
memcpy(&y, p, 16);
|
|
} else {
|
|
memset(&y, 0, 16);
|
|
memcpy(&y, p, 8);
|
|
}
|
|
memcpy(&x, XmmRexrReg(m, rde), 16);
|
|
switch (kernel) {
|
|
CASE(kOpSsePsubb, psubb(x.i8, x.i8, y.i8));
|
|
CASE(kOpSsePaddb, paddb(x.i8, x.i8, y.i8));
|
|
CASE(kOpSsePsubw, psubw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePaddw, paddw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePsubd, psubd(x.i32, x.i32, y.i32));
|
|
CASE(kOpSsePaddd, paddd(x.i32, x.i32, y.i32));
|
|
CASE(kOpSsePaddq, paddq(x.i64, x.i64, y.i64));
|
|
CASE(kOpSsePsubq, psubq(x.i64, x.i64, y.i64));
|
|
CASE(kOpSsePsubsb, psubsb(x.i8, x.i8, y.i8));
|
|
CASE(kOpSsePsubsw, psubsw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePaddsb, paddsb(x.i8, x.i8, y.i8));
|
|
CASE(kOpSsePaddsw, paddsw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePaddusb, paddusb(x.u8, x.u8, y.u8));
|
|
CASE(kOpSsePaddusw, paddusw(x.u16, x.u16, y.u16));
|
|
CASE(kOpSsePor, por(x.u64, x.u64, y.u64));
|
|
CASE(kOpSsePxor, pxor(x.u64, x.u64, y.u64));
|
|
CASE(kOpSsePand, pand(x.u64, x.u64, y.u64));
|
|
CASE(kOpSsePandn, pandn(x.u64, x.u64, y.u64));
|
|
CASE(kOpSsePsubusb, psubusb(x.u8, x.u8, y.u8));
|
|
CASE(kOpSsePsubusw, psubusw(x.u16, x.u16, y.u16));
|
|
CASE(kOpSsePminub, pminub(x.u8, x.u8, y.u8));
|
|
CASE(kOpSsePmaxub, pmaxub(x.u8, x.u8, y.u8));
|
|
CASE(kOpSsePminsw, pminsw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePmaxsw, pmaxsw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePunpcklbw, punpcklbw(x.u8, x.u8, y.u8));
|
|
CASE(kOpSsePunpckhbw, punpckhbw(x.u8, x.u8, y.u8));
|
|
CASE(kOpSsePunpcklwd, punpcklwd(x.u16, x.u16, y.u16));
|
|
CASE(kOpSsePunpckldq, punpckldq(x.u32, x.u32, y.u32));
|
|
CASE(kOpSsePunpckhwd, punpckhwd(x.u16, x.u16, y.u16));
|
|
CASE(kOpSsePunpckhdq, punpckhdq(x.u32, x.u32, y.u32));
|
|
CASE(kOpSsePunpcklqdq, punpcklqdq(x.u64, x.u64, y.u64));
|
|
CASE(kOpSsePunpckhqdq, punpckhqdq(x.u64, x.u64, y.u64));
|
|
CASE(kOpSsePacksswb, packsswb(x.i8, x.i16, y.i16));
|
|
CASE(kOpSsePackuswb, packuswb(x.u8, x.i16, y.i16));
|
|
CASE(kOpSsePackssdw, packssdw(x.i16, x.i32, y.i32));
|
|
CASE(kOpSsePcmpgtb, pcmpgtb(x.i8, x.i8, y.i8));
|
|
CASE(kOpSsePcmpgtw, pcmpgtw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePcmpgtd, pcmpgtd(x.i32, x.i32, y.i32));
|
|
CASE(kOpSsePcmpeqb, pcmpeqb(x.u8, x.u8, y.u8));
|
|
CASE(kOpSsePcmpeqw, pcmpeqw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePcmpeqd, pcmpeqd(x.i32, x.i32, y.i32));
|
|
CASE(kOpSsePsrawv, psrawv(x.i16, x.i16, y.u64));
|
|
CASE(kOpSsePsrlwv, psrlwv(x.u16, x.u16, y.u64));
|
|
CASE(kOpSsePsllwv, psllwv(x.u16, x.u16, y.u64));
|
|
CASE(kOpSsePsradv, psradv(x.i32, x.i32, y.u64));
|
|
CASE(kOpSsePsrldv, psrldv(x.u32, x.u32, y.u64));
|
|
CASE(kOpSsePslldv, pslldv(x.u32, x.u32, y.u64));
|
|
CASE(kOpSsePsrlqv, psrlqv(x.u64, x.u64, y.u64));
|
|
CASE(kOpSsePsllqv, psllqv(x.u64, x.u64, y.u64));
|
|
CASE(kOpSsePavgb, pavgb(x.u8, x.u8, y.u8));
|
|
CASE(kOpSsePavgw, pavgw(x.u16, x.u16, y.u16));
|
|
CASE(kOpSsePsadbw, psadbw(x.u64, x.u8, y.u8));
|
|
CASE(kOpSsePmaddwd, pmaddwd(x.i32, x.i16, y.i16));
|
|
CASE(kOpSsePmulhuw, pmulhuw(x.u16, x.u16, y.u16));
|
|
CASE(kOpSsePmulhw, pmulhw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePmuludq, pmuludq(x.u64, x.u32, y.u32));
|
|
CASE(kOpSsePmullw, pmullw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePmulld, pmulld(x.i32, x.i32, y.i32));
|
|
CASE(kOpSsePshufb, pshufb(x.u8, x.u8, y.u8));
|
|
CASE(kOpSsePhaddw, phaddw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePhaddd, phaddd(x.i32, x.i32, y.i32));
|
|
CASE(kOpSsePhaddsw, phaddsw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePmaddubsw, pmaddubsw(x.i16, x.u8, y.i8));
|
|
CASE(kOpSsePhsubw, phsubw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePhsubd, phsubd(x.i32, x.i32, y.i32));
|
|
CASE(kOpSsePhsubsw, phsubsw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePsignb, psignb(x.i8, x.i8, y.i8));
|
|
CASE(kOpSsePsignw, psignw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePsignd, psignd(x.i32, x.i32, y.i32));
|
|
CASE(kOpSsePmulhrsw, pmulhrsw(x.i16, x.i16, y.i16));
|
|
CASE(kOpSsePabsb, pabsb(x.u8, x.i8));
|
|
CASE(kOpSsePabsw, pabsw(x.u16, x.i16));
|
|
CASE(kOpSsePabsd, pabsd(x.u32, x.i32));
|
|
default:
|
|
unreachable;
|
|
}
|
|
if (Osz(rde)) {
|
|
memcpy(XmmRexrReg(m, rde), &x, 16);
|
|
} else {
|
|
memcpy(XmmRexrReg(m, rde), &x, 8);
|
|
}
|
|
}
|
|
|
|
void OpSseUdqIb(struct Machine *m, uint32_t rde, enum OpSseUdqIbKernel kernel) {
|
|
uint8_t i;
|
|
union MachineVector x;
|
|
i = m->xedd->op.uimm0;
|
|
memcpy(&x, XmmRexbRm(m, rde), 16);
|
|
switch (kernel) {
|
|
CASE(kOpSseUdqIbPsrlw, (psrlw)(x.u16, x.u16, i));
|
|
CASE(kOpSseUdqIbPsraw, (psraw)(x.i16, x.i16, i));
|
|
CASE(kOpSseUdqIbPsllw, (psllw)(x.u16, x.u16, i));
|
|
CASE(kOpSseUdqIbPsrld, (psrld)(x.u32, x.u32, i));
|
|
CASE(kOpSseUdqIbPsrad, (psrad)(x.i32, x.i32, i));
|
|
CASE(kOpSseUdqIbPslld, (pslld)(x.u32, x.u32, i));
|
|
CASE(kOpSseUdqIbPsrlq, (psrlq)(x.u64, x.u64, i));
|
|
CASE(kOpSseUdqIbPsrldq, (psrldq)(x.u8, x.u8, i));
|
|
CASE(kOpSseUdqIbPsllq, (psllq)(x.u64, x.u64, i));
|
|
CASE(kOpSseUdqIbPslldq, (pslldq)(x.u8, x.u8, i));
|
|
default:
|
|
unreachable;
|
|
}
|
|
if (Osz(rde)) {
|
|
memcpy(XmmRexbRm(m, rde), &x, 16);
|
|
} else {
|
|
memcpy(XmmRexbRm(m, rde), &x, 8);
|
|
}
|
|
}
|
|
|
|
static void OpSsePalignrMmx(struct Machine *m, uint32_t rde) {
|
|
char t[24];
|
|
memcpy(t, GetModrmRegisterXmmPointerRead8(m, rde), 8);
|
|
memcpy(t + 8, XmmRexrReg(m, rde), 8);
|
|
memset(t + 16, 0, 8);
|
|
memcpy(XmmRexrReg(m, rde), t + MIN(m->xedd->op.uimm0, 16), 8);
|
|
}
|
|
|
|
void OpSsePalignr(struct Machine *m, uint32_t rde) {
|
|
if (Osz(rde)) {
|
|
palignr(XmmRexrReg(m, rde), XmmRexrReg(m, rde),
|
|
GetModrmRegisterXmmPointerRead8(m, rde), m->xedd->op.uimm0);
|
|
} else {
|
|
OpSsePalignrMmx(m, rde);
|
|
}
|
|
}
|