cosmopolitan/tool/build/lib/cvt.c

391 lines
11 KiB
C

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney │
│ │
│ This program is free software; you can redistribute it and/or modify │
│ it under the terms of the GNU General Public License as published by │
│ the Free Software Foundation; version 2 of the License. │
│ │
│ This program is distributed in the hope that it will be useful, but │
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
│ General Public License for more details. │
│ │
│ You should have received a copy of the GNU General Public License │
│ along with this program; if not, write to the Free Software │
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
│ 02110-1301 USA │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/macros.h"
#include "libc/math.h"
#include "tool/build/lib/cvt.h"
#include "tool/build/lib/endian.h"
#include "tool/build/lib/machine.h"
#include "tool/build/lib/memory.h"
#include "tool/build/lib/modrm.h"
#include "tool/build/lib/throw.h"
/**
 * Base selectors handed to OpCvt(), one per public entry point.
 *
 * The values are spaced four apart: OpCvt() ORs Rep(rde) and Osz(rde)
 * into the selector and then matches `base + 0` through `base + 3`.
 */
enum {
  kOpCvt0f2a = 0,
  kOpCvtt0f2c = 4,
  kOpCvt0f2d = 8,
  kOpCvt0f5a = 12,
  kOpCvt0f5b = 16,
  kOpCvt0fE6 = 20,
};
/**
 * Rounds a double to an integral value using the machine's rounding mode.
 *
 * @param m  machine whose `sse.rc` field selects the rounding function
 * @param x  value to round
 * @return x passed through rint (rc 0), floor (rc 1), ceil (rc 2) or
 *     trunc (rc 3); any other rc value is treated as unreachable
 */
static double SseRoundDouble(struct Machine *m, double x) {
  double rounded;
  switch (m->sse.rc) {
    case 0:
      rounded = rint(x);
      break;
    case 1:
      rounded = floor(x);
      break;
    case 2:
      rounded = ceil(x);
      break;
    case 3:
      rounded = trunc(x);
      break;
    default:
      unreachable;
  }
  return rounded;
}
/**
 * Converts a scalar float to a signed integer, truncating toward zero.
 *
 * The float is read from the operand located by
 * GetModrmRegisterXmmPointerRead4() and the result is stored with
 * Write64() into the register returned by RegRexrReg().
 *
 * When Rexw(rde) is set the conversion targets 64 bits; otherwise it
 * targets 32 bits and the stored value is zero-extended. Inputs that
 * cannot be represented at the target width (NaN, infinities, or out of
 * range) yield the x86 "integer indefinite" pattern (sign bit only)
 * rather than invoking C undefined behaviour or leaking wrapped bits.
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpGdqpWssCvttss2si(struct Machine *m, uint32_t rde) {
  float f;
  uint64_t n;
  memcpy(&f, GetModrmRegisterXmmPointerRead4(m, rde), 4);
  if (Rexw(rde)) {
    // no float lies strictly between -2^63-1 and -2^63, so >= is exact
    if (f >= -0x1p63f && f < 0x1p63f) {
      n = (uint64_t)(int64_t)f;
    } else {
      n = 0x8000000000000000ull;
    }
  } else {
    // range-check at 32-bit width *before* converting; NaN fails both tests
    if (f >= -0x1p31f && f < 0x1p31f) {
      n = (uint32_t)(int32_t)f;
    } else {
      n = 0x80000000u;
    }
  }
  Write64(RegRexrReg(m, rde), n);
}
/**
 * Converts a scalar double to a signed integer, truncating toward zero.
 *
 * The double is read from the operand located by
 * GetModrmRegisterXmmPointerRead8() and the result is stored with
 * Write64() into the register returned by RegRexrReg().
 *
 * When Rexw(rde) is set the conversion targets 64 bits; otherwise it
 * targets 32 bits and the stored value is zero-extended. Inputs that
 * cannot be represented at the target width (NaN, infinities, or out of
 * range) yield the x86 "integer indefinite" pattern (sign bit only)
 * rather than invoking C undefined behaviour or leaking wrapped bits.
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpGdqpWsdCvttsd2si(struct Machine *m, uint32_t rde) {
  double d;
  uint64_t n;
  memcpy(&d, GetModrmRegisterXmmPointerRead8(m, rde), 8);
  if (Rexw(rde)) {
    if (d >= -0x1p63 && d < 0x1p63) {
      n = (uint64_t)(int64_t)d;
    } else {
      n = 0x8000000000000000ull;
    }
  } else {
    // anything strictly above -2^31-1 truncates to a valid int32
    if (d > -2147483649.0 && d < 2147483648.0) {
      n = (uint32_t)(int32_t)d;
    } else {
      n = 0x80000000u;
    }
  }
  Write64(RegRexrReg(m, rde), n);
}
/**
 * Converts a scalar float to a signed integer using the current
 * rounding mode.
 *
 * The float is read from the operand located by
 * GetModrmRegisterXmmPointerRead4(), rounded via SseRoundDouble() so
 * that `m->sse.rc` is honoured the same way as in the double-precision
 * sibling, and stored with Write64() into the register returned by
 * RegRexrReg().
 *
 * When Rexw(rde) is set the conversion targets 64 bits; otherwise it
 * targets 32 bits and the stored value is zero-extended. Results that
 * do not fit the target width (including NaN and infinities) yield the
 * x86 "integer indefinite" pattern (sign bit only).
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpGdqpWssCvtss2si(struct Machine *m, uint32_t rde) {
  float f;
  double r;
  uint64_t n;
  memcpy(&f, GetModrmRegisterXmmPointerRead4(m, rde), 4);
  // float -> double is exact, so rounding the widened value is lossless
  r = SseRoundDouble(m, f);
  if (Rexw(rde)) {
    if (r >= -0x1p63 && r < 0x1p63) {
      n = (uint64_t)(int64_t)r;
    } else {
      n = 0x8000000000000000ull;
    }
  } else {
    // r is already integral (or NaN/inf), so closed/open bounds are exact
    if (r >= -0x1p31 && r < 0x1p31) {
      n = (uint32_t)(int32_t)r;
    } else {
      n = 0x80000000u;
    }
  }
  Write64(RegRexrReg(m, rde), n);
}
/**
 * Converts a scalar double to a signed integer using the current
 * rounding mode.
 *
 * The double is read from the operand located by
 * GetModrmRegisterXmmPointerRead8(), rounded via SseRoundDouble(), and
 * stored with Write64() into the register returned by RegRexrReg().
 *
 * When Rexw(rde) is set the conversion targets 64 bits; otherwise it
 * targets 32 bits and the stored value is zero-extended. Results that
 * do not fit the target width (including NaN and infinities) yield the
 * x86 "integer indefinite" pattern (sign bit only).
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpGdqpWsdCvtsd2si(struct Machine *m, uint32_t rde) {
  double d;
  double r;
  uint64_t n;
  memcpy(&d, GetModrmRegisterXmmPointerRead8(m, rde), 8);
  r = SseRoundDouble(m, d);
  if (Rexw(rde)) {
    if (r >= -0x1p63 && r < 0x1p63) {
      n = (uint64_t)(int64_t)r;
    } else {
      n = 0x8000000000000000ull;
    }
  } else {
    // r is already integral (or NaN/inf), so closed/open bounds are exact
    if (r >= -0x1p31 && r < 0x1p31) {
      n = (uint32_t)(int32_t)r;
    } else {
      n = 0x80000000u;
    }
  }
  Write64(RegRexrReg(m, rde), n);
}
/**
 * Converts a signed integer operand to a scalar float.
 *
 * Reads a signed 64-bit integer when Rexw(rde) is set, otherwise a
 * signed 32-bit integer, from the operand located by the matching
 * GetModrmRegisterWordPointerRead*() helper. The converted float is
 * copied into the first 4 bytes of the destination returned by
 * XmmRexrReg(); the remaining bytes are left untouched.
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpVssEdqpCvtsi2ss(struct Machine *m, uint32_t rde) {
  float f;
  int64_t n;
  if (Rexw(rde)) {
    n = (int64_t)Read64(GetModrmRegisterWordPointerRead8(m, rde));
  } else {
    // cast through int32_t so the 32-bit operand is sign-extended
    n = (int32_t)Read32(GetModrmRegisterWordPointerRead4(m, rde));
  }
  f = n;
  memcpy(XmmRexrReg(m, rde), &f, 4);
}
/**
 * Converts a signed integer operand to a scalar double.
 *
 * Reads a signed 64-bit integer when Rexw(rde) is set, otherwise a
 * signed 32-bit integer, from the operand located by the matching
 * GetModrmRegisterWordPointerRead*() helper. The converted double is
 * copied into the first 8 bytes of the destination returned by
 * XmmRexrReg(); the remaining bytes are left untouched.
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpVsdEdqpCvtsi2sd(struct Machine *m, uint32_t rde) {
  double d;
  int64_t n;
  if (Rexw(rde)) {
    n = (int64_t)Read64(GetModrmRegisterWordPointerRead8(m, rde));
  } else {
    // cast through int32_t so the 32-bit operand is sign-extended
    n = (int32_t)Read32(GetModrmRegisterWordPointerRead4(m, rde));
  }
  d = n;
  memcpy(XmmRexrReg(m, rde), &d, 8);
}
/**
 * Converts two packed signed 32-bit integers to two packed floats.
 *
 * The integers are read with Read32() at offsets 0 and 4 of the operand
 * located by GetModrmRegisterMmPointerRead8(); the two floats are copied
 * into the first 8 bytes of the destination returned by XmmRexrReg().
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpVpsQpiCvtpi2ps(struct Machine *m, uint32_t rde) {
  unsigned lane;
  uint8_t *src;
  int32_t ints[2];
  float out[2];
  src = GetModrmRegisterMmPointerRead8(m, rde);
  for (lane = 0; lane < 2; ++lane) {
    ints[lane] = Read32(src + lane * 4);
  }
  for (lane = 0; lane < 2; ++lane) {
    out[lane] = ints[lane];
  }
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
/**
 * Converts two packed signed 32-bit integers to two packed doubles.
 *
 * The integers are read with Read32() at offsets 0 and 4 of the operand
 * located by GetModrmRegisterMmPointerRead8(); the two doubles fill all
 * 16 bytes of the destination returned by XmmRexrReg().
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpVpdQpiCvtpi2pd(struct Machine *m, uint32_t rde) {
  unsigned lane;
  uint8_t *src;
  int32_t ints[2];
  double out[2];
  src = GetModrmRegisterMmPointerRead8(m, rde);
  for (lane = 0; lane < 2; ++lane) {
    ints[lane] = Read32(src + lane * 4);
  }
  for (lane = 0; lane < 2; ++lane) {
    out[lane] = ints[lane];
  }
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
/**
 * Converts two packed floats to two packed signed 32-bit integers,
 * rounding according to `m->sse.rc`.
 *
 * The floats come from the first 8 bytes of the operand located by
 * GetModrmRegisterXmmPointerRead8(). Each lane is rounded with rintf
 * (rc 0), floorf (rc 1), ceilf (rc 2) or truncf (rc 3) and the results
 * are stored with Write32() at offsets 0 and 4 of MmReg().
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpPpiWpsqCvtps2pi(struct Machine *m, uint32_t rde) {
  unsigned lane;
  float src[2];
  int32_t dst[2];
  memcpy(src, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(src));
  for (lane = 0; lane < 2; ++lane) {
    switch (m->sse.rc) {
      case 0:
        dst[lane] = rintf(src[lane]);
        break;
      case 1:
        dst[lane] = floorf(src[lane]);
        break;
      case 2:
        dst[lane] = ceilf(src[lane]);
        break;
      case 3:
        dst[lane] = truncf(src[lane]);
        break;
      default:
        unreachable;
    }
  }
  Write32(MmReg(m, rde) + 0, dst[0]);
  Write32(MmReg(m, rde) + 4, dst[1]);
}
/**
 * Converts two packed floats to two packed signed 32-bit integers using
 * C float-to-integer conversion (truncation toward zero).
 *
 * The floats come from the first 8 bytes of the operand located by
 * GetModrmRegisterXmmPointerRead8(); results are stored with Write32()
 * at offsets 0 and 4 of MmReg().
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpPpiWpsqCvttps2pi(struct Machine *m, uint32_t rde) {
  unsigned lane;
  float src[2];
  int32_t dst[2];
  memcpy(src, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(src));
  for (lane = 0; lane < 2; ++lane) {
    dst[lane] = src[lane];
  }
  Write32(MmReg(m, rde) + 0, dst[0]);
  Write32(MmReg(m, rde) + 4, dst[1]);
}
/**
 * Converts two packed doubles to two packed signed 32-bit integers,
 * rounding each lane with SseRoundDouble().
 *
 * The doubles come from the 16-byte operand located by
 * GetModrmRegisterXmmPointerRead16(); results are stored with Write32()
 * at offsets 0 and 4 of MmReg().
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpPpiWpdCvtpd2pi(struct Machine *m, uint32_t rde) {
  double src[2];
  int32_t lo, hi;
  memcpy(src, GetModrmRegisterXmmPointerRead16(m, rde), sizeof(src));
  lo = SseRoundDouble(m, src[0]);
  hi = SseRoundDouble(m, src[1]);
  Write32(MmReg(m, rde) + 0, lo);
  Write32(MmReg(m, rde) + 4, hi);
}
/**
 * Converts two packed doubles to two packed signed 32-bit integers using
 * C double-to-integer conversion (truncation toward zero).
 *
 * The doubles come from the 16-byte operand located by
 * GetModrmRegisterXmmPointerRead16(); results are stored with Write32()
 * at offsets 0 and 4 of MmReg().
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpPpiWpdCvttpd2pi(struct Machine *m, uint32_t rde) {
  unsigned lane;
  double src[2];
  int32_t dst[2];
  memcpy(src, GetModrmRegisterXmmPointerRead16(m, rde), sizeof(src));
  for (lane = 0; lane < 2; ++lane) {
    dst[lane] = src[lane];
  }
  Write32(MmReg(m, rde) + 0, dst[0]);
  Write32(MmReg(m, rde) + 4, dst[1]);
}
/**
 * Widens two packed floats to two packed doubles.
 *
 * The floats come from the first 8 bytes of the operand located by
 * GetModrmRegisterXmmPointerRead8(); the doubles fill all 16 bytes of
 * the destination returned by XmmRexrReg().
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpVpdWpsCvtps2pd(struct Machine *m, uint32_t rde) {
  unsigned lane;
  float narrow[2];
  double wide[2];
  // copy the source out first: it may alias the destination register
  memcpy(narrow, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(narrow));
  for (lane = 0; lane < 2; ++lane) {
    wide[lane] = narrow[lane];
  }
  memcpy(XmmRexrReg(m, rde), wide, sizeof(wide));
}
/**
 * Narrows two packed doubles to two packed floats.
 *
 * The doubles come from the 16-byte operand located by
 * GetModrmRegisterXmmPointerRead16(). The two floats occupy the low 8
 * bytes of the destination returned by XmmRexrReg(); the high 8 bytes
 * are cleared, as the CVTPD2PS instruction zeroes bits 127:64 of its
 * destination instead of preserving them.
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpVpsWpdCvtpd2ps(struct Machine *m, uint32_t rde) {
  float f[4];
  double d[2];
  memcpy(d, GetModrmRegisterXmmPointerRead16(m, rde), 16);
  f[0] = d[0];
  f[1] = d[1];
  // +0.0f is the all-zero bit pattern, so this clears the upper half
  f[2] = 0;
  f[3] = 0;
  memcpy(XmmRexrReg(m, rde), f, 16);
}
/**
 * Narrows a scalar double to a scalar float.
 *
 * The double is read from the first 8 bytes of the operand located by
 * GetModrmRegisterXmmPointerRead8(); the float is copied into the first
 * 4 bytes of the destination returned by XmmRexrReg().
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpVssWsdCvtsd2ss(struct Machine *m, uint32_t rde) {
  double wide;
  float narrow;
  memcpy(&wide, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(wide));
  narrow = wide;
  memcpy(XmmRexrReg(m, rde), &narrow, sizeof(narrow));
}
/**
 * Widens a scalar float to a scalar double.
 *
 * The float is read from the first 4 bytes of the operand located by
 * GetModrmRegisterXmmPointerRead4(); the double is copied into the first
 * 8 bytes of the destination returned by XmmRexrReg().
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpVsdWssCvtss2sd(struct Machine *m, uint32_t rde) {
  float narrow;
  double wide;
  memcpy(&narrow, GetModrmRegisterXmmPointerRead4(m, rde), sizeof(narrow));
  wide = narrow;
  memcpy(XmmRexrReg(m, rde), &wide, sizeof(wide));
}
/**
 * Converts four packed signed 32-bit integers to four packed floats.
 *
 * The integers come from the 16-byte operand located by
 * GetModrmRegisterXmmPointerRead16(); the floats fill all 16 bytes of
 * the destination returned by XmmRexrReg().
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpVpsWdqCvtdq2ps(struct Machine *m, uint32_t rde) {
  unsigned lane;
  int32_t ints[4];
  float out[4];
  memcpy(ints, GetModrmRegisterXmmPointerRead16(m, rde), sizeof(ints));
  for (lane = 0; lane < 4; ++lane) {
    out[lane] = ints[lane];
  }
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
/**
 * Converts two packed signed 32-bit integers to two packed doubles.
 *
 * The integers come from the first 8 bytes of the operand located by
 * GetModrmRegisterXmmPointerRead8(); the doubles fill all 16 bytes of
 * the destination returned by XmmRexrReg().
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpVpdWdqCvtdq2pd(struct Machine *m, uint32_t rde) {
  int32_t ints[2];
  double out[2];
  memcpy(ints, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(ints));
  out[0] = ints[0];
  out[1] = ints[1];
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
/**
 * Converts four packed floats to four packed signed 32-bit integers
 * using C float-to-integer conversion (truncation toward zero).
 *
 * The floats come from the 16-byte operand located by
 * GetModrmRegisterXmmPointerRead16(); the integers fill all 16 bytes of
 * the destination returned by XmmRexrReg().
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpVdqWpsCvttps2dq(struct Machine *m, uint32_t rde) {
  unsigned lane;
  float src[4];
  int32_t dst[4];
  memcpy(src, GetModrmRegisterXmmPointerRead16(m, rde), sizeof(src));
  for (lane = 0; lane < 4; ++lane) {
    dst[lane] = src[lane];
  }
  memcpy(XmmRexrReg(m, rde), dst, sizeof(dst));
}
/**
 * Converts four packed floats to four packed signed 32-bit integers,
 * rounding according to `m->sse.rc`.
 *
 * The floats come from the 16-byte operand located by
 * GetModrmRegisterXmmPointerRead16(). Each lane is rounded with rintf
 * (rc 0), floorf (rc 1), ceilf (rc 2) or truncf (rc 3); the integers
 * fill all 16 bytes of the destination returned by XmmRexrReg().
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpVdqWpsCvtps2dq(struct Machine *m, uint32_t rde) {
  unsigned lane;
  float src[4];
  int32_t dst[4];
  memcpy(src, GetModrmRegisterXmmPointerRead16(m, rde), sizeof(src));
  for (lane = 0; lane < 4; ++lane) {
    switch (m->sse.rc) {
      case 0:
        dst[lane] = rintf(src[lane]);
        break;
      case 1:
        dst[lane] = floorf(src[lane]);
        break;
      case 2:
        dst[lane] = ceilf(src[lane]);
        break;
      case 3:
        dst[lane] = truncf(src[lane]);
        break;
      default:
        unreachable;
    }
  }
  memcpy(XmmRexrReg(m, rde), dst, sizeof(dst));
}
/**
 * Converts two packed doubles to two packed signed 32-bit integers using
 * C double-to-integer conversion (truncation toward zero).
 *
 * The doubles come from the 16-byte operand located by
 * GetModrmRegisterXmmPointerRead16(). The two integers occupy the low 8
 * bytes of the destination returned by XmmRexrReg(); the high 8 bytes
 * are cleared, as the CVTTPD2DQ instruction zeroes bits 127:64 of its
 * destination instead of preserving them.
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpVdqWpdCvttpd2dq(struct Machine *m, uint32_t rde) {
  unsigned i;
  double d[2];
  int32_t n[4];
  memcpy(d, GetModrmRegisterXmmPointerRead16(m, rde), 16);
  for (i = 0; i < 2; ++i) n[i] = d[i];
  // upper two lanes of the destination must read back as zero
  n[2] = 0;
  n[3] = 0;
  memcpy(XmmRexrReg(m, rde), n, 16);
}
/**
 * Converts two packed doubles to two packed signed 32-bit integers,
 * rounding each lane with SseRoundDouble().
 *
 * The doubles come from the 16-byte operand located by
 * GetModrmRegisterXmmPointerRead16(). The two integers occupy the low 8
 * bytes of the destination returned by XmmRexrReg(); the high 8 bytes
 * are cleared, as the CVTPD2DQ instruction zeroes bits 127:64 of its
 * destination instead of preserving them.
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 */
static void OpVdqWpdCvtpd2dq(struct Machine *m, uint32_t rde) {
  unsigned i;
  double d[2];
  int32_t n[4];
  memcpy(d, GetModrmRegisterXmmPointerRead16(m, rde), 16);
  for (i = 0; i < 2; ++i) n[i] = SseRoundDouble(m, d[i]);
  // upper two lanes of the destination must read back as zero
  n[2] = 0;
  n[3] = 0;
  memcpy(XmmRexrReg(m, rde), n, 16);
}
/**
 * Dispatches one of the conversion handlers in this file.
 *
 * The handler index is `op | Rep(rde) | Osz(rde)`, where `op` is one of
 * the kOpCvt* base selectors (multiples of four) and the low two bits
 * pick the variant. Judging by the handlers wired up for every opcode,
 * the variants are presumably: +0 no prefix, +1 the 66 prefix, +2 the
 * F2 prefix, +3 the F3 prefix.
 *
 * For the 0F E6 group that means +1 is the truncating CVTTPD2DQ
 * (66 0F E6) and +2 is the rounding CVTPD2DQ (F2 0F E6).
 *
 * Any combination without a handler falls through to OpUd().
 *
 * @param m    machine state
 * @param rde  decoded instruction word
 * @param op   base selector, one of the kOpCvt* constants
 */
static void OpCvt(struct Machine *m, uint32_t rde, unsigned long op) {
  switch (op | Rep(rde) | Osz(rde)) {
    case kOpCvt0f2a + 0:
      OpVpsQpiCvtpi2ps(m, rde);
      break;
    case kOpCvt0f2a + 1:
      OpVpdQpiCvtpi2pd(m, rde);
      break;
    case kOpCvt0f2a + 2:
      OpVsdEdqpCvtsi2sd(m, rde);
      break;
    case kOpCvt0f2a + 3:
      OpVssEdqpCvtsi2ss(m, rde);
      break;
    case kOpCvtt0f2c + 0:
      OpPpiWpsqCvttps2pi(m, rde);
      break;
    case kOpCvtt0f2c + 1:
      OpPpiWpdCvttpd2pi(m, rde);
      break;
    case kOpCvtt0f2c + 2:
      OpGdqpWsdCvttsd2si(m, rde);
      break;
    case kOpCvtt0f2c + 3:
      OpGdqpWssCvttss2si(m, rde);
      break;
    case kOpCvt0f2d + 0:
      OpPpiWpsqCvtps2pi(m, rde);
      break;
    case kOpCvt0f2d + 1:
      OpPpiWpdCvtpd2pi(m, rde);
      break;
    case kOpCvt0f2d + 2:
      OpGdqpWsdCvtsd2si(m, rde);
      break;
    case kOpCvt0f2d + 3:
      OpGdqpWssCvtss2si(m, rde);
      break;
    case kOpCvt0f5a + 0:
      OpVpdWpsCvtps2pd(m, rde);
      break;
    case kOpCvt0f5a + 1:
      OpVpsWpdCvtpd2ps(m, rde);
      break;
    case kOpCvt0f5a + 2:
      OpVssWsdCvtsd2ss(m, rde);
      break;
    case kOpCvt0f5a + 3:
      OpVsdWssCvtss2sd(m, rde);
      break;
    case kOpCvt0f5b + 0:
      OpVpsWdqCvtdq2ps(m, rde);
      break;
    case kOpCvt0f5b + 1:
      OpVdqWpsCvtps2dq(m, rde);
      break;
    case kOpCvt0f5b + 3:
      OpVdqWpsCvttps2dq(m, rde);
      break;
    case kOpCvt0fE6 + 1:
      // 66 0F E6 is the truncating form
      OpVdqWpdCvttpd2dq(m, rde);
      break;
    case kOpCvt0fE6 + 2:
      // F2 0F E6 is the rounding form
      OpVdqWpdCvtpd2dq(m, rde);
      break;
    case kOpCvt0fE6 + 3:
      OpVpdWdqCvtdq2pd(m, rde);
      break;
    default:
      OpUd(m, rde);
  }
}
/**
 * Entry point that forwards to OpCvt() with the kOpCvt0f2a selector,
 * reaching the Cvtpi2ps, Cvtpi2pd, Cvtsi2sd and Cvtsi2ss handlers.
 */
void OpCvt0f2a(struct Machine *m, uint32_t rde) {
OpCvt(m, rde, kOpCvt0f2a);
}
/**
 * Entry point that forwards to OpCvt() with the kOpCvtt0f2c selector,
 * reaching the Cvttps2pi, Cvttpd2pi, Cvttsd2si and Cvttss2si handlers.
 */
void OpCvtt0f2c(struct Machine *m, uint32_t rde) {
OpCvt(m, rde, kOpCvtt0f2c);
}
/**
 * Entry point that forwards to OpCvt() with the kOpCvt0f2d selector,
 * reaching the Cvtps2pi, Cvtpd2pi, Cvtsd2si and Cvtss2si handlers.
 */
void OpCvt0f2d(struct Machine *m, uint32_t rde) {
OpCvt(m, rde, kOpCvt0f2d);
}
/**
 * Entry point that forwards to OpCvt() with the kOpCvt0f5a selector,
 * reaching the Cvtps2pd, Cvtpd2ps, Cvtsd2ss and Cvtss2sd handlers.
 */
void OpCvt0f5a(struct Machine *m, uint32_t rde) {
OpCvt(m, rde, kOpCvt0f5a);
}
/**
 * Entry point that forwards to OpCvt() with the kOpCvt0f5b selector,
 * reaching the Cvtdq2ps, Cvtps2dq and Cvttps2dq handlers.
 */
void OpCvt0f5b(struct Machine *m, uint32_t rde) {
OpCvt(m, rde, kOpCvt0f5b);
}
/**
 * Entry point that forwards to OpCvt() with the kOpCvt0fE6 selector,
 * reaching the Cvtpd2dq, Cvttpd2dq and Cvtdq2pd handlers.
 */
void OpCvt0fE6(struct Machine *m, uint32_t rde) {
OpCvt(m, rde, kOpCvt0fE6);
}