cosmopolitan/test/libc/intrin/intrin_test.c

2080 lines
48 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney │
│ │
│ This program is free software; you can redistribute it and/or modify │
│ it under the terms of the GNU General Public License as published by │
│ the Free Software Foundation; version 2 of the License. │
│ │
│ This program is distributed in the hope that it will be useful, but │
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
│ General Public License for more details. │
│ │
│ You should have received a copy of the GNU General Public License │
│ along with this program; if not, write to the Free Software │
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
│ 02110-1301 USA │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/bits/progn.h"
#include "libc/intrin/mpsadbw.h"
#include "libc/intrin/pabsb.h"
#include "libc/intrin/pabsd.h"
#include "libc/intrin/pabsw.h"
#include "libc/intrin/packssdw.h"
#include "libc/intrin/packsswb.h"
#include "libc/intrin/packusdw.h"
#include "libc/intrin/packuswb.h"
#include "libc/intrin/paddb.h"
#include "libc/intrin/paddd.h"
#include "libc/intrin/paddq.h"
#include "libc/intrin/paddsb.h"
#include "libc/intrin/paddsw.h"
#include "libc/intrin/paddusb.h"
#include "libc/intrin/paddusw.h"
#include "libc/intrin/paddw.h"
#include "libc/intrin/pand.h"
#include "libc/intrin/pandn.h"
#include "libc/intrin/pavgb.h"
#include "libc/intrin/pavgw.h"
#include "libc/intrin/pcmpeqb.h"
#include "libc/intrin/pcmpeqd.h"
#include "libc/intrin/pcmpeqw.h"
#include "libc/intrin/pcmpgtb.h"
#include "libc/intrin/pcmpgtd.h"
#include "libc/intrin/pcmpgtw.h"
#include "libc/intrin/phaddd.h"
#include "libc/intrin/phaddsw.h"
#include "libc/intrin/phaddw.h"
#include "libc/intrin/phsubd.h"
#include "libc/intrin/phsubsw.h"
#include "libc/intrin/phsubw.h"
#include "libc/intrin/pmaddubsw.h"
#include "libc/intrin/pmaddwd.h"
#include "libc/intrin/pmaxsw.h"
#include "libc/intrin/pmaxub.h"
#include "libc/intrin/pminsw.h"
#include "libc/intrin/pminub.h"
#include "libc/intrin/pmulhrsw.h"
#include "libc/intrin/pmulhuw.h"
#include "libc/intrin/pmulhw.h"
#include "libc/intrin/pmulld.h"
#include "libc/intrin/pmullw.h"
#include "libc/intrin/pmuludq.h"
#include "libc/intrin/por.h"
#include "libc/intrin/psadbw.h"
#include "libc/intrin/pshufb.h"
#include "libc/intrin/pshufd.h"
#include "libc/intrin/pshufhw.h"
#include "libc/intrin/pshuflw.h"
#include "libc/intrin/pshufw.h"
#include "libc/intrin/psignb.h"
#include "libc/intrin/psignd.h"
#include "libc/intrin/psignw.h"
#include "libc/intrin/pslld.h"
#include "libc/intrin/pslldq.h"
#include "libc/intrin/psllq.h"
#include "libc/intrin/psllw.h"
#include "libc/intrin/psrad.h"
#include "libc/intrin/psraw.h"
#include "libc/intrin/psrld.h"
#include "libc/intrin/psrldq.h"
#include "libc/intrin/psrlq.h"
#include "libc/intrin/psrlw.h"
#include "libc/intrin/psubb.h"
#include "libc/intrin/psubq.h"
#include "libc/intrin/psubsb.h"
#include "libc/intrin/psubsw.h"
#include "libc/intrin/psubusb.h"
#include "libc/intrin/psubusw.h"
#include "libc/intrin/psubw.h"
#include "libc/intrin/punpckhbw.h"
#include "libc/intrin/punpckhdq.h"
#include "libc/intrin/punpckhqdq.h"
#include "libc/intrin/punpckhwd.h"
#include "libc/intrin/punpcklbw.h"
#include "libc/intrin/punpckldq.h"
#include "libc/intrin/punpcklqdq.h"
#include "libc/intrin/punpcklwd.h"
#include "libc/intrin/pxor.h"
#include "libc/limits.h"
#include "libc/log/check.h"
#include "libc/nexgen32e/kcpuids.h"
#include "libc/rand/lcg.h"
#include "libc/rand/rand.h"
#include "libc/runtime/gc.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/testlib.h"
#include "libc/x/x.h"
#include "tool/viz/lib/formatstringtable-testlib.h"
/* State of the linear congruential generator that feeds Rando(). */
uint64_t g_rando = 1;

/*
 * Returns 64 pseudorandom bits assembled from two generator steps.
 *
 * The upper 32 bits of the first step form the high half of the result
 * and the upper 32 bits of the second step form the low half. The steps
 * are taken in separate statements because C does not specify the order
 * in which the operands of `|` are evaluated; written as one expression
 * the compiler would be free to choose which step feeds which half.
 */
forceinline uint64_t Rando(void) {
  uint64_t hi, lo;
  hi = KnuthLinearCongruentialGenerator(&g_rando) >> 32 << 32;
  lo = KnuthLinearCongruentialGenerator(&g_rando) >> 32;
  return hi | lo;
}
/*
 * Fills a buffer with pseudorandom data, eight bytes at a time.
 *
 * @param mem   destination buffer
 * @param size  byte count; DCHECK'd to be a multiple of 8
 *
 * Words are written from the last one down to the first, each one
 * receiving a fresh Rando() value through memcpy().
 */
noinline void RngSet(void *mem, size_t size) {
  char *dst = mem;
  size_t words;
  uint64_t bits;
  DCHECK(size % 8 == 0);
  words = size >> 3;
  while (words--) {
    bits = Rando();
    memcpy(dst + words * 8, &bits, 8);
  }
}
FIXTURE(intrin, disableHardwareExtensions) {
memset((/*unconst*/ void *)kCpuids, 0, sizeof(kCpuids));
}
/* punpcklwd interleaves the low four words of both inputs:
   {a0, b0, a1, b1, a2, b2, a3, b3}. */
TEST(punpcklwd, test) {
  int i;
  uint16_t lhs[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint16_t rhs[8] = {9, 10, 11, 12, 13, 14, 15, 16};
  uint16_t want[8] = {1, 9, 2, 10, 3, 11, 4, 12};
  uint16_t got[8];
  punpcklwd(got, lhs, rhs);
  for (i = 0; i < 8; ++i) {
    ASSERT_EQ(want[i], got[i]);
  }
}
/* Same vectors as TEST(punpcklwd, test), but through the function form.
   Parenthesizing the name keeps a same-named function-like macro (if the
   header defines one) from expanding, as the other pure* tests in this file
   do; the plain call used before made this test a duplicate of `test`. */
TEST(punpcklwd, pure) {
  uint16_t a[8] = {1, 02, 03, 04, 05, 06, 07, 8};
  uint16_t b[8] = {9, 10, 11, 12, 13, 14, 15, 16};
  uint16_t c[8];
  (punpcklwd)(c, a, b);
  ASSERT_EQ(1, c[0]);
  ASSERT_EQ(9, c[1]);
  ASSERT_EQ(2, c[2]);
  ASSERT_EQ(10, c[3]);
  ASSERT_EQ(3, c[4]);
  ASSERT_EQ(11, c[5]);
  ASSERT_EQ(4, c[6]);
  ASSERT_EQ(12, c[7]);
}
/* Destination is the first source operand; the interleave must still read
   the original input values. */
TEST(punpcklwd, testAlias) {
  int i;
  uint16_t lhs[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint16_t rhs[8] = {9, 10, 11, 12, 13, 14, 15, 16};
  uint16_t want[8] = {1, 9, 2, 10, 3, 11, 4, 12};
  punpcklwd(lhs, lhs, rhs);
  for (i = 0; i < 8; ++i) {
    ASSERT_EQ(want[i], lhs[i]);
  }
}
/* Parenthesized (function-form) call with the destination aliasing the
   first source operand. */
TEST(punpcklwd, pureAlias) {
  int i;
  uint16_t lhs[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint16_t rhs[8] = {9, 10, 11, 12, 13, 14, 15, 16};
  uint16_t want[8] = {1, 9, 2, 10, 3, 11, 4, 12};
  (punpcklwd)(lhs, lhs, rhs);
  for (i = 0; i < 8; ++i) {
    ASSERT_EQ(want[i], lhs[i]);
  }
}
/* Destination is the second source operand this time. */
TEST(punpcklwd, testAlias2) {
  int i;
  uint16_t lhs[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint16_t rhs[8] = {9, 10, 11, 12, 13, 14, 15, 16};
  uint16_t want[8] = {1, 9, 2, 10, 3, 11, 4, 12};
  punpcklwd(rhs, lhs, rhs);
  for (i = 0; i < 8; ++i) {
    ASSERT_EQ(want[i], rhs[i]);
  }
}
/* Parenthesized (function-form) call with the destination aliasing the
   second source operand. */
TEST(punpcklwd, pureAlias2) {
  int i;
  uint16_t lhs[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint16_t rhs[8] = {9, 10, 11, 12, 13, 14, 15, 16};
  uint16_t want[8] = {1, 9, 2, 10, 3, 11, 4, 12};
  (punpcklwd)(rhs, lhs, rhs);
  for (i = 0; i < 8; ++i) {
    ASSERT_EQ(want[i], rhs[i]);
  }
}
/* punpcklqdq pairs the low quadwords of both inputs: {a0, b0}. */
TEST(punpcklqdq, test) {
  uint64_t lhs[2] = {1, 2};
  uint64_t rhs[2] = {3, 4};
  uint64_t out[2];
  punpcklqdq(out, lhs, rhs);
  ASSERT_EQ(1, out[0]);
  ASSERT_EQ(3, out[1]);
}
/* Function-form (parenthesized) call; expects {a0, b0}. */
TEST(punpcklqdq, pure) {
  uint64_t lhs[2] = {1, 2};
  uint64_t rhs[2] = {3, 4};
  uint64_t out[2];
  (punpcklqdq)(out, lhs, rhs);
  ASSERT_EQ(1, out[0]);
  ASSERT_EQ(3, out[1]);
}
/* Destination aliases the first source operand. */
TEST(punpcklqdq, testAlias) {
  uint64_t lhs[2] = {1, 2};
  uint64_t rhs[2] = {3, 4};
  punpcklqdq(lhs, lhs, rhs);
  ASSERT_EQ(1, lhs[0]);
  ASSERT_EQ(3, lhs[1]);
}
/* Function-form call with the destination aliasing the first source. */
TEST(punpcklqdq, pureAlias) {
  uint64_t lhs[2] = {1, 2};
  uint64_t rhs[2] = {3, 4};
  (punpcklqdq)(lhs, lhs, rhs);
  ASSERT_EQ(1, lhs[0]);
  ASSERT_EQ(3, lhs[1]);
}
/* punpckldq interleaves the low two doublewords: {a0, b0, a1, b1}. */
TEST(punpckldq, test) {
  int i;
  uint32_t lhs[4] = {1, 2, 3, 4};
  uint32_t rhs[4] = {5, 6, 7, 8};
  uint32_t want[4] = {1, 5, 2, 6};
  uint32_t got[4];
  punpckldq(got, lhs, rhs);
  for (i = 0; i < 4; ++i) {
    ASSERT_EQ(want[i], got[i]);
  }
}
/* Same vectors as TEST(punpckldq, test), but through the function form.
   Parenthesizing the name keeps a same-named function-like macro (if the
   header defines one) from expanding, as the other pure* tests in this file
   do; the plain call used before made this test a duplicate of `test`. */
TEST(punpckldq, pure) {
  uint32_t a[4] = {1, 2, 3, 4};
  uint32_t b[4] = {5, 6, 7, 8};
  uint32_t c[4];
  (punpckldq)(c, a, b);
  ASSERT_EQ(1, c[0]);
  ASSERT_EQ(5, c[1]);
  ASSERT_EQ(2, c[2]);
  ASSERT_EQ(6, c[3]);
}
/* Destination aliases the first source operand. */
TEST(punpckldq, testAlias) {
  int i;
  uint32_t lhs[4] = {1, 2, 3, 4};
  uint32_t rhs[4] = {5, 6, 7, 8};
  uint32_t want[4] = {1, 5, 2, 6};
  punpckldq(lhs, lhs, rhs);
  for (i = 0; i < 4; ++i) {
    ASSERT_EQ(want[i], lhs[i]);
  }
}
/* Function-form call with the destination aliasing the first source. */
TEST(punpckldq, pureAlias) {
  int i;
  uint32_t lhs[4] = {1, 2, 3, 4};
  uint32_t rhs[4] = {5, 6, 7, 8};
  uint32_t want[4] = {1, 5, 2, 6};
  (punpckldq)(lhs, lhs, rhs);
  for (i = 0; i < 4; ++i) {
    ASSERT_EQ(want[i], lhs[i]);
  }
}
/* Destination aliases the second source operand. */
TEST(punpckldq, testAlias2) {
  int i;
  uint32_t lhs[4] = {1, 2, 3, 4};
  uint32_t rhs[4] = {5, 6, 7, 8};
  uint32_t want[4] = {1, 5, 2, 6};
  punpckldq(rhs, lhs, rhs);
  for (i = 0; i < 4; ++i) {
    ASSERT_EQ(want[i], rhs[i]);
  }
}
/* Function-form call with the destination aliasing the second source. */
TEST(punpckldq, pureAlias2) {
  int i;
  uint32_t lhs[4] = {1, 2, 3, 4};
  uint32_t rhs[4] = {5, 6, 7, 8};
  uint32_t want[4] = {1, 5, 2, 6};
  (punpckldq)(rhs, lhs, rhs);
  for (i = 0; i < 4; ++i) {
    ASSERT_EQ(want[i], rhs[i]);
  }
}
/* Checks that punpcklqdq() and (punpcklqdq)() agree on 100 random inputs. */
TEST(punpcklqdq, fuzz) {
  int i;
  uint64_t x[2], y[2], a[2], b[2];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    punpcklqdq(a, x, y);
    (punpcklqdq)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that punpckldq() and (punpckldq)() agree on 100 random inputs. */
TEST(punpckldq, fuzz) {
  int i;
  uint32_t x[4], y[4], a[4], b[4];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    punpckldq(a, x, y);
    (punpckldq)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that punpcklbw() and (punpcklbw)() agree on 100 random inputs. */
TEST(punpcklbw, fuzz) {
  int i;
  uint8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    punpcklbw(a, x, y);
    (punpcklbw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* punpckhwd interleaves the high four words of both inputs:
   {a4, b4, a5, b5, a6, b6, a7, b7}. */
TEST(punpckhwd, test) {
  int i;
  uint16_t lhs[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint16_t rhs[8] = {9, 10, 11, 12, 13, 14, 15, 16};
  uint16_t want[8] = {5, 13, 6, 14, 7, 15, 8, 16};
  uint16_t got[8];
  punpckhwd(got, lhs, rhs);
  for (i = 0; i < 8; ++i) {
    EXPECT_EQ(want[i], got[i]);
  }
}
/* Same vectors as TEST(punpckhwd, test), but through the function form.
   Parenthesizing the name keeps a same-named function-like macro (if the
   header defines one) from expanding, as the other pure* tests in this file
   do; the plain call used before made this test a duplicate of `test`. */
TEST(punpckhwd, pure) {
  uint16_t a[8] = {1, 02, 03, 04, 05, 06, 07, 8};
  uint16_t b[8] = {9, 10, 11, 12, 13, 14, 15, 16};
  uint16_t c[8];
  (punpckhwd)(c, a, b);
  EXPECT_EQ(5, c[0]);
  EXPECT_EQ(13, c[1]);
  EXPECT_EQ(6, c[2]);
  EXPECT_EQ(14, c[3]);
  EXPECT_EQ(7, c[4]);
  EXPECT_EQ(15, c[5]);
  EXPECT_EQ(8, c[6]);
  EXPECT_EQ(16, c[7]);
}
/* Destination aliases the first source operand. */
TEST(punpckhwd, testAlias) {
  int i;
  uint16_t lhs[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint16_t rhs[8] = {9, 10, 11, 12, 13, 14, 15, 16};
  uint16_t want[8] = {5, 13, 6, 14, 7, 15, 8, 16};
  punpckhwd(lhs, lhs, rhs);
  for (i = 0; i < 8; ++i) {
    EXPECT_EQ(want[i], lhs[i]);
  }
}
/* Function-form call with the destination aliasing the first source. */
TEST(punpckhwd, pureAlias) {
  int i;
  uint16_t lhs[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint16_t rhs[8] = {9, 10, 11, 12, 13, 14, 15, 16};
  uint16_t want[8] = {5, 13, 6, 14, 7, 15, 8, 16};
  (punpckhwd)(lhs, lhs, rhs);
  for (i = 0; i < 8; ++i) {
    EXPECT_EQ(want[i], lhs[i]);
  }
}
/* Destination aliases the second source operand. */
TEST(punpckhwd, testAlias2) {
  int i;
  uint16_t lhs[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint16_t rhs[8] = {9, 10, 11, 12, 13, 14, 15, 16};
  uint16_t want[8] = {5, 13, 6, 14, 7, 15, 8, 16};
  punpckhwd(rhs, lhs, rhs);
  for (i = 0; i < 8; ++i) {
    EXPECT_EQ(want[i], rhs[i]);
  }
}
/* Function-form call with the destination aliasing the second source. */
TEST(punpckhwd, pureAlias2) {
  int i;
  uint16_t lhs[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint16_t rhs[8] = {9, 10, 11, 12, 13, 14, 15, 16};
  uint16_t want[8] = {5, 13, 6, 14, 7, 15, 8, 16};
  (punpckhwd)(rhs, lhs, rhs);
  for (i = 0; i < 8; ++i) {
    EXPECT_EQ(want[i], rhs[i]);
  }
}
/* punpckhqdq pairs the high quadwords of both inputs: {a1, b1}. */
TEST(punpckhqdq, test) {
  uint64_t lhs[2] = {1, 2};
  uint64_t rhs[2] = {3, 4};
  uint64_t out[2];
  punpckhqdq(out, lhs, rhs);
  EXPECT_EQ(2, out[0]);
  EXPECT_EQ(4, out[1]);
}
/* Function-form (parenthesized) call; expects {a1, b1}. */
TEST(punpckhqdq, pure) {
  uint64_t lhs[2] = {1, 2};
  uint64_t rhs[2] = {3, 4};
  uint64_t out[2];
  (punpckhqdq)(out, lhs, rhs);
  EXPECT_EQ(2, out[0]);
  EXPECT_EQ(4, out[1]);
}
/* Destination aliases the first source operand. */
TEST(punpckhqdq, testAlias) {
  uint64_t lhs[2] = {1, 2};
  uint64_t rhs[2] = {3, 4};
  punpckhqdq(lhs, lhs, rhs);
  EXPECT_EQ(2, lhs[0]);
  EXPECT_EQ(4, lhs[1]);
}
/* Function-form call with the destination aliasing the first source. */
TEST(punpckhqdq, pureAlias) {
  uint64_t lhs[2] = {1, 2};
  uint64_t rhs[2] = {3, 4};
  (punpckhqdq)(lhs, lhs, rhs);
  EXPECT_EQ(2, lhs[0]);
  EXPECT_EQ(4, lhs[1]);
}
/* punpckhdq interleaves the high two doublewords: {a2, b2, a3, b3}. */
TEST(punpckhdq, test) {
  int i;
  uint32_t lhs[4] = {1, 2, 3, 4};
  uint32_t rhs[4] = {5, 6, 7, 8};
  uint32_t want[4] = {3, 7, 4, 8};
  uint32_t got[4];
  punpckhdq(got, lhs, rhs);
  for (i = 0; i < 4; ++i) {
    EXPECT_EQ(want[i], got[i]);
  }
}
/* Same vectors as TEST(punpckhdq, test), but through the function form.
   Parenthesizing the name keeps a same-named function-like macro (if the
   header defines one) from expanding, as the other pure* tests in this file
   do; the plain call used before made this test a duplicate of `test`. */
TEST(punpckhdq, pure) {
  uint32_t a[4] = {1, 2, 3, 4};
  uint32_t b[4] = {5, 6, 7, 8};
  uint32_t c[4];
  (punpckhdq)(c, a, b);
  EXPECT_EQ(3, c[0]);
  EXPECT_EQ(7, c[1]);
  EXPECT_EQ(4, c[2]);
  EXPECT_EQ(8, c[3]);
}
/* Destination aliases the first source operand. */
TEST(punpckhdq, testAlias) {
  int i;
  uint32_t lhs[4] = {1, 2, 3, 4};
  uint32_t rhs[4] = {5, 6, 7, 8};
  uint32_t want[4] = {3, 7, 4, 8};
  punpckhdq(lhs, lhs, rhs);
  for (i = 0; i < 4; ++i) {
    EXPECT_EQ(want[i], lhs[i]);
  }
}
/* Function-form call with the destination aliasing the first source. */
TEST(punpckhdq, pureAlias) {
  int i;
  uint32_t lhs[4] = {1, 2, 3, 4};
  uint32_t rhs[4] = {5, 6, 7, 8};
  uint32_t want[4] = {3, 7, 4, 8};
  (punpckhdq)(lhs, lhs, rhs);
  for (i = 0; i < 4; ++i) {
    EXPECT_EQ(want[i], lhs[i]);
  }
}
/* Destination aliases the second source operand. */
TEST(punpckhdq, testAlias2) {
  int i;
  uint32_t lhs[4] = {1, 2, 3, 4};
  uint32_t rhs[4] = {5, 6, 7, 8};
  uint32_t want[4] = {3, 7, 4, 8};
  punpckhdq(rhs, lhs, rhs);
  for (i = 0; i < 4; ++i) {
    EXPECT_EQ(want[i], rhs[i]);
  }
}
/* Function-form call with the destination aliasing the second source. */
TEST(punpckhdq, pureAlias2) {
  int i;
  uint32_t lhs[4] = {1, 2, 3, 4};
  uint32_t rhs[4] = {5, 6, 7, 8};
  uint32_t want[4] = {3, 7, 4, 8};
  (punpckhdq)(rhs, lhs, rhs);
  for (i = 0; i < 4; ++i) {
    EXPECT_EQ(want[i], rhs[i]);
  }
}
/* Checks that punpckhwd() and (punpckhwd)() agree on 100 random inputs. */
TEST(punpckhwd, fuzz) {
  int i;
  uint16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    punpckhwd(a, x, y);
    (punpckhwd)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that punpckhqdq() and (punpckhqdq)() agree on 100 random inputs. */
TEST(punpckhqdq, fuzz) {
  int i;
  uint64_t x[2], y[2], a[2], b[2];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    punpckhqdq(a, x, y);
    (punpckhqdq)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that punpckhdq() and (punpckhdq)() agree on 100 random inputs. */
TEST(punpckhdq, fuzz) {
  int i;
  uint32_t x[4], y[4], a[4], b[4];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    punpckhdq(a, x, y);
    (punpckhdq)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that punpckhbw() and (punpckhbw)() agree on 100 random inputs. */
TEST(punpckhbw, fuzz) {
  int i;
  uint8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    punpckhbw(a, x, y);
    (punpckhbw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that psubq() and (psubq)() agree on 100 random inputs. */
TEST(psubq, fuzz) {
  int i;
  int64_t x[2], y[2], a[2], b[2];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    psubq(a, x, y);
    (psubq)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Arithmetic right shift of words by the count held in a vector operand
   ({1, 0}); negative lanes keep their sign and the zero lane stays zero. */
TEST(psrawv, testSmallShift) {
  int i;
  int16_t v[8] = {-1, -2, SHRT_MIN, 2};
  int16_t want[5] = {-1, -1, -16384, 1, 0};
  uint64_t count[2] = {1};
  psrawv(v, v, count);
  for (i = 0; i < 5; ++i) {
    ASSERT_EQ(want[i], v[i]);
  }
}
/* Arithmetic right shift of words by an immediate count of 1. */
TEST(psraw, testSmallShift) {
  int i;
  int16_t v[8] = {-1, -2, SHRT_MIN, 2};
  int16_t want[5] = {-1, -1, -16384, 1, 0};
  psraw(v, v, 1);
  for (i = 0; i < 5; ++i) {
    ASSERT_EQ(want[i], v[i]);
  }
}
/* Function-form (parenthesized) psraw with a shift count of 1. */
TEST(psraw, pureSmallShift) {
  int i;
  int16_t v[8] = {-1, -2, SHRT_MIN, 2};
  int16_t want[5] = {-1, -1, -16384, 1, 0};
  (psraw)(v, v, 1);
  for (i = 0; i < 5; ++i) {
    ASSERT_EQ(want[i], v[i]);
  }
}
/* A count of 77 exceeds the 16-bit lane width; every lane must collapse to
   its sign fill (-1 for negatives, 0 otherwise). */
TEST(psraw, testBigShift_saturatesCount) {
  int i;
  int16_t v[8] = {-1, -2, SHRT_MIN, 2};
  int16_t want[5] = {-1, -1, -1, 0, 0};
  psraw(v, v, 77);
  for (i = 0; i < 5; ++i) {
    ASSERT_EQ(want[i], v[i]);
  }
}
/* Function-form psraw with an oversized count of 77; lanes must collapse to
   their sign fill. */
TEST(psraw, pureBigShift_saturatesCount) {
  int i;
  int16_t v[8] = {-1, -2, SHRT_MIN, 2};
  int16_t want[5] = {-1, -1, -1, 0, 0};
  (psraw)(v, v, 77);
  for (i = 0; i < 5; ++i) {
    ASSERT_EQ(want[i], v[i]);
  }
}
/* Arithmetic right shift of doublewords by the count in a vector operand
   ({1, 0}). The array holds eight int32 values and element 4 is checked to
   still be zero. NOTE(review): presumably that guards against writes past
   the first 16 bytes; confirm. */
TEST(psradv, testSmallShift) {
  int i;
  int32_t v[8] = {-1, -2, INT32_MIN, 2};
  int32_t want[5] = {-1, -1, -1073741824, 1, 0};
  uint64_t count[2] = {1};
  psradv(v, v, count);
  for (i = 0; i < 5; ++i) {
    ASSERT_EQ(want[i], v[i]);
  }
}
/* Compares psradv() with (psradv)() on 100 random vectors, using random
   values below 64 in both quadwords of the count operand. */
TEST(psradv, test) {
  int round;
  int32_t x[4], a[4], b[4];
  uint64_t y[2];
  for (round = 0; round < 100; ++round) {
    RngSet(x, sizeof(x));
    y[0] = Rando() % 64;
    y[1] = Rando() % 64;
    psradv(a, x, y);
    (psradv)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Arithmetic right shift of doublewords by an immediate count of 1. */
TEST(psrad, testSmallShift) {
  int i;
  int32_t v[4] = {-1, -2, INT32_MIN, 2};
  int32_t want[4] = {-1, -1, -1073741824, 1};
  psrad(v, v, 1);
  for (i = 0; i < 4; ++i) {
    ASSERT_EQ(want[i], v[i]);
  }
}
/* Function-form (parenthesized) psrad with a shift count of 1. */
TEST(psrad, pureSmallShift) {
  int i;
  int32_t v[4] = {-1, -2, INT32_MIN, 2};
  int32_t want[4] = {-1, -1, -1073741824, 1};
  (psrad)(v, v, 1);
  for (i = 0; i < 4; ++i) {
    ASSERT_EQ(want[i], v[i]);
  }
}
/* A count of 77 exceeds the 32-bit lane width; every lane must collapse to
   its sign fill (-1 for negatives, 0 otherwise). */
TEST(psrad, testBigShift_saturatesCount) {
  int i;
  int32_t v[4] = {-1, -2, INT32_MIN, 2};
  int32_t want[4] = {-1, -1, -1, 0};
  psrad(v, v, 77);
  for (i = 0; i < 4; ++i) {
    ASSERT_EQ(want[i], v[i]);
  }
}
/* Function-form psrad with an oversized count of 77; lanes must collapse to
   their sign fill. */
TEST(psrad, pureBigShift_saturatesCount) {
  int i;
  int32_t v[4] = {-1, -2, INT32_MIN, 2};
  int32_t want[4] = {-1, -1, -1, 0};
  (psrad)(v, v, 77);
  for (i = 0; i < 4; ++i) {
    ASSERT_EQ(want[i], v[i]);
  }
}
/* Compares psllwv() with (psllwv)() on 32 random vectors. Counts are drawn
   from [0, 300), so values well beyond the 16-bit lane width are included. */
TEST(psllwv, test) {
  int round;
  uint16_t x[8], a[8], b[8];
  uint64_t y[2];
  for (round = 0; round < 32; ++round) {
    RngSet(x, sizeof(x));
    y[0] = Rando() % 300;
    y[1] = Rando() % 300;
    psllwv(a, x, y);
    (psllwv)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Left shift of words by an immediate count of 1; the top bit of 0xffff
   falls off, leaving 0xfffe. */
TEST(psllw, testSmallShift) {
  int i;
  uint16_t v[8] = {0, 1, 0xffff, 2};
  uint16_t want[4] = {0, 2, 0xfffe, 4};
  psllw(v, v, 1);
  for (i = 0; i < 4; ++i) {
    ASSERT_EQ(want[i], v[i]);
  }
}
/* Left shift of words by the count held in a vector operand ({1, 0}). */
TEST(psllwv, testSmallShift) {
  int i;
  uint16_t v[8] = {0, 1, 0xffff, 2};
  uint16_t want[4] = {0, 2, 0xfffe, 4};
  uint64_t count[2] = {1};
  psllwv(v, v, count);
  for (i = 0; i < 4; ++i) {
    ASSERT_EQ(want[i], v[i]);
  }
}
/* Compares pslldv() with (pslldv)() on 32 random vectors. Counts are drawn
   from [0, 300), so values well beyond the 32-bit lane width are included. */
TEST(pslldv, test) {
  int round;
  uint32_t x[4], a[4], b[4];
  uint64_t y[2];
  for (round = 0; round < 32; ++round) {
    RngSet(x, sizeof(x));
    y[0] = Rando() % 300;
    y[1] = Rando() % 300;
    pslldv(a, x, y);
    (pslldv)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Left shift of doublewords by an immediate count of 1; the top bit of
   0xffffffff falls off, leaving 0xfffffffe. */
TEST(pslld, testSmallShift) {
  int i;
  uint32_t v[8] = {0, 1, 0xffffffff, 2};
  uint32_t want[4] = {0, 2, 0xfffffffe, 4};
  pslld(v, v, 1);
  for (i = 0; i < 4; ++i) {
    ASSERT_EQ(want[i], v[i]);
  }
}
/* Left shift of doublewords by the count held in a vector operand ({1, 0}). */
TEST(pslldv, testSmallShift) {
  int i;
  uint32_t v[8] = {0, 1, 0xffffffff, 2};
  uint32_t want[4] = {0, 2, 0xfffffffe, 4};
  uint64_t count[2] = {1};
  pslldv(v, v, count);
  for (i = 0; i < 4; ++i) {
    ASSERT_EQ(want[i], v[i]);
  }
}
/* pmulhuw keeps the high 16 bits of each unsigned 16x16 product. The
   discarded low halves for lanes 0..4 are 0000, 0001, 0000, ffff, 0000. */
TEST(pmulhuw, test) {
  int i;
  uint16_t x[8] = {0, 0xffff, 0x0000, 0x0001, 0x8000};
  uint16_t y[8] = {0, 0xffff, 0xffff, 0xffff, 0x8000};
  uint16_t want[5] = {0x0000, 0xfffe, 0x0000, 0x0000, 0x4000};
  uint16_t z[8];
  pmulhuw(z, x, y);
  for (i = 0; i < 5; ++i) {
    ASSERT_EQ(want[i], z[i]);
  }
}
/* Function-form (parenthesized) pmulhuw: high 16 bits of each unsigned
   product. The discarded low halves for lanes 0..4 are 0000, 0001, 0000,
   ffff, 0000. */
TEST(pmulhuw, pure) {
  int i;
  uint16_t x[8] = {0, 0xffff, 0x0000, 0x0001, 0x8000};
  uint16_t y[8] = {0, 0xffff, 0xffff, 0xffff, 0x8000};
  uint16_t want[5] = {0x0000, 0xfffe, 0x0000, 0x0000, 0x4000};
  uint16_t z[8];
  (pmulhuw)(z, x, y);
  for (i = 0; i < 5; ++i) {
    ASSERT_EQ(want[i], z[i]);
  }
}
/* Checks that pmulhuw() and (pmulhuw)() agree on 100 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(pmulhuw, fuzz) {
  int i;
  uint16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pmulhuw(a, x, y);
    (pmulhuw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pmulhuw(a, (void *)a, y);
    (pmulhuw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pmulhw() and (pmulhw)() agree on 1000 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(pmulhw, fuzz) {
  int i;
  int16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 1000; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pmulhw(a, x, y);
    (pmulhw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pmulhw(a, (void *)a, y);
    (pmulhw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pmullw() and (pmullw)() agree on 1000 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(pmullw, fuzz) {
  int i;
  int16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 1000; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pmullw(a, x, y);
    (pmullw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pmullw(a, (void *)a, y);
    (pmullw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pmulld() and (pmulld)() agree on 1000 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(pmulld, fuzz) {
  int i;
  int32_t x[4], y[4], a[4], b[4];
  for (i = 0; i < 1000; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pmulld(a, x, y);
    (pmulld)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pmulld(a, (void *)a, y);
    (pmulld)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pmuludq() and (pmuludq)() agree on 1000 random inputs
   (uint32 sources, uint64 results), then again with the result buffer fed
   back in as the first source operand. */
TEST(pmuludq, fuzz) {
  int i;
  uint32_t x[4], y[4];
  uint64_t a[2], b[2];
  for (i = 0; i < 1000; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pmuludq(a, x, y);
    (pmuludq)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pmuludq(a, (void *)a, y);
    (pmuludq)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pmaddwd() and (pmaddwd)() agree on 1000 random inputs
   (int16 sources, int32 results), then again with the result buffer fed
   back in as the first source operand. */
TEST(pmaddwd, fuzz) {
  int i;
  int16_t x[8], y[8];
  int32_t a[4], b[4];
  for (i = 0; i < 1000; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pmaddwd(a, x, y);
    (pmaddwd)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pmaddwd(a, (void *)a, y);
    (pmaddwd)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that phaddw() and (phaddw)() agree on 1000 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(phaddw, fuzz) {
  int i;
  int16_t x[8], y[8];
  int16_t a[8], b[8];
  for (i = 0; i < 1000; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    phaddw(a, x, y);
    (phaddw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    phaddw(a, (void *)a, y);
    (phaddw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that phaddd() and (phaddd)() agree on 1000 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(phaddd, fuzz) {
  int i;
  int32_t x[4], y[4];
  int32_t a[4], b[4];
  for (i = 0; i < 1000; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    phaddd(a, x, y);
    (phaddd)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    phaddd(a, (void *)a, y);
    (phaddd)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that phsubw() and (phsubw)() agree on 1000 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(phsubw, fuzz) {
  int i;
  int16_t x[8], y[8];
  int16_t a[8], b[8];
  for (i = 0; i < 1000; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    phsubw(a, x, y);
    (phsubw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    phsubw(a, (void *)a, y);
    (phsubw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that phsubd() and (phsubd)() agree on 1000 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(phsubd, fuzz) {
  int i;
  int32_t x[4], y[4];
  int32_t a[4], b[4];
  for (i = 0; i < 1000; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    phsubd(a, x, y);
    (phsubd)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    phsubd(a, (void *)a, y);
    (phsubd)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that phaddsw() and (phaddsw)() agree on 1000 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(phaddsw, fuzz) {
  int i;
  int16_t x[8], y[8];
  int16_t a[8], b[8];
  for (i = 0; i < 1000; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    phaddsw(a, x, y);
    (phaddsw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    phaddsw(a, (void *)a, y);
    (phaddsw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that phsubsw() and (phsubsw)() agree on 1000 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(phsubsw, fuzz) {
  int i;
  int16_t x[8], y[8];
  int16_t a[8], b[8];
  for (i = 0; i < 1000; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    phsubsw(a, x, y);
    (phsubsw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    phsubsw(a, (void *)a, y);
    (phsubsw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* phaddw sums adjacent word pairs: the first four results come from M[0],
   the last four from M[1]. A sum outside the 16-bit range wraps, so
   0x7fff + 1 appears as -32768 in the expected first row. M[1] is only read,
   so its row must come back unchanged. */
TEST(phaddw, testOverflow_wrapsAround) {
short M[2][8] = {
{0x7fff, 0, 0x7fff, 1, 13004, -30425, 20777, -16389},
{-28040, 13318, -1336, -24798, -13876, 3599, -7346, -23575},
};
/* The destination aliases the first source row. */
phaddw(M[0], M[0], M[1]);
/* NOTE(review): the expected text below is a whitespace-sensitive string
   literal; presumably it has to match the matrix formatter's column layout,
   so confirm before reflowing or indenting it. */
EXPECT_SHRTMATRIXEQ(2, 8, M, "\n\
32767 -32768 -17421 4388 -14722 -26134 -10277 -30921\n\
-28040 13318 -1336 -24798 -13876 3599 -7346 -23575");
}
/* All three phaddw operands are the same row. The pair sums 1, 5, 9, 13
   must appear twice, i.e. both halves are computed from the original input
   rather than from partially written output. */
TEST(phaddw, testAliasing_isOk) {
short M[1][8] = {
{0, 1, 2, 3, 4, 5, 6, 7},
};
phaddw(M[0], M[0], M[0]);
/* NOTE(review): whitespace-sensitive expected text; confirm the layout
   before reflowing. */
EXPECT_SHRTMATRIXEQ(1, 8, M, "\n\
1 5 9 13 1 5 9 13");
}
/* phaddsw sums adjacent word pairs like phaddw, but a sum above the 16-bit
   maximum clamps to 32767 instead of wrapping (0x7fff + 1 and
   0x7fff + 0x7fff both give 32767). M[1] is only read, so its row must come
   back unchanged. */
TEST(phaddsw, testOverflow_saturates) {
short M[2][8] = {
{0x7fff, 0, 0x7fff, 1, 0x7fff, 0x7fff, 20777, -16389},
{-28040, 13318, -1336, -24798, -13876, 3599, -7346, -23575},
};
/* The destination aliases the first source row. */
phaddsw(M[0], M[0], M[1]);
/* NOTE(review): whitespace-sensitive expected text; confirm the layout
   before reflowing. */
EXPECT_SHRTMATRIXEQ(2, 8, M, "\n\
32767 32767 32767 4388 -14722 -26134 -10277 -30921\n\
-28040 13318 -1336 -24798 -13876 3599 -7346 -23575");
}
/* All three phaddsw operands are the same row. The pair sums 1, 5, 9, 13
   must appear twice, i.e. both halves are computed from the original
   input. */
TEST(phaddsw, testAliasing_isOk) {
short M[1][8] = {{0, 1, 2, 3, 4, 5, 6, 7}};
phaddsw(M[0], M[0], M[0]);
/* NOTE(review): whitespace-sensitive expected text; confirm the layout
   before reflowing. */
EXPECT_SHRTMATRIXEQ(1, 8, M, "\n\
1 5 9 13 1 5 9 13");
}
/* Checks that pcmpgtb() and (pcmpgtb)() agree on 100 random inputs. */
TEST(pcmpgtb, test) {
  int i;
  int8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pcmpgtb(a, x, y);
    (pcmpgtb)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pcmpeqb() and (pcmpeqb)() agree on 100 random inputs. */
TEST(pcmpeqb, test) {
  int i;
  uint8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pcmpeqb(a, x, y);
    (pcmpeqb)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pcmpeqd() and (pcmpeqd)() agree on 100 random inputs. */
TEST(pcmpeqd, test) {
  int i;
  int32_t x[4], y[4], a[4], b[4];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pcmpeqd(a, x, y);
    (pcmpeqd)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pcmpgtd() and (pcmpgtd)() agree on 100 random inputs. */
TEST(pcmpgtd, test) {
  int i;
  int32_t x[4], y[4], a[4], b[4];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pcmpgtd(a, x, y);
    (pcmpgtd)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pcmpeqw() and (pcmpeqw)() agree on 100 random inputs. */
TEST(pcmpeqw, test) {
  int i;
  int16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pcmpeqw(a, x, y);
    (pcmpeqw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pcmpgtw() and (pcmpgtw)() agree on 100 random inputs. */
TEST(pcmpgtw, test) {
  int i;
  int16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pcmpgtw(a, x, y);
    (pcmpgtw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Compares por() with (por)() on 100 random operand pairs, then repeats
   with the destination doubling as the first source. */
TEST(por, fuzz) {
  int round;
  uint64_t x[2], y[2], a[2], b[2];
  for (round = 0; round < 100; ++round) {
    x[0] = Rando();
    x[1] = Rando();
    y[0] = Rando();
    y[1] = Rando();
    por(a, x, y);
    (por)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    por(a, (void *)a, y);
    (por)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Compares pxor() with (pxor)() on 100 random operand pairs, then repeats
   with the destination doubling as the first source. */
TEST(pxor, fuzz) {
  int round;
  uint64_t x[2], y[2], a[2], b[2];
  for (round = 0; round < 100; ++round) {
    x[0] = Rando();
    x[1] = Rando();
    y[0] = Rando();
    y[1] = Rando();
    pxor(a, x, y);
    (pxor)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pxor(a, (void *)a, y);
    (pxor)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Compares pand() with (pand)() on 100 random operand pairs, then repeats
   with the destination doubling as the first source. */
TEST(pand, fuzz) {
  int round;
  uint64_t x[2], y[2], a[2], b[2];
  for (round = 0; round < 100; ++round) {
    x[0] = Rando();
    x[1] = Rando();
    y[0] = Rando();
    y[1] = Rando();
    pand(a, x, y);
    (pand)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pand(a, (void *)a, y);
    (pand)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Compares pandn() with (pandn)() on 100 random operand pairs, then repeats
   with the destination doubling as the first source. */
TEST(pandn, fuzz) {
  int round;
  uint64_t x[2], y[2], a[2], b[2];
  for (round = 0; round < 100; ++round) {
    x[0] = Rando();
    x[1] = Rando();
    y[0] = Rando();
    y[1] = Rando();
    pandn(a, x, y);
    (pandn)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pandn(a, (void *)a, y);
    (pandn)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Compares paddq() with (paddq)() on 100 random operand pairs, then repeats
   with the destination doubling as the first source. */
TEST(paddq, fuzz) {
  int round;
  int64_t x[2], y[2], a[2], b[2];
  for (round = 0; round < 100; ++round) {
    x[0] = Rando();
    x[1] = Rando();
    y[0] = Rando();
    y[1] = Rando();
    paddq(a, x, y);
    (paddq)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    paddq(a, (void *)a, y);
    (paddq)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pavgb() and (pavgb)() agree on 100 random inputs, then again
   with the destination aliasing the first source operand. */
TEST(pavgb, fuzz) {
  int i;
  uint8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pavgb(a, x, y);
    (pavgb)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pavgb(a, (void *)a, y);
    (pavgb)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pavgw() and (pavgw)() agree on 100 random inputs, then again
   with the destination aliasing the first source operand. */
TEST(pavgw, fuzz) {
  int i;
  uint16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pavgw(a, x, y);
    (pavgw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pavgw(a, (void *)a, y);
    (pavgw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that punpcklwd() and (punpcklwd)() agree on 100 random inputs in
   three configurations: distinct buffers, destination aliasing the first
   source, and destination aliasing the second source. */
TEST(punpcklwd, fuzz) {
  int i;
  uint16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    punpcklwd(a, x, y);
    (punpcklwd)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    punpcklwd(a, a, y);
    (punpcklwd)(b, b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    punpcklwd(a, y, a);
    (punpcklwd)(b, y, b);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pminub() and (pminub)() agree on 100 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(pminub, fuzz) {
  int i;
  uint8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pminub(a, x, y);
    (pminub)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pminub(a, (void *)a, y);
    (pminub)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pminsw() and (pminsw)() agree on 100 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(pminsw, fuzz) {
  int i;
  int16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pminsw(a, x, y);
    (pminsw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pminsw(a, (void *)a, y);
    (pminsw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pmaxub() and (pmaxub)() agree on 100 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(pmaxub, fuzz) {
  int i;
  uint8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pmaxub(a, x, y);
    (pmaxub)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pmaxub(a, (void *)a, y);
    (pmaxub)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pmaxsw() and (pmaxsw)() agree on 100 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(pmaxsw, fuzz) {
  int i;
  int16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pmaxsw(a, x, y);
    (pmaxsw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    pmaxsw(a, (void *)a, y);
    (pmaxsw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Smoke test: lane 0 of paddw(7, 11) is 18. */
TEST(paddw, test) {
  int16_t lhs[8] = {7};
  int16_t rhs[8] = {11};
  int16_t sum[8];
  paddw(sum, lhs, rhs);
  ASSERT_EQ(18, sum[0]);
}
/* paddw wraps on overflow: SHRT_MAX + 1 becomes SHRT_MIN and SHRT_MIN - 1
   becomes SHRT_MAX. The destination aliases the first operand. */
TEST(paddw, testOverflow_wrapsAround) {
  int16_t acc[8] = {SHRT_MAX, SHRT_MIN};
  int16_t delta[8] = {1, -1};
  paddw(acc, acc, delta);
  ASSERT_EQ(SHRT_MIN, acc[0]);
  ASSERT_EQ(SHRT_MAX, acc[1]);
}
/* Checks that paddw() and (paddw)() agree on 100 random inputs, then again
   with the destination aliasing the first source operand. */
TEST(paddw, fuzz) {
  int i;
  int16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    paddw(a, x, y);
    (paddw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    paddw(a, (void *)a, y);
    (paddw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Smoke test: lane 0 of paddsw(7, 11) is 18. */
TEST(paddsw, test) {
  int16_t lhs[8] = {7};
  int16_t rhs[8] = {11};
  int16_t sum[8];
  paddsw(sum, lhs, rhs);
  ASSERT_EQ(18, sum[0]);
}
/* paddsw clamps on overflow: SHRT_MAX + 1 stays SHRT_MAX and SHRT_MIN - 1
   stays SHRT_MIN. The destination aliases the first operand. */
TEST(paddsw, testOverflow_saturates) {
  int16_t acc[8] = {SHRT_MAX, SHRT_MIN};
  int16_t delta[8] = {1, -1};
  paddsw(acc, acc, delta);
  ASSERT_EQ(SHRT_MAX, acc[0]);
  ASSERT_EQ(SHRT_MIN, acc[1]);
}
/* Checks that paddusw() and (paddusw)() agree on 100 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(paddusw, fuzz) {
  int i;
  uint16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    paddusw(a, x, y);
    (paddusw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    paddusw(a, (void *)a, y);
    (paddusw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that psubb() and (psubb)() agree on 100 random inputs, then again
   with the destination aliasing the first source operand. */
TEST(psubb, fuzz) {
  int i;
  int8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    psubb(a, x, y);
    (psubb)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    psubb(a, (void *)a, y);
    (psubb)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that psubw() and (psubw)() agree on 100 random inputs, then again
   with the destination aliasing the first source operand. */
TEST(psubw, fuzz) {
  int i;
  int16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    psubw(a, x, y);
    (psubw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    psubw(a, (void *)a, y);
    (psubw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that psubusw() and (psubusw)() agree on 100 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(psubusw, fuzz) {
  int i;
  uint16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    psubusw(a, x, y);
    (psubusw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    psubusw(a, (void *)a, y);
    (psubusw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that paddusb() and (paddusb)() agree on 100 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(paddusb, fuzz) {
  int i;
  uint8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    paddusb(a, x, y);
    (paddusb)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    paddusb(a, (void *)a, y);
    (paddusb)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that psubusb() and (psubusb)() agree on 100 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(psubusb, fuzz) {
  int i;
  uint8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    psubusb(a, x, y);
    (psubusb)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    psubusb(a, (void *)a, y);
    (psubusb)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pabsb() and (pabsb)() agree on 100 random inputs. On a
   mismatch the iteration number, the input and both outputs are passed to
   the assertion's format string. */
TEST(pabsb, fuzz) {
  int i;
  int8_t x[16];
  uint8_t a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    pabsb(a, x);
    (pabsb)(b, x);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)), "%d\n\t%`#.16s\n\t%`#.16s\n\t%`#.16s", i, x,
              a, b);
  }
}
/* Checks that pabsw() and (pabsw)() agree on 100 random inputs. */
TEST(pabsw, fuzz) {
  int i;
  int16_t x[8];
  uint16_t a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    pabsw(a, x);
    (pabsw)(b, x);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that pabsd() and (pabsd)() agree on 100 random inputs. */
TEST(pabsd, fuzz) {
  int i;
  int32_t x[4];
  uint32_t a[4], b[4];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    pabsd(a, x);
    (pabsd)(b, x);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that psignb() and (psignb)() agree on 100 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(psignb, fuzz) {
  int i;
  int8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    psignb(a, x, y);
    (psignb)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    psignb(a, (void *)a, y);
    (psignb)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that psignw() and (psignw)() agree on 100 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(psignw, fuzz) {
  int i;
  int16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    psignw(a, x, y);
    (psignw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    psignw(a, (void *)a, y);
    (psignw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that psignd() and (psignd)() agree on 100 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(psignd, fuzz) {
  int i;
  int32_t x[4], y[4], a[4], b[4];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    psignd(a, x, y);
    (psignd)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    psignd(a, (void *)a, y);
    (psignd)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that paddb() and (paddb)() agree on 100 random inputs, then again
   with the destination aliasing the first source operand. */
TEST(paddb, fuzz) {
  int i;
  int8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    paddb(a, x, y);
    (paddb)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    paddb(a, (void *)a, y);
    (paddb)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/* Checks that paddsb() and (paddsb)() agree on 100 random inputs, then
   again with the destination aliasing the first source operand. */
TEST(paddsb, fuzz) {
  int i;
  int8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    paddsb(a, x, y);
    (paddsb)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
    paddsb(a, (void *)a, y);
    (paddsb)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, sizeof(a)));
  }
}
/*
 * Fuzzes paddsw over 100 rounds of RngSet-filled operands.
 *
 * `paddsw(...)` is compared with `(paddsw)(...)`, first on separate
 * buffers and then in place (output reused as first source). The
 * parenthesized name bypasses any function-like macro called paddsw;
 * whether the bare spelling is a macro is not visible in this block.
 */
TEST(paddsw, fuzz) {
  int i;
  int16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    paddsw(a, x, y);
    (paddsw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
    /* in-place: destination and first source are the same buffer */
    paddsw(a, (void *)a, y);
    (paddsw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
  }
}
/*
 * Fuzzes psubsb over 100 rounds of RngSet-filled operands.
 *
 * `psubsb(...)` is compared with `(psubsb)(...)`, first on separate
 * buffers and then in place (output reused as first source). The
 * parenthesized name bypasses any function-like macro called psubsb;
 * whether the bare spelling is a macro is not visible in this block.
 */
TEST(psubsb, fuzz) {
  int i;
  int8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    psubsb(a, x, y);
    (psubsb)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
    /* in-place: destination and first source are the same buffer */
    psubsb(a, (void *)a, y);
    (psubsb)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
  }
}
/*
 * Fuzzes psubsw over 100 rounds of RngSet-filled operands.
 *
 * `psubsw(...)` is compared with `(psubsw)(...)`, first on separate
 * buffers and then in place (output reused as first source). The
 * parenthesized name bypasses any function-like macro called psubsw;
 * whether the bare spelling is a macro is not visible in this block.
 */
TEST(psubsw, fuzz) {
  int i;
  int16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    psubsw(a, x, y);
    (psubsw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
    /* in-place: destination and first source are the same buffer */
    psubsw(a, (void *)a, y);
    (psubsw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
  }
}
/*
 * Fuzzes paddd over 100 rounds of RngSet-filled operands.
 *
 * `paddd(...)` is compared with `(paddd)(...)`, first on separate
 * buffers and then in place (output reused as first source). The
 * parenthesized name bypasses any function-like macro called paddd;
 * whether the bare spelling is a macro is not visible in this block.
 */
TEST(paddd, fuzz) {
  int i;
  int32_t x[4], y[4], a[4], b[4];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    paddd(a, x, y);
    (paddd)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
    /* in-place: destination and first source are the same buffer */
    paddd(a, (void *)a, y);
    (paddd)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
  }
}
/*
 * Fuzzes pshufb over 100 rounds; both the data and the shuffle control
 * operand are filled by RngSet.
 *
 * `pshufb(...)` is compared with `(pshufb)(...)`, first on separate
 * buffers and then in place (output reused as the data operand). The
 * parenthesized name bypasses any function-like macro called pshufb;
 * whether the bare spelling is a macro is not visible in this block.
 */
TEST(pshufb, fuzz) {
  int i;
  uint8_t x[16], y[16], a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pshufb(a, x, y);
    (pshufb)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
    /* in-place: destination and data operand are the same buffer */
    pshufb(a, (void *)a, y);
    (pshufb)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
  }
}
/*
 * Fuzzes pshufd over 100 rounds with eight fixed immediates.
 *
 * For every immediate the local helper macro T compares `pshufd(...)`
 * with `(pshufd)(...)`, first out of place and then with the output
 * reused as the source. The immediates stay literal constants in case
 * the bare spelling needs a compile-time value (not visible here).
 */
TEST(pshufd, fuzz) {
  int32_t src[4], got[4], want[4];
  int trial, lane;
  for (trial = 0; trial < 100; ++trial) {
    for (lane = 0; lane < 4; ++lane) {
      src[lane] = Rando();
    }
#define T(IMM)                                  \
  pshufd(got, src, IMM);                        \
  (pshufd)(want, src, IMM);                     \
  ASSERT_EQ(0, memcmp(got, want, sizeof(got))); \
  pshufd(got, (void *)got, IMM);                \
  (pshufd)(want, (void *)want, IMM);            \
  ASSERT_EQ(0, memcmp(got, want, sizeof(got)))
    T(0b00000011);
    T(0b00000110);
    T(0b00001100);
    T(0b00011000);
    T(0b00110000);
    T(0b01100000);
    T(0b11000000);
    T(0b10000000);
#undef T
  }
}
/*
 * Fuzzes pshuflw over 100 rounds with eight fixed immediates.
 *
 * For every immediate the local helper macro T compares `pshuflw(...)`
 * with `(pshuflw)(...)`, first out of place and then with the output
 * reused as the source. The immediates stay literal constants in case
 * the bare spelling needs a compile-time value (not visible here).
 */
TEST(pshuflw, fuzz) {
  int16_t src[8], got[8], want[8];
  int trial, lane;
  for (trial = 0; trial < 100; ++trial) {
    for (lane = 0; lane < 8; ++lane) {
      src[lane] = Rando();
    }
#define T(IMM)                                  \
  pshuflw(got, src, IMM);                       \
  (pshuflw)(want, src, IMM);                    \
  ASSERT_EQ(0, memcmp(got, want, sizeof(got))); \
  pshuflw(got, (void *)got, IMM);               \
  (pshuflw)(want, (void *)want, IMM);           \
  ASSERT_EQ(0, memcmp(got, want, sizeof(got)))
    T(0b00000011);
    T(0b00000110);
    T(0b00001100);
    T(0b00011000);
    T(0b00110000);
    T(0b01100000);
    T(0b11000000);
    T(0b10000000);
#undef T
  }
}
/*
 * Fuzzes pshufhw over 100 rounds with eight fixed immediates.
 *
 * For every immediate the local helper macro T compares `pshufhw(...)`
 * with `(pshufhw)(...)`, first out of place and then with the output
 * reused as the source. The immediates stay literal constants in case
 * the bare spelling needs a compile-time value (not visible here).
 */
TEST(pshufhw, fuzz) {
  int16_t src[8], got[8], want[8];
  int trial, lane;
  for (trial = 0; trial < 100; ++trial) {
    for (lane = 0; lane < 8; ++lane) {
      src[lane] = Rando();
    }
#define T(IMM)                                  \
  pshufhw(got, src, IMM);                       \
  (pshufhw)(want, src, IMM);                    \
  ASSERT_EQ(0, memcmp(got, want, sizeof(got))); \
  pshufhw(got, (void *)got, IMM);               \
  (pshufhw)(want, (void *)want, IMM);           \
  ASSERT_EQ(0, memcmp(got, want, sizeof(got)))
    T(0b00000011);
    T(0b00000110);
    T(0b00001100);
    T(0b00011000);
    T(0b00110000);
    T(0b01100000);
    T(0b11000000);
    T(0b10000000);
#undef T
  }
}
/*
 * Golden-value check for packuswb.
 *
 * The same eight 16-bit words are passed as both sources, so the
 * expected bytes repeat in the low and high halves of the result.
 * The expectations pin unsigned saturation: negative words become 0
 * and words above 255 become 255.
 */
TEST(packuswb, test) {
  unsigned char packed[16] = {0};
  const short words[8] = {0, 128, -128, 255, SHRT_MAX, SHRT_MIN, 0, 0};
  packuswb(packed, words, words);
  /* bytes 0..7: packed from the first source */
  ASSERT_EQ(0, packed[0]);
  ASSERT_EQ(128, packed[1]);
  ASSERT_EQ(0, packed[2]);
  ASSERT_EQ(255, packed[3]);
  ASSERT_EQ(255, packed[4]);
  ASSERT_EQ(0, packed[5]);
  ASSERT_EQ(0, packed[6]);
  ASSERT_EQ(0, packed[7]);
  /* bytes 8..15: packed from the second source (same words) */
  ASSERT_EQ(0, packed[8]);
  ASSERT_EQ(128, packed[9]);
  ASSERT_EQ(0, packed[10]);
  ASSERT_EQ(255, packed[11]);
  ASSERT_EQ(255, packed[12]);
  ASSERT_EQ(0, packed[13]);
  ASSERT_EQ(0, packed[14]);
  ASSERT_EQ(0, packed[15]);
}
/*
 * Golden-value check for packsswb.
 *
 * The same eight 16-bit words are passed as both sources, so the
 * expected bytes repeat in the low and high halves of the result.
 * The expectations pin signed saturation: words above 127 become 127
 * and words below -128 become -128.
 */
TEST(packsswb, test) {
  signed char packed[16] = {0};
  const short words[8] = {0, 128, -128, 255, SHRT_MAX, SHRT_MIN, 0, 0};
  packsswb(packed, words, words);
  /* bytes 0..7: packed from the first source */
  ASSERT_EQ(0, packed[0]);
  ASSERT_EQ(127, packed[1]);
  ASSERT_EQ(-128, packed[2]);
  ASSERT_EQ(127, packed[3]);
  ASSERT_EQ(127, packed[4]);
  ASSERT_EQ(-128, packed[5]);
  ASSERT_EQ(0, packed[6]);
  ASSERT_EQ(0, packed[7]);
  /* bytes 8..15: packed from the second source (same words) */
  ASSERT_EQ(0, packed[8]);
  ASSERT_EQ(127, packed[9]);
  ASSERT_EQ(-128, packed[10]);
  ASSERT_EQ(127, packed[11]);
  ASSERT_EQ(127, packed[12]);
  ASSERT_EQ(-128, packed[13]);
  ASSERT_EQ(0, packed[14]);
  ASSERT_EQ(0, packed[15]);
}
/*
 * Checks packssdw when the destination overlaps the first source.
 *
 * `u.out` and `u.in1` are members of one union and therefore share
 * storage. For 100 rounds of RngSet-filled operands, the union is
 * reloaded with the same input before each call, and the results of
 * `packssdw(...)` and `(packssdw)(...)` are copied out and compared.
 * The parenthesized name bypasses any function-like macro called
 * packssdw (whether one exists is not visible in this block).
 */
TEST(packssdw, testAlias) {
  int i;
  union {
    int16_t out[8];
    int32_t in1[4];
  } u;
  int16_t a[8], b[8];
  int32_t x[4], y[4];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    memcpy(u.in1, x, sizeof(x));
    packssdw(u.out, u.in1, y);
    memcpy(a, u.out, sizeof(u.out));
    /* the call above overwrote the shared storage; restore the input */
    memcpy(u.in1, x, sizeof(x));
    (packssdw)(u.out, u.in1, y);
    memcpy(b, u.out, sizeof(u.out));
    ASSERT_EQ(0, memcmp(a, b, 16));
  }
}
/*
 * Fuzzes packusdw over 100 rounds of RngSet-filled operands.
 *
 * `packusdw(...)` and `(packusdw)(...)` must agree on all 16 output
 * bytes. The parenthesized name bypasses any function-like macro
 * called packusdw; whether the bare spelling is a macro is not
 * visible in this block.
 */
TEST(packusdw, test) {
  int i;
  int32_t x[4], y[4];
  uint16_t a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    packusdw(a, x, y);
    (packusdw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
  }
}
/*
 * Fuzzes packuswb over 100 rounds of RngSet-filled operands.
 *
 * `packuswb(...)` is compared with `(packuswb)(...)` twice per round:
 * once with two distinct sources and once with the same buffer passed
 * as both sources. The parenthesized name bypasses any function-like
 * macro called packuswb (whether one exists is not visible here).
 */
TEST(packuswb, fuzz) {
  int i;
  int16_t x[8], y[8];
  uint8_t a[16], b[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    packuswb(a, x, y);
    (packuswb)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
    /* both source operands refer to the same buffer */
    packuswb(a, x, x);
    (packuswb)(b, x, x);
    ASSERT_EQ(0, memcmp(a, b, 16));
  }
}
/*
 * Fuzzes packssdw over 100 rounds of RngSet-filled operands.
 *
 * `packssdw(...)` and `(packssdw)(...)` must agree on all 16 output
 * bytes. The parenthesized name bypasses any function-like macro
 * called packssdw; whether the bare spelling is a macro is not
 * visible in this block.
 */
TEST(packssdw, test) {
  int i;
  int32_t x[4], y[4];
  int16_t a[8], b[8];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    packssdw(a, x, y);
    (packssdw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
  }
}
/*
 * Fuzzes psllwv over 100 rounds.
 *
 * Data lanes come straight from Rando(); both count lanes are reduced
 * modulo 64. `psllwv(...)` is compared with `(psllwv)(...)` out of
 * place and then in place. The parenthesized name bypasses any
 * function-like macro called psllwv (not visible in this block).
 */
TEST(psllwv, fuzz) {
  uint16_t src[8], got[8], want[8];
  uint64_t count[2];
  int trial, lane;
  for (trial = 0; trial < 100; ++trial) {
    for (lane = 0; lane < 8; ++lane) {
      src[lane] = Rando();
    }
    for (lane = 0; lane < 2; ++lane) {
      count[lane] = Rando() % 64;
    }
    psllwv(got, src, count);
    (psllwv)(want, src, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
    /* in-place: each output is shifted again using itself as source */
    psllwv(got, (void *)got, count);
    (psllwv)(want, (void *)want, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
  }
}
/*
 * Fuzzes pslldv over 100 rounds.
 *
 * Data lanes come straight from Rando(); both count lanes are reduced
 * modulo 64. `pslldv(...)` is compared with `(pslldv)(...)` out of
 * place and then in place. The parenthesized name bypasses any
 * function-like macro called pslldv (not visible in this block).
 */
TEST(pslldv, fuzz) {
  uint32_t src[4], got[4], want[4];
  uint64_t count[2];
  int trial, lane;
  for (trial = 0; trial < 100; ++trial) {
    for (lane = 0; lane < 4; ++lane) {
      src[lane] = Rando();
    }
    for (lane = 0; lane < 2; ++lane) {
      count[lane] = Rando() % 64;
    }
    pslldv(got, src, count);
    (pslldv)(want, src, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
    /* in-place: each output is shifted again using itself as source */
    pslldv(got, (void *)got, count);
    (pslldv)(want, (void *)want, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
  }
}
/*
 * Fuzzes psllqv over 100 rounds.
 *
 * Data lanes come straight from Rando(); both count lanes are reduced
 * modulo 64. `psllqv(...)` is compared with `(psllqv)(...)` out of
 * place and then in place. The parenthesized name bypasses any
 * function-like macro called psllqv (not visible in this block).
 */
TEST(psllqv, fuzz) {
  uint64_t src[2], got[2], want[2];
  uint64_t count[2];
  int trial, lane;
  for (trial = 0; trial < 100; ++trial) {
    for (lane = 0; lane < 2; ++lane) {
      src[lane] = Rando();
    }
    for (lane = 0; lane < 2; ++lane) {
      count[lane] = Rando() % 64;
    }
    psllqv(got, src, count);
    (psllqv)(want, src, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
    /* in-place: each output is shifted again using itself as source */
    psllqv(got, (void *)got, count);
    (psllqv)(want, (void *)want, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
  }
}
/*
 * Fuzzes psrlwv over 100 rounds.
 *
 * Data lanes come straight from Rando(); both count lanes are reduced
 * modulo 64. `psrlwv(...)` is compared with `(psrlwv)(...)` out of
 * place and then in place. The parenthesized name bypasses any
 * function-like macro called psrlwv (not visible in this block).
 */
TEST(psrlwv, fuzz) {
  uint16_t src[8], got[8], want[8];
  uint64_t count[2];
  int trial, lane;
  for (trial = 0; trial < 100; ++trial) {
    for (lane = 0; lane < 8; ++lane) {
      src[lane] = Rando();
    }
    for (lane = 0; lane < 2; ++lane) {
      count[lane] = Rando() % 64;
    }
    psrlwv(got, src, count);
    (psrlwv)(want, src, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
    /* in-place: each output is shifted again using itself as source */
    psrlwv(got, (void *)got, count);
    (psrlwv)(want, (void *)want, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
  }
}
/*
 * Fuzzes psrldv over 100 rounds.
 *
 * Data lanes come straight from Rando(); both count lanes are reduced
 * modulo 64. `psrldv(...)` is compared with `(psrldv)(...)` out of
 * place and then in place. The parenthesized name bypasses any
 * function-like macro called psrldv (not visible in this block).
 */
TEST(psrldv, fuzz) {
  uint32_t src[4], got[4], want[4];
  uint64_t count[2];
  int trial, lane;
  for (trial = 0; trial < 100; ++trial) {
    for (lane = 0; lane < 4; ++lane) {
      src[lane] = Rando();
    }
    for (lane = 0; lane < 2; ++lane) {
      count[lane] = Rando() % 64;
    }
    psrldv(got, src, count);
    (psrldv)(want, src, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
    /* in-place: each output is shifted again using itself as source */
    psrldv(got, (void *)got, count);
    (psrldv)(want, (void *)want, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
  }
}
/*
 * Fuzzes psrlqv over 100 rounds.
 *
 * Data lanes come straight from Rando(); both count lanes are reduced
 * modulo 64. `psrlqv(...)` is compared with `(psrlqv)(...)` out of
 * place and then in place. The parenthesized name bypasses any
 * function-like macro called psrlqv (not visible in this block).
 */
TEST(psrlqv, fuzz) {
  uint64_t src[2], got[2], want[2];
  uint64_t count[2];
  int trial, lane;
  for (trial = 0; trial < 100; ++trial) {
    for (lane = 0; lane < 2; ++lane) {
      src[lane] = Rando();
    }
    for (lane = 0; lane < 2; ++lane) {
      count[lane] = Rando() % 64;
    }
    psrlqv(got, src, count);
    (psrlqv)(want, src, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
    /* in-place: each output is shifted again using itself as source */
    psrlqv(got, (void *)got, count);
    (psrlqv)(want, (void *)want, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
  }
}
/*
 * Fuzzes psrawv over 100 rounds on signed 16-bit lanes.
 *
 * Data lanes come straight from Rando(); both count lanes are reduced
 * modulo 64. `psrawv(...)` is compared with `(psrawv)(...)` out of
 * place and then in place. The parenthesized name bypasses any
 * function-like macro called psrawv (not visible in this block).
 */
TEST(psrawv, fuzz) {
  int16_t src[8], got[8], want[8];
  uint64_t count[2];
  int trial, lane;
  for (trial = 0; trial < 100; ++trial) {
    for (lane = 0; lane < 8; ++lane) {
      src[lane] = Rando();
    }
    for (lane = 0; lane < 2; ++lane) {
      count[lane] = Rando() % 64;
    }
    psrawv(got, src, count);
    (psrawv)(want, src, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
    /* in-place: each output is shifted again using itself as source */
    psrawv(got, (void *)got, count);
    (psrawv)(want, (void *)want, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
  }
}
/*
 * Fuzzes psradv over 100 rounds on signed 32-bit lanes.
 *
 * Data lanes come straight from Rando(); both count lanes are reduced
 * modulo 64. `psradv(...)` is compared with `(psradv)(...)` out of
 * place and then in place. The parenthesized name bypasses any
 * function-like macro called psradv (not visible in this block).
 */
TEST(psradv, fuzz) {
  int32_t src[4], got[4], want[4];
  uint64_t count[2];
  int trial, lane;
  for (trial = 0; trial < 100; ++trial) {
    for (lane = 0; lane < 4; ++lane) {
      src[lane] = Rando();
    }
    for (lane = 0; lane < 2; ++lane) {
      count[lane] = Rando() % 64;
    }
    psradv(got, src, count);
    (psradv)(want, src, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
    /* in-place: each output is shifted again using itself as source */
    psradv(got, (void *)got, count);
    (psradv)(want, (void *)want, count);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)));
  }
}
/*
 * Fuzzes psrldq over 100 rounds with a run-time byte count.
 *
 * Both destinations start each round as identical 0xFF-filled buffers.
 * The count is Rando() % 20, so (assuming Rando is non-negative)
 * counts beyond the 16-byte width are exercised too. `psrldq(...)` is
 * compared with `(psrldq)(...)` out of place, then again in place with
 * a fresh count. The parenthesized name bypasses any function-like
 * macro called psrldq (not visible in this block).
 */
TEST(psrldq, fuzz) {
  uint8_t src[16], got[16], want[16];
  int trial, shift;
  for (trial = 0; trial < 100; ++trial) {
    memset(got, 0xFF, sizeof(got));
    memset(want, 0xFF, sizeof(want));
    RngSet(src, sizeof(src));
    shift = Rando() % 20;
    psrldq(got, src, shift);
    (psrldq)(want, src, shift);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)),
              "%d\n\t%`#.16s\n\t%`#.16s\n\t%`#.16s", shift, src, got, want);
    /* second pass shifts each result in place by a new count */
    shift = Rando() % 20;
    psrldq(got, got, shift);
    (psrldq)(want, want, shift);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)),
              "%d\n\t%`#.16s\n\t%`#.16s\n\t%`#.16s", shift, src, got, want);
  }
}
/*
 * Fuzzes pslldq over 100 rounds with a run-time byte count.
 *
 * Both destinations start each round as identical 0xFF-filled buffers.
 * The count is Rando() % 20, so (assuming Rando is non-negative)
 * counts beyond the 16-byte width are exercised too. `pslldq(...)` is
 * compared with `(pslldq)(...)` out of place, then again in place with
 * a fresh count. The parenthesized name bypasses any function-like
 * macro called pslldq (not visible in this block).
 */
TEST(pslldq, fuzz) {
  uint8_t src[16], got[16], want[16];
  int trial, shift;
  for (trial = 0; trial < 100; ++trial) {
    memset(got, 0xFF, sizeof(got));
    memset(want, 0xFF, sizeof(want));
    RngSet(src, sizeof(src));
    shift = Rando() % 20;
    pslldq(got, src, shift);
    (pslldq)(want, src, shift);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)),
              "%d\n\t%`#.16s\n\t%`#.16s\n\t%`#.16s", shift, src, got, want);
    /* second pass shifts each result in place by a new count */
    shift = Rando() % 20;
    pslldq(got, got, shift);
    (pslldq)(want, want, shift);
    ASSERT_EQ(0, memcmp(got, want, sizeof(got)),
              "%d\n\t%`#.16s\n\t%`#.16s\n\t%`#.16s", shift, src, got, want);
  }
}
/*
 * Fuzzes psadbw over 100 rounds of RngSet-filled operands.
 *
 * `psadbw(...)` and `(psadbw)(...)` must agree on all 16 output bytes.
 * The parenthesized name bypasses any function-like macro called
 * psadbw; whether the bare spelling is a macro is not visible in this
 * block.
 */
TEST(psadbw, test) {
  int i;
  uint64_t a[2], b[2];
  uint8_t x[16], y[16];
  for (i = 0; i < 100; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    psadbw(a, x, y);
    (psadbw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
  }
}
/*
 * Fuzzes pmulhrsw over 1000 rounds of RngSet-filled operands.
 *
 * `pmulhrsw(...)` is compared with `(pmulhrsw)(...)`, first on
 * separate buffers and then in place (output reused as first source).
 * The parenthesized name bypasses any function-like macro called
 * pmulhrsw; whether the bare spelling is a macro is not visible here.
 */
TEST(pmulhrsw, fuzz) {
  int i;
  int16_t x[8], y[8], a[8], b[8];
  for (i = 0; i < 1000; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pmulhrsw(a, x, y);
    (pmulhrsw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
    /* in-place: destination and first source are the same buffer */
    pmulhrsw(a, (void *)a, y);
    (pmulhrsw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
  }
}
/*
 * Fuzzes mpsadbw over 100 rounds of RngSet-filled operands.
 *
 * For each pair of operands every control value 0..7 is tried, and
 * `mpsadbw(...)` must match `(mpsadbw)(...)` on all 16 output bytes.
 * A failure reports the round and the control value. The
 * parenthesized name bypasses any function-like macro called mpsadbw
 * (not visible in this block).
 */
TEST(mpsadbw, fuzz) {
  uint8_t lhs[16], rhs[16];
  uint16_t got[8], want[8];
  int trial, ctl;
  for (trial = 0; trial < 100; ++trial) {
    RngSet(lhs, sizeof(lhs));
    RngSet(rhs, sizeof(rhs));
    ctl = 0;
    while (ctl < 8) {
      mpsadbw(got, lhs, rhs, ctl);
      (mpsadbw)(want, lhs, rhs, ctl);
      ASSERT_EQ(0, memcmp(got, want, sizeof(got)), "%d %d", trial, ctl);
      ++ctl;
    }
  }
}
/*
 * Fuzzes pmaddubsw over 1000 rounds of RngSet-filled operands.
 *
 * The first operand is unsigned bytes, the second signed bytes, and
 * the result signed 16-bit lanes. `pmaddubsw(...)` is compared with
 * `(pmaddubsw)(...)` on separate buffers, then in place with the
 * 16-byte output reinterpreted (via void *) as the unsigned operand.
 * The parenthesized name bypasses any function-like macro called
 * pmaddubsw (not visible in this block).
 */
TEST(pmaddubsw, fuzz) {
  int i;
  int8_t y[16];
  uint8_t x[16];
  int16_t a[8], b[8];
  for (i = 0; i < 1000; ++i) {
    RngSet(x, sizeof(x));
    RngSet(y, sizeof(y));
    pmaddubsw(a, x, y);
    (pmaddubsw)(b, x, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
    /* in-place: destination doubles as the first source */
    pmaddubsw(a, (void *)a, y);
    (pmaddubsw)(b, (void *)b, y);
    ASSERT_EQ(0, memcmp(a, b, 16));
  }
}
/*
 * Fuzzes pshufw (four 16-bit lanes, 8 bytes) over 100 rounds.
 *
 * For a set of fixed immediates, `pshufw(...)` must match
 * `(pshufw)(...)` on all 8 output bytes; the first immediate is also
 * checked in place. Immediates are written as literals at each call in
 * case the bare spelling needs a compile-time constant (not visible
 * here). The parenthesized name bypasses any function-like macro
 * called pshufw.
 */
TEST(pshufw, fuzz) {
  int i, j;
  int16_t x[4], a[4], b[4];
  for (i = 0; i < 100; ++i) {
    for (j = 0; j < 4; ++j) x[j] = Rando();
    pshufw(a, x, 0b10111111);
    (pshufw)(b, x, 0b10111111);
    ASSERT_EQ(0, memcmp(a, b, 8));
    /* in-place: destination and source are the same buffer */
    pshufw(a, (void *)a, 0b10111111);
    (pshufw)(b, (void *)b, 0b10111111);
    ASSERT_EQ(0, memcmp(a, b, 8));
    pshufw(a, x, 0b00001000);
    (pshufw)(b, x, 0b00001000);
    ASSERT_EQ(0, memcmp(a, b, 8));
    pshufw(a, x, 0b00010001);
    (pshufw)(b, x, 0b00010001);
    ASSERT_EQ(0, memcmp(a, b, 8));
    pshufw(a, x, 0b01110100);
    (pshufw)(b, x, 0b01110100);
    ASSERT_EQ(0, memcmp(a, b, 8));
    pshufw(a, x, 0b01101101);
    (pshufw)(b, x, 0b01101101);
    ASSERT_EQ(0, memcmp(a, b, 8));
    pshufw(a, x, 0b10011011);
    (pshufw)(b, x, 0b10011011);
    ASSERT_EQ(0, memcmp(a, b, 8));
    pshufw(a, x, 0b10111000);
    (pshufw)(b, x, 0b10111000);
    ASSERT_EQ(0, memcmp(a, b, 8));
    pshufw(a, x, 0b11000111);
    (pshufw)(b, x, 0b11000111);
    ASSERT_EQ(0, memcmp(a, b, 8));
  }
}
/*
 * Exhaustive pairing check for pcmpeqw over a small table of values.
 *
 * For every pair of starting offsets (i, j) into kNumbers16, the two
 * operands are filled with eight consecutive table entries (wrapping
 * around), then `pcmpeqw(...)` must match `(pcmpeqw)(...)` on all 16
 * output bytes. Uses EXPECT_EQ rather than ASSERT_EQ. The
 * parenthesized name bypasses any function-like macro called pcmpeqw
 * (not visible in this block).
 */
TEST(pcmpeqw, test2) {
  /* entries above 0x7fff rely on the implementation's int16_t conversion */
  int16_t kNumbers16[] = {0, 1, 2, 123, 0xffff, 0xfffe, 0x8000, 0x8001, 0x8080};
  int16_t got[8], want[8], lhs[8], rhs[8];
  int i, j, lane;
  for (i = 0; i < ARRAYLEN(kNumbers16); ++i) {
    for (j = 0; j < ARRAYLEN(kNumbers16); ++j) {
      lane = 0;
      while (lane < 8) {
        lhs[lane] = kNumbers16[(i + lane) % ARRAYLEN(kNumbers16)];
        rhs[lane] = kNumbers16[(j + lane) % ARRAYLEN(kNumbers16)];
        ++lane;
      }
      pcmpeqw(got, lhs, rhs);
      (pcmpeqw)(want, lhs, rhs);
      EXPECT_EQ(0, memcmp(got, want, sizeof(got)));
    }
  }
}
/*
 * Benchmarks three ways of invoking psrldq with a shift of 7:
 *   - bare spelling with a literal count,
 *   - bare spelling with the count wrapped in VEIL("r", 7)
 *     (presumably hides the constant from the optimizer; confirm
 *     against the VEIL definition),
 *   - parenthesized name, which bypasses any function-like macro
 *     called psrldq.
 * Neither A nor B is initialized in this block; only timing is of
 * interest, no result is checked.
 */
BENCH(psrldq, bench) {
volatile uint8_t A[16];
volatile uint8_t B[16];
EZBENCH2("psrldq const 𝑖", donothing, PROGN(psrldq(A, B, 7)));
EZBENCH2("psrldq var 𝑖", donothing, PROGN(psrldq(A, B, VEIL("r", 7))));
EZBENCH2("psrldq ansi", donothing, PROGN((psrldq)(A, B, 7)));
}
/*
 * Benchmarks three ways of invoking pslldq with a shift of 7:
 *   - bare spelling with a literal count,
 *   - bare spelling with the count wrapped in VEIL("r", 7)
 *     (presumably hides the constant from the optimizer; confirm
 *     against the VEIL definition),
 *   - parenthesized name, which bypasses any function-like macro
 *     called pslldq.
 * Neither A nor B is initialized in this block; only timing is of
 * interest, no result is checked.
 */
BENCH(pslldq, bench) {
volatile uint8_t A[16];
volatile uint8_t B[16];
EZBENCH2("pslldq const 𝑖", donothing, PROGN(pslldq(A, B, 7)));
EZBENCH2("pslldq var 𝑖", donothing, PROGN(pslldq(A, B, VEIL("r", 7))));
EZBENCH2("pslldq ansi", donothing, PROGN((pslldq)(A, B, 7)));
}