cosmopolitan/libc/intrin/palignr.h

56 lines
2.1 KiB
C
Raw Normal View History

2020-06-15 14:18:57 +00:00
#ifndef COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_
#define COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_
#include "libc/assert.h"
#include "libc/intrin/macros.h"
#include "libc/macros.h"
#include "libc/str/str.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
void pvalignr(void *, const void *, const void *, size_t);
/**
 * Extracts a 16-byte window from the 48-byte sequence a || b || zeros.
 *
 * The window begins i bytes into that sequence, with i clamped to 32:
 *
 *     i = 0        means  c <- a
 *     0 < i < 16   means  c <- a[i..15] || b[0..i-1]
 *     i = 16       means  c <- b
 *     16 < i < 32  means  c <- b[i-16..15] || zeros
 *     i >= 32      means  c <- zeros
 *
 * @param c receives the 16 selected bytes
 * @param b supplies bytes 16..31 of the sequence
 * @param a supplies bytes 0..15 of the sequence
 * @param i is the byte offset; needs to be a literal, constexpr, or
 *     embedding
 * @see pvalignr()
 * @mayalias
 */
static void palignr(void *c, const void *b, const void *a, size_t i) {
  /* Bytes 32..47 stay zero; they are what an offset past b reads. */
  char window[48] = {0};
  size_t skip = i < 32 ? i : 32;
  /* Both inputs are staged before c is written, so c may alias a or b. */
  memcpy(window, a, 16);
  memcpy(window + 16, b, 16);
  memcpy(c, window + skip, 16);
}
#ifndef __STRICT_ANSI__
/**
 * Inline-assembly form of palignr(), shadowing the function of the
 * same name unless __STRICT_ANSI__ is defined.
 *
 * When IsModeDbg() is false and both X86_NEED(SSE) and X86_HAVE(SSSE3)
 * hold, C, B and A are reinterpreted as pointers to __intrin_xmm_t and
 * one instruction is emitted:
 *
 *   - `palignr` when X86_NEED(AVX) is false. The value *B is tied to
 *     the output operand through the "0" constraint, so it occupies
 *     the same register that receives the result; *A and I are the
 *     remaining inputs.
 *   - `vpalignr` otherwise, with *B and *A passed as two separate
 *     input operands and the result written to its own output operand.
 *
 * In every other case the palignr() function is called instead. A
 * function-like macro is not re-expanded inside its own expansion, so
 * the inner `palignr(C, B, A, I)` names the function, not this macro.
 *
 * I is passed through an "i" (immediate) constraint, so it must be a
 * compile-time constant wherever the asm branch is compiled.
 *
 * NOTE(review): this path dereferences C, B and A as __intrin_xmm_t,
 * whereas the function fallback only uses memcpy(). Whether that adds
 * an alignment requirement depends on how __intrin_xmm_t is declared
 * in libc/intrin/macros.h -- confirm.
 */
#define palignr(C, B, A, I) \
do { \
if (!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3)) { \
__intrin_xmm_t *Xmm0 = (void *)(C); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
if (!X86_NEED(AVX)) { \
asm("palignr\t%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpalignr\t%3,%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
} \
} else { \
palignr(C, B, A, I); \
} \
} while (0)
#endif
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_ */