cosmopolitan/libc/intrin/palignr.h

56 lines
2.1 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#ifndef COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_
#define COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_
#include "libc/assert.h"
#include "libc/intrin/macros.h"
#include "libc/macros.h"
#include "libc/str/str.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/**
 * Declared only; the definition is not in this header.
 *
 * NOTE(review): presumably the counterpart of palignr() whose final
 * argument may be a runtime value rather than an immediate, taking its
 * arguments in the same (𝑐, 𝑏, 𝑎, 𝑖) order — confirm against the
 * definition.
 */
void pvalignr(void *, const void *, const void *, size_t);
/**
 * Extracts a 16-byte window from two concatenated vectors.
 *
 * The sixteen bytes of 𝑎, the sixteen bytes of 𝑏, and sixteen zero
 * bytes are laid end to end; 𝑐 receives the sixteen bytes that start
 * at byte offset min(𝑖,32) of that sequence:
 *
 *          𝑖= 0 means 𝑐←𝑎
 *       0<𝑖<16 means 𝑐←𝑎║𝑏
 *          𝑖=16 means 𝑐←𝑏
 *      16<𝑖<32 means 𝑐←𝑏║0
 *          𝑖≥32 means 𝑐←0
 *
 * Both inputs are staged in a local buffer before 𝑐 is written, so 𝑐
 * may refer to the same storage as 𝑎 or 𝑏.
 *
 * @param c receives 16 bytes
 * @param b supplies 16 bytes placed at offsets 16..31
 * @param a supplies 16 bytes placed at offsets 0..15
 * @param 𝑖 needs to be a literal, constexpr, or embedding when the
 *     palignr() macro expands to inline assembly
 * @see pvalignr()
 * @mayalias
 */
static void palignr(void *c, const void *b, const void *a, size_t i) {
  /* layout: 𝑎 at [0,16), 𝑏 at [16,32), zero padding at [32,48) */
  char window[48] = {0};
  size_t skip = i;
  if (skip > 32) {
    /* beyond offset 32 only padding is left, so every larger 𝑖 agrees */
    skip = 32;
  }
  memcpy(window, a, 16);
  memcpy(window + 16, b, 16);
  memcpy(c, window + skip, 16);
}
#ifndef __STRICT_ANSI__
/**
 * Inline-assembly fast path for palignr().
 *
 * When IsModeDbg() is false and X86_NEED(SSE) and X86_HAVE(SSSE3) both
 * hold, this expands to an inline `palignr` instruction, or to the
 * three-operand `vpalignr` form when X86_NEED(AVX) holds. In every
 * other case it calls the palignr() function declared above. 𝐼 is
 * bound with an "i" constraint, so the assembly path requires it to be
 * a compile-time immediate.
 *
 * NOTE(review): the operands are accessed through __intrin_xmm_t
 * pointers; presumably that requires 16-byte alignment of 𝐶, 𝐵 and 𝐴 —
 * confirm against the typedef.
 */
#define palignr(C, B, A, I)                                            \
  do {                                                                 \
    if (IsModeDbg() || !X86_NEED(SSE) || !X86_HAVE(SSSE3)) {           \
      /* a macro name is not re-expanded inside its own body, */       \
      /* so this is a call to the function of the same name   */       \
      palignr(C, B, A, I);                                             \
    } else {                                                           \
      __intrin_xmm_t *Dst_ = (void *)(C);                              \
      const __intrin_xmm_t *Hi_ = (const __intrin_xmm_t *)(B);         \
      const __intrin_xmm_t *Lo_ = (const __intrin_xmm_t *)(A);         \
      if (X86_NEED(AVX)) {                                             \
        asm("vpalignr\t%3,%2,%1,%0"                                    \
            : "=x"(*Dst_)                                              \
            : "x"(*Hi_), "x"(*Lo_), "i"(I));                           \
      } else {                                                         \
        /* two-operand form: 𝐵 is tied to the output register */       \
        asm("palignr\t%2,%1,%0"                                        \
            : "=x"(*Dst_)                                              \
            : "x"(*Lo_), "i"(I), "0"(*Hi_));                           \
      }                                                                \
    }                                                                  \
  } while (0)
#endif
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_ */