2020-06-15 14:18:57 +00:00
|
|
|
#ifndef COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_
|
|
|
|
#define COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_
|
|
|
|
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
|
|
|
COSMOPOLITAN_C_START_
|
|
|
|
|
|
|
|
/* Public vector-of-float type. `_Vector_size(32)` and `mayalias` are macros
   defined outside this header section — presumably a 32-byte vector_size
   attribute (8 floats) and a may_alias attribute; confirm at their
   definitions. */
typedef float __m256 _Vector_size(32) mayalias;
|
|
|
|
/* Public vector-of-double type; same `_Vector_size(32)` / `mayalias`
   decoration as `__m256` (presumably 4 doubles in 32 bytes — confirm against
   the `_Vector_size` definition). */
typedef double __m256d _Vector_size(32) mayalias;
|
|
|
|
/* Public integer vector type built from `long long` lanes; same
   `_Vector_size(32)` / `mayalias` decoration as `__m256`. */
typedef long long __m256i _Vector_size(32) mayalias;
|
|
|
|
|
2020-12-05 20:20:41 +00:00
|
|
|
/* Variant of the float vector type that additionally carries `forcealign(1)`;
   `_mm256_loadu_ps` / `_mm256_storeu_ps` dereference pointers through it.
   NOTE(review): `forcealign` is defined elsewhere — presumably it lowers the
   required alignment to 1 byte; confirm. */
typedef float __m256_u _Vector_size(32) forcealign(1) mayalias;
|
|
|
|
/* Double-lane counterpart of `__m256_u`: same decoration, including
   `forcealign(1)` (presumably 1-byte alignment — confirm at the macro's
   definition). */
typedef double __m256d_u _Vector_size(32) forcealign(1) mayalias;
|
|
|
|
/* `long long`-lane counterpart of `__m256_u`: same decoration, including
   `forcealign(1)` (presumably 1-byte alignment — confirm at the macro's
   definition). */
typedef long long __m256i_u _Vector_size(32) forcealign(1) mayalias;
|
2020-06-15 14:18:57 +00:00
|
|
|
|
|
|
|
/* Element-typed vector of double, declared without `mayalias`. Not referenced
   by the macros in this section. */
typedef double __v4df _Vector_size(32);
|
|
|
|
/* Element-typed vector of float, declared without `mayalias`. The macros in
   this header cast their `__m256` operands to this type before handing them
   to compiler builtins. */
typedef float __v8sf _Vector_size(32);
|
|
|
|
/* Element-typed vector of signed `long long`, declared without `mayalias`. */
typedef long long __v4di _Vector_size(32);
|
|
|
|
/* Element-typed vector of `unsigned long long`, declared without
   `mayalias`. */
typedef unsigned long long __v4du _Vector_size(32);
|
|
|
|
/* Element-typed vector of signed `int`, declared without `mayalias`. */
typedef int __v8si _Vector_size(32);
|
|
|
|
/* Element-typed vector of `unsigned` (i.e. unsigned int), declared without
   `mayalias`. */
typedef unsigned __v8su _Vector_size(32);
|
|
|
|
/* Element-typed vector of signed `short`, declared without `mayalias`. */
typedef short __v16hi _Vector_size(32);
|
|
|
|
/* Element-typed vector of `unsigned short`, declared without `mayalias`. */
typedef unsigned short __v16hu _Vector_size(32);
|
|
|
|
/* Element-typed vector of plain `char`, declared without `mayalias`. The
   lanes use plain `char`, so their signedness follows the compiler's choice
   for `char`. */
typedef char __v32qi _Vector_size(32);
|
|
|
|
/* Element-typed vector of `unsigned char`, declared without `mayalias`. */
typedef unsigned char __v32qu _Vector_size(32);
|
|
|
|
|
|
|
|
/* Yields a `__m256` whose lanes are all zero: the first lane is initialized
   explicitly and the remaining lanes are zero-filled by the compound
   literal. */
#define _mm256_setzero_ps() ((__m256)(__v8sf){0.0f})
|
|
|
|
/* Reads one `__m256` from FLOATPTR by reinterpreting the pointer as
   `__m256 *`. NOTE(review): the access uses the `__m256` type directly, so
   FLOATPTR presumably has to satisfy that type's alignment — confirm against
   the `_Vector_size` definition. */
#define _mm256_load_ps(FLOATPTR) (((__m256 *)(FLOATPTR))[0])
|
|
|
|
/* Reads one vector from FLOATPTR by reinterpreting the pointer as
   `__m256_u *`, the `forcealign(1)` flavor of the float vector type. */
#define _mm256_loadu_ps(FLOATPTR) (((__m256_u *)(FLOATPTR))[0])
|
|
|
|
/*
 * Writes M256_0 (cast to `__m256`) through FLOATPTR reinterpreted as
 * `__m256 *`. Being an assignment expression, the macro also evaluates to
 * the stored value.
 *
 * NOTE(review): the store goes through the plain `__m256` type, so FLOATPTR
 * presumably has to satisfy that type's alignment — confirm against the
 * `_Vector_size` definition.
 *
 * The body must directly follow the backslash-terminated `#define` line;
 * any intervening line would become the macro body instead.
 */
#define _mm256_store_ps(FLOATPTR, M256_0) \
  (*(__m256 *)(FLOATPTR) = (__m256)(M256_0))
|
|
|
|
/*
 * Writes M256_0 (cast to `__m256`) through FLOATPTR reinterpreted as
 * `__m256_u *`, the `forcealign(1)` flavor of the float vector type. Being
 * an assignment expression, the macro also evaluates to the stored value.
 *
 * The body must directly follow the backslash-terminated `#define` line;
 * any intervening line would become the macro body instead.
 */
#define _mm256_storeu_ps(FLOATPTR, M256_0) \
  (*(__m256_u *)(FLOATPTR) = (__m256)(M256_0))
|
|
|
|
/*
 * Forwards to `__builtin_ia32_vextractf128_ps256`, passing M256_0 as a
 * `__v8sf` and INT_1 as an `int`, and returns the builtin's result cast to
 * `__m128`.
 *
 * NOTE(review): INT_1 presumably selects the low (0) or high (1) 128-bit
 * half and likely has to be a compile-time constant — confirm against the
 * compiler's builtin documentation.
 *
 * Every continuation line must directly follow its backslash-terminated
 * predecessor; any intervening line would truncate the macro body.
 */
#define _mm256_extractf128_ps(M256_0, INT_1)                      \
  ((__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(M256_0), \
                                             (int)(INT_1)))
|
|
|
|
/*
 * Forwards to `__builtin_ia32_vinsertf128_ps256`, passing M256_0 as a
 * `__v8sf`, M128_1 as a `__v4sf` and IMM_2 as an `int`, and returns the
 * builtin's result cast to `__m256`.
 *
 * NOTE(review): IMM_2 presumably selects which 128-bit half of M256_0 is
 * replaced by M128_1 and likely has to be a compile-time constant — confirm
 * against the compiler's builtin documentation.
 *
 * Every continuation line must directly follow its backslash-terminated
 * predecessor; any intervening line would truncate the macro body.
 */
#define _mm256_insertf128_ps(M256_0, M128_1, IMM_2) \
  ((__m256)__builtin_ia32_vinsertf128_ps256(        \
      (__v8sf)(__m256)(M256_0), (__v4sf)(__m128)(M128_1), (int)(IMM_2)))
|
|
|
|
|
|
|
|
/*
 * Widens a `__m128` to a `__m256`, with two compiler-specific spellings.
 *
 * When `__llvm__` is defined, the result is built with
 * `__builtin_shufflevector`: lanes 0..3 come from M128_0 and the upper four
 * lanes use index -1 (presumably "don't care" per Clang's builtin
 * documentation — confirm). M128_0 is expanded twice in this variant, so
 * callers should not pass an expression with side effects.
 *
 * Otherwise the macro forwards to `__builtin_ia32_ps256_ps`.
 *
 * Every continuation line must directly follow its backslash-terminated
 * predecessor; any intervening line would truncate the macro body.
 */
#ifdef __llvm__
#define _mm256_castps128_ps256(M128_0)                            \
  ((__m256)__builtin_shufflevector((__v4sf)(__m128)(M128_0),      \
                                   (__v4sf)(__m128)(M128_0), 0, 1, 2, 3, -1, \
                                   -1, -1, -1))
#else
#define _mm256_castps128_ps256(M128_0) \
  ((__m256)__builtin_ia32_ps256_ps((__v4sf)(__m128)(M128_0)))
#endif
|
|
|
|
|
|
|
|
COSMOPOLITAN_C_END_
|
|
|
|
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
|
|
|
#endif /* COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_ */
|