cosmopolitan/libc/bits/bits.h

532 lines
26 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#ifndef COSMOPOLITAN_LIBC_BITS_H_
#define COSMOPOLITAN_LIBC_BITS_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#define CheckUnsigned(x) ((x) / !((typeof(x))(-1) < 0))
/*───────────────────────────────────────────────────────────────────────────│─╗
│ cosmopolitan § bits ─╬─│┼
╚────────────────────────────────────────────────────────────────────────────│*/
extern const bool kTrue;
extern const bool kFalse;
extern const uint8_t kReverseBits[256];
uint16_t bswap_16(uint16_t) pureconst;
uint32_t bswap_32(uint32_t) pureconst;
uint32_t bswap_64(uint32_t) pureconst;
unsigned long popcount(unsigned long) pureconst;
uint32_t gray(uint32_t) pureconst;
uint32_t ungray(uint32_t) pureconst;
unsigned bcdadd(unsigned, unsigned) pureconst;
unsigned long bcd2i(unsigned long) pureconst;
unsigned long i2bcd(unsigned long) pureconst;
void bcxcpy(unsigned char (*)[16], unsigned long);
int ffs(int) pureconst;
int ffsl(long int) pureconst;
int ffsll(long long int) pureconst;
int fls(int) pureconst;
int flsl(long int) pureconst;
int flsll(long long int) pureconst;
uint8_t bitreverse8(uint8_t) libcesque pureconst;
uint16_t bitreverse16(uint16_t) libcesque pureconst;
uint32_t bitreverse32(uint32_t) libcesque pureconst;
uint64_t bitreverse64(uint64_t) libcesque pureconst;
unsigned long roundup2pow(unsigned long) libcesque pureconst;
unsigned long roundup2log(unsigned long) libcesque pureconst;
unsigned long rounddown2pow(unsigned long) libcesque pureconst;
/*───────────────────────────────────────────────────────────────────────────│─╗
│ cosmopolitan § bits » no assembly required ─╬─│┼
╚────────────────────────────────────────────────────────────────────────────│*/
/**
* Undocumented incantations for ROR, ROL, and SAR.
*/
#define ROR(w, k) (CheckUnsigned(w) >> (k) | (w) << (sizeof(w) * 8 - (k)))
#define ROL(w, k) ((w) << (k) | CheckUnsigned(w) >> (sizeof(w) * 8 - (k)))
#define SAR(w, k) (((w) & ~(~0u >> (k))) | ((w) >> ((k) & (sizeof(w) * 8 - 1))))
#define bitreverse8(X) (kReverseBits[(X)&0xff])
#define bitreverse16(X) \
((uint16_t)kReverseBits[(X)&0xff] << 010 | \
kReverseBits[((uint16_t)(X) >> 010) & 0xff])
#ifndef __GNUC__
#define READ16LE(P) ((unsigned)(P)[1] << 010 | (unsigned)(P)[0])
#define READ32LE(P) \
((unsigned long)(P)[3] << 030 | (unsigned long)(P)[2] << 020 | \
(unsigned long)(P)[1] << 010 | (unsigned long)(P)[0])
#define READ64LE(P) \
((unsigned long long)(P)[3] << 030 | (unsigned long)(P)[2] << 020 | \
(unsigned long long)(P)[1] << 010 | (unsigned long)(P)[0])
#else
#define READ16LE(P) read16le(P)
#define READ32LE(P) read32le(P)
#define READ64LE(P) read64le(P)
#define read16le(P) \
({ \
const unsigned char *Pu = (const unsigned char *)(P); \
(uint16_t) Pu[1] << 010 | (uint16_t)Pu[0]; \
})
#define read32le(P) \
({ \
const unsigned char *Pu = (const unsigned char *)(P); \
((uint32_t)Pu[3] << 030 | (uint32_t)Pu[2] << 020 | \
(uint32_t)Pu[1] << 010 | (uint32_t)Pu[0] << 000); \
})
#define read64le(P) \
({ \
const unsigned char *Pu = (const unsigned char *)(P); \
((uint64_t)Pu[7] << 070 | (uint64_t)Pu[6] << 060 | \
(uint64_t)Pu[5] << 050 | (uint64_t)Pu[4] << 040 | \
(uint64_t)Pu[3] << 030 | (uint64_t)Pu[2] << 020 | \
(uint64_t)Pu[1] << 010 | (uint64_t)Pu[0] << 000); \
})
#endif
#define WRITE16LE(P, V) \
do { \
uint8_t *Ple = (P); \
uint16_t Vle = (V); \
Ple[0] = (uint8_t)(Vle >> 000); \
Ple[1] = (uint8_t)(Vle >> 010); \
} while (0)
#define WRITE32LE(P, V) \
do { \
uint8_t *Ple = (P); \
uint32_t Vle = (V); \
Ple[0] = (uint8_t)(Vle >> 000); \
Ple[1] = (uint8_t)(Vle >> 010); \
Ple[2] = (uint8_t)(Vle >> 020); \
Ple[3] = (uint8_t)(Vle >> 030); \
} while (0)
#define WRITE64LE(P, V) \
do { \
uint8_t *Ple = (P); \
uint64_t Vle = (V); \
Ple[0] = (uint8_t)(Vle >> 000); \
Ple[1] = (uint8_t)(Vle >> 010); \
Ple[2] = (uint8_t)(Vle >> 020); \
Ple[3] = (uint8_t)(Vle >> 030); \
Ple[4] = (uint8_t)(Vle >> 040); \
Ple[5] = (uint8_t)(Vle >> 050); \
Ple[6] = (uint8_t)(Vle >> 060); \
Ple[7] = (uint8_t)(Vle >> 070); \
} while (0)
/* TODO(jart): these ones aren't coded correctly */
#define read128le(P) ((uint128_t)read64le((P) + 8) << 0100 | read64le(P))
#define read16be(P) ((uint16_t)(*(P) << 010) | (uint16_t)(*((P) + 1)))
#define read32be(P) ((uint32_t)read16be(P) << 020 | (uint32_t)read16be((P) + 2))
#define read64be(P) ((uint64_t)read32be(P) << 040 | read32be((P) + 4))
#define read128be(P) ((uint128_t)read64be(P) << 0100 | read64be((P) + 8))
/*───────────────────────────────────────────────────────────────────────────│─╗
│ cosmopolitan § bits » some assembly required ─╬─│┼
╚────────────────────────────────────────────────────────────────────────────│*/
/**
* Constraints for virtual machine flags.
* @note we beseech clang devs for flag constraints
*/
#ifdef __GCC_ASM_FLAG_OUTPUTS__ /* GCC6+ CLANG10+ */
#define CF "=@ccc"
#define CFLAG(OP) OP
#define ZF "=@ccz"
#define ZFLAG(OP) OP
#define OF "=@cco"
#define OFLAG(OP) OP
#define SF "=@ccs"
#define SFLAG(SP) SP
#define ABOVEF "=@cca" /* i.e. !ZF && !CF */
#define ABOVEFLAG(OP) OP
#else
#define CF "=q"
#define CFLAG(OP) OP "\n\tsetc\t%b0"
#define ZF "=q"
#define ZFLAG(OP) OP "\n\tsetz\t%b0"
#define OF "=q"
#define OFLAG(OP) OP "\n\tseto\t%b0"
#define SF "=q"
#define SFLAG(SP) OP "\n\tsets\t%b0"
#define ABOVEF "=@cca"
#define ABOVEFLAG(OP) OP "\n\tseta\t%b0"
#endif
/**
* Reads scalar from memory, offset by segment.
*
* @return *(MEM) relative to segment
* @see arch_prctl()
* @see pushpop()
*/
#define fs(MEM) __peek("fs", MEM)
#define gs(MEM) __peek("gs", MEM)
/**
* Reads scalar from memory w/ one operation.
*
* @param MEM is alignas(𝑘) uint𝑘_t[hasatleast 1] where 𝑘 ∈ {8,16,32,64}
* @return *(MEM)
* @note defeats compiler load tearing optimizations
* @note alignas(𝑘) is implied if compiler knows type
* @note alignas(𝑘) only avoids multi-core / cross-page edge cases
* @see Intel's Six-Thousand Page Manual V.3A §8.2.3.1
* @see atomic_store()
*/
#define atomic_load(MEM) \
({ \
autotype(MEM) Mem = (MEM); \
typeof(*Mem) Reg; \
asm("mov\t%1,%0" : "=r"(Reg) : "m"(*Mem)); \
Reg; \
})
/**
* Saves scalar to memory w/ one operation.
*
* This is guaranteed to happen in either one or zero operations,
* depending on whether or not it's possible for *(MEM) to be read
* afterwards. This macro only forbids compiler from using >1 ops.
*
* @param MEM is alignas(𝑘) uint𝑘_t[hasatleast 1] where 𝑘 ∈ {8,16,32,64}
* @param VAL is uint𝑘_t w/ better encoding for immediates (constexpr)
* @return VAL
* @note alignas(𝑘) on nexgen32e only needed for end of page gotcha
* @note alignas(𝑘) is implied if compiler knows type
* @note needed to defeat store tearing optimizations
* @see Intel Six-Thousand Page Manual Manual V.3A §8.2.3.1
* @see atomic_load()
*/
#define atomic_store(MEM, VAL) \
({ \
autotype(VAL) Val = (VAL); \
typeof(&Val) Mem = (MEM); \
asm("mov%z1\t%1,%0" : "=m,m"(*Mem) : "i,r"(Val)); \
Val; \
})
/**
* Returns true if bit is set in memory.
*
* This is a generically-typed Bitset<T> ∀ RAM. This macro is intended
* to be container-like with optimal machine instruction encoding, cf.
* machine-agnostic container abstractions. Memory accesses are words.
* Register allocation can be avoided if BIT is known. Be careful when
* casting character arrays since that should cause a page fault.
*
* @param MEM is uint𝑘_t[] where 𝑘 ∈ {16,32,64} base address
* @param BIT ∈ [-(2**(𝑘-1)),2**(𝑘-1)) is zero-based index
* @return true if bit is set, otherwise false
* @see Intel's Six Thousand Page Manual V.2A 3-113
* @see bts(), btr(), btc()
*/
#define bt(MEM, BIT) \
({ \
bool OldBit; \
if (isconstant(BIT)) { \
asm(CFLAG("bt%z1\t%2,%1") \
: CF(OldBit) \
: "m"((MEM)[(BIT) / (sizeof((MEM)[0]) * CHAR_BIT)]), \
"J"((BIT) % (sizeof((MEM)[0]) * CHAR_BIT)) \
: "cc"); \
} else if (sizeof((MEM)[0]) == 2) { \
asm(CFLAG("bt\t%w2,%1") : CF(OldBit) : "m"((MEM)[0]), "r"(BIT) : "cc"); \
} else if (sizeof((MEM)[0]) == 4) { \
asm(CFLAG("bt\t%k2,%1") : CF(OldBit) : "m"((MEM)[0]), "r"(BIT) : "cc"); \
} else if (sizeof((MEM)[0]) == 8) { \
asm(CFLAG("bt\t%q2,%1") : CF(OldBit) : "m"((MEM)[0]), "r"(BIT) : "cc"); \
} \
OldBit; \
})
#define bts(MEM, BIT) __BitOp("bts", BIT, MEM) /** bit test and set */
#define btr(MEM, BIT) __BitOp("btr", BIT, MEM) /** bit test and reset */
#define btc(MEM, BIT) __BitOp("btc", BIT, MEM) /** bit test and complement */
#define lockbts(MEM, BIT) __BitOp("lock bts", BIT, MEM)
#define lockbtr(MEM, BIT) __BitOp("lock btr", BIT, MEM)
#define lockbtc(MEM, BIT) __BitOp("lock btc", BIT, MEM)
#define lockinc(MEM) __ArithmeticOp1("lock inc", MEM)
#define lockdec(MEM) __ArithmeticOp1("lock dec", MEM)
#define locknot(MEM) __ArithmeticOp1("lock not", MEM)
#define lockneg(MEM) __ArithmeticOp1("lock neg", MEM)
#define lockaddeq(MEM, VAL) __ArithmeticOp2("lock add", VAL, MEM)
#define locksubeq(MEM, VAL) __ArithmeticOp2("lock sub", VAL, MEM)
#define lockxoreq(MEM, VAL) __ArithmeticOp2("lock xor", VAL, MEM)
#define lockandeq(MEM, VAL) __ArithmeticOp2("lock and", VAL, MEM)
#define lockoreq(MEM, VAL) __ArithmeticOp2("lock or", VAL, MEM)
/**
* Exchanges *MEMORY into *LOCALVAR w/ one operation.
*
* @param MEMORY is uint𝑘_t[hasatleast 1] where 𝑘 ∈ {8,16,32,64}
* @param LOCALVAR is uint𝑘_t[hasatleast 1]
* @return LOCALVAR[0]
* @see xchg()
*/
#define lockxchg(MEMORY, LOCALVAR) \
({ \
static_assert(typescompatible(typeof(*(MEMORY)), typeof(*(LOCALVAR)))); \
asm("xchg\t%0,%1" : "+%m"(*(MEMORY)), "+r"(*(LOCALVAR))); \
*(LOCALVAR); \
})
/**
* Compares and exchanges.
*
* @param IFTHING is uint𝑘_t[hasatleast 1] where 𝑘 ∈ {8,16,32,64}
* @return true if value was exchanged, otherwise false
* @see lockcmpxchg()
*/
#define cmpxchg(IFTHING, ISEQUALTOME, REPLACEITWITHME) \
({ \
bool DidIt; \
asm(ZFLAG("cmpxchg\t%3,%1") \
: ZF(DidIt), "+m"(*(IFTHING)), "+a"(*(ISEQUALTOME)) \
: "r"((typeof(*(IFTHING)))(REPLACEITWITHME)) \
: "cc"); \
DidIt; \
})
#define ezcmpxchg(IFTHING, ISEQUALTOME, REPLACEITWITHME) \
({ \
bool DidIt; \
autotype(IFTHING) IfThing = (IFTHING); \
typeof(*IfThing) IsEqualToMe = (ISEQUALTOME); \
typeof(*IfThing) ReplaceItWithMe = (REPLACEITWITHME); \
asm(ZFLAG("cmpxchg\t%3,%1") \
: ZF(DidIt), "+m"(*IfThing), "+a"(IsEqualToMe) \
: "r"(ReplaceItWithMe) \
: "cc"); \
DidIt; \
})
/**
* Compares and exchanges w/ one operation.
*
* @param IFTHING is uint𝑘_t[hasatleast 1] where 𝑘 ∈ {8,16,32,64}
* @return true if value was exchanged, otherwise false
* @see lockcmpxchg()
*/
#define lockcmpxchg(IFTHING, ISEQUALTOME, REPLACEITWITHME) \
({ \
bool DidIt; \
asm(ZFLAG("lock cmpxchg\t%3,%1") \
: ZF(DidIt), "+m"(*(IFTHING)), "+a"(*(ISEQUALTOME)) \
: "r"((typeof(*(IFTHING)))(REPLACEITWITHME)) \
: "cc"); \
DidIt; \
})
/**
* Gets value of extended control register.
*/
#define xgetbv(xcr_register_num) \
({ \
unsigned hi, lo; \
asm("xgetbv" : "=d"(hi), "=a"(lo) : "c"(cr_register_num)); \
(uint64_t) hi << 32 | lo; \
})
/**
* Reads model-specific register.
* @note programs running as guests won't have authorization
*/
#define rdmsr(msr) \
({ \
uint32_t lo, hi; \
asm volatile("rdmsr" : "=a"(lo), "=d"(hi) : "c"(msr)); \
(uint64_t) hi << 32 | lo; \
})
/**
* Writes model-specific register.
* @note programs running as guests won't have authorization
*/
#define wrmsr(msr, val) \
do { \
uint64_t val_ = (val); \
asm volatile("wrmsr" \
: /* no outputs */ \
: "c"(msr), "a"((uint32_t)val_), \
"d"((uint32_t)(val_ >> 32))); \
} while (0)
/**
* Tells CPU page tables changed for virtual address.
* @note programs running as guests won't have authorization
*/
#define invlpg(MEM) \
asm volatile("invlpg\t(%0)" : /* no outputs */ : "r"(MEM) : "memory")
/**
* Teleports code fragment inside _init().
*/
#define INITIALIZER(PRI, NAME, CODE) \
asm(".pushsection .init." #PRI "." #NAME ",\"ax\",@progbits\n\t" \
"call\t" #NAME "\n\t" \
".popsection"); \
textstartup optimizesize void NAME(char *rdi, const char *rsi) { \
CODE; \
asm volatile("" : /* no outputs */ : "D"(rdi), "S"(rsi)); \
}
#ifndef __STRICT_ANSI__
#if __PIC__ + __code_model_medium__ + __code_model_large__ + 0 > 1
#define __EZLEA(SYMBOL) "lea\t" SYMBOL "(%%rip),%"
#else
#define __EZLEA(SYMBOL) "mov\t$" SYMBOL ",%k"
#endif
#define weaken(symbol) ((const typeof(&(symbol)))weakaddr(#symbol))
#define strongaddr(symbolstr) \
({ \
intptr_t waddr; \
asm(__EZLEA(symbolstr) "0" : "=r"(waddr)); \
waddr; \
})
#define weakaddr(symbolstr) \
({ \
intptr_t waddr; \
asm(".weak\t" symbolstr "\n\t" __EZLEA(symbolstr) "0" : "=r"(waddr)); \
waddr; \
})
#else
#define weaken(symbol) symbol
#define weakaddr(symbolstr) &(symbolstr)
#endif
#define slowcall(fn, arg1, arg2, arg3, arg4, arg5, arg6) \
({ \
void *ax; \
asm volatile("push\t%7\n\t" \
"push\t%6\n\t" \
"push\t%5\n\t" \
"push\t%4\n\t" \
"push\t%3\n\t" \
"push\t%2\n\t" \
"push\t%1\n\t" \
"call\tslowcall" \
: "=a"(ax) \
: "g"(fn), "g"(arg1), "g"(arg2), "g"(arg3), "g"(arg4), \
"g"(arg5), "g"(arg6) \
: "memory"); \
ax; \
})
#define IsAddressCanonicalForm(P) \
({ \
intptr_t p2 = (intptr_t)(P); \
(0xffff800000000000l <= p2 && p2 <= 0x00007fffffffffffl); \
})
/*───────────────────────────────────────────────────────────────────────────│─╗
│ cosmopolitan § bits » optimizations ─╬─│┼
╚────────────────────────────────────────────────────────────────────────────│*/
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#define popcount(X) (isconstant(X) ? __builtin_popcount(X) : __popcount(X))
#define popcount$nehalem(X) \
({ \
typeof(X) BitCount; \
asm("popcnt\t%1,%0" : "=r,r"(BitCount) : "r,m"(X) : "cc"); \
BitCount; \
})
#ifdef __POPCNT__
#define __popcount(X) popcount$nehalem(X)
#else
#define __popcount(X) (popcount)(X)
#endif
#define bswap_16(U16) \
(isconstant(U16) ? ((((U16)&0xff00) >> 010) | (((U16)&0x00ff) << 010)) : ({ \
uint16_t Swapped16, Werd16 = (U16); \
asm("xchg\t%b0,%h0" : "=Q"(Swapped16) : "0"(Werd16)); \
Swapped16; \
}))
#define bswap_32(U32) \
(isconstant(U32) \
? ((((U32)&0xff000000) >> 030) | (((U32)&0x000000ff) << 030) | \
(((U32)&0x00ff0000) >> 010) | (((U32)&0x0000ff00) << 010)) \
: ({ \
uint32_t Swapped32, Werd32 = (U32); \
asm("bswap\t%0" : "=r"(Swapped32) : "0"(Werd32)); \
Swapped32; \
}))
#define bswap_64(U64) \
(isconstant(U64) ? ((((U64)&0xff00000000000000ul) >> 070) | \
(((U64)&0x00000000000000fful) << 070) | \
(((U64)&0x00ff000000000000ul) >> 050) | \
(((U64)&0x000000000000ff00ul) << 050) | \
(((U64)&0x0000ff0000000000ul) >> 030) | \
(((U64)&0x0000000000ff0000ul) << 030) | \
(((U64)&0x000000ff00000000ul) >> 010) | \
(((U64)&0x00000000ff000000ul) << 010)) \
: ({ \
uint64_t Swapped64, Werd64 = (U64); \
asm("bswap\t%0" : "=r"(Swapped64) : "0"(Werd64)); \
Swapped64; \
}))
#endif /* defined(__GNUC__) && !defined(__STRICT_ANSI__) */
/*───────────────────────────────────────────────────────────────────────────│─╗
│ cosmopolitan § bits » implementation details ─╬─│┼
╚────────────────────────────────────────────────────────────────────────────│*/
#define __peek(SEGMENT, ADDRESS) \
({ \
typeof(*(ADDRESS)) Pk; \
asm("mov\t%%" SEGMENT ":%1,%0" : "=r"(Pk) : "m"(*(ADDRESS))); \
Pk; \
})
#define __ArithmeticOp1(OP, MEM) \
({ \
asm(OP "%z0\t%0" : "+m"(*(MEM)) : /* no inputs */ : "cc"); \
MEM; \
})
#define __ArithmeticOp2(OP, VAL, MEM) \
({ \
asm(OP "%z0\t%1,%0" : "+m,m"(*(MEM)) : "i,r"(VAL) : "cc"); \
MEM; \
})
#define __BitOp(OP, BIT, MEM) \
({ \
bool OldBit; \
if (isconstant(BIT)) { \
asm(CFLAG(OP "%z1\t%2,%1") \
: CF(OldBit), "+m"((MEM)[(BIT) / (sizeof((MEM)[0]) * CHAR_BIT)]) \
: "J"((BIT) % (sizeof((MEM)[0]) * CHAR_BIT)) \
: "cc"); \
} else if (sizeof((MEM)[0]) == 2) { \
asm(CFLAG(OP "\t%w2,%1") \
: CF(OldBit), "+m"((MEM)[0]) \
: "r"(BIT) \
: "cc"); \
} else if (sizeof((MEM)[0]) == 4) { \
asm(CFLAG(OP "\t%k2,%1") \
: CF(OldBit), "+m"((MEM)[0]) \
: "r"(BIT) \
: "cc"); \
} else if (sizeof((MEM)[0]) == 8) { \
asm(CFLAG(OP "\t%q2,%1") \
: CF(OldBit), "+m"((MEM)[0]) \
: "r"(BIT) \
: "cc"); \
} \
OldBit; \
})
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_H_ */