This is a maintenance fork
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

3921 lines
132 KiB

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/calls/calls.h"
#include "libc/calls/struct/stat.h"
#include "libc/elf/def.h"
#include "libc/fmt/conv.h"
#include "libc/log/check.h"
#include "libc/log/log.h"
#include "libc/macros.h"
#include "libc/mem/mem.h"
#include "libc/nexgen32e/bsr.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/o.h"
#include "libc/x/x.h"
#include "third_party/chibicc/file.h"
#include "third_party/gdtoa/gdtoa.h"
#include "tool/build/lib/elfwriter.h"
/**
* @fileoverview Assembler
*
* This program turns assembly into relocatable NexGen32e ELF objects.
* That process is normally an implementation detail of your compiler,
* which can embed this program or launch it as a subprocess. Much GNU
* style syntax is supported. Your code that gets embedded in an asm()
* statement will ultimately end up here. This implementation has the
* advantage of behaving the same across platforms, in a simple single
* file implementation that compiles down to a 100kilo ape executable.
*
* Your assembler supports the following flags:
*
* -o FILE output path [default: a.out]
* -I DIR append include path [default: .]
* -W inhibit .warning
* -Z inhibit .error and .err
*
* Your assembler supports the following directives:
*
* .zero INT... emits int8
* .word INT... emits int16
* .long INT... emits int32
* .quad INT... emits int64
* .ascii STR... emits string
* .asciz STR... emits string and 0 byte
* .ident STR emits string to .comment section
* .float NUMBER... emits binary32
* .double NUMBER... emits binary64
* .float80 NUMBER... emits x86 float (10 bytes)
* .ldbl NUMBER... emits x86 float (16 bytes)
* .sleb128 NUMBER... emits LEB-128 signed varint
* .uleb128 NUMBER... emits LEB-128 unsigned varint
* .align BYTES [FILL [MAXSKIP]] emits fill bytes to boundary
* .end halts tokenization
* .abort crashes assembler
* .err aborts (ignorable w/ -Z)
* .error STR aborts (ignorable w/ -Z)
* .warning STR whines (ignorable w/ -W)
* .text enters text section (default)
* .data enters data section
* .bss enters bss section
* .section NAME [SFLG SHT] enters section
* .previous enters previous section
* .pushsection NAME [SFLG SHT] pushes section
* .popsection pops section
* .type SYM TYPE sets type of symbol
* .size SYM EXPR sets size of symbol
* .internal SYM... marks symbol STV_INTERNAL
* .hidden SYM... marks symbol STV_HIDDEN
* .protected SYM... marks symbol STV_PROTECTED
* .globl SYM... marks symbol STB_GLOBAL
* .local SYM... marks symbol STB_LOCAL
* .weak SYM... marks symbol STB_WEAK
* .include FILE assembles file source
* .incbin FILE emits file content
* .file FILENO PATH dwarf file define
* .loc FILENO LINENO dwarf source line
*
* TYPE can be one of the following:
*
* - @notype STT_NOTYPE (default)
* - @object STT_OBJECT
* - @function STT_FUNC
* - @common STT_COMMON
* - @tls_object STT_TLS
*
* SHT can be one of the following:
*
* - @progbits SHT_PROGBITS
* - @note SHT_NOTE
* - @nobits SHT_NOBITS
* - @preinit_array SHT_PREINIT_ARRAY
* - @init_array SHT_INIT_ARRAY
* - @fini_array SHT_FINI_ARRAY
*
* SFLG is a string which may have the following characters:
*
* - a SHF_ALLOC
* - w SHF_WRITE
* - x SHF_EXECINSTR
* - g SHF_GROUP
* - M SHF_MERGE
* - S SHF_STRINGS
* - T SHF_TLS
*/
// Instruction prefix bytes. 0x66 and 0x67 are the same values that
// kPrefixByte pairs with "data16" and "addr32" respectively.
#define OSZ 0x66
#define ASZ 0x67
// REX prefix bytes; these match the "rex", "rex.b", "rex.x", "rex.r"
// and "rex.w" entries of kPrefix / kPrefixByte.
#define REX 0x40 // byte
#define REXB 0x41 // src
#define REXX 0x42 // index
#define REXR 0x44 // dest
#define REXW 0x48 // quad
// Single-bit flags located above the low 16 bits.
// NOTE(review): their users are outside this chunk; presumably they
// annotate parsed memory/register operands -- confirm at the use sites.
#define HASASZ 0x00010000
#define HASBASE 0x00020000
#define HASINDEX 0x00040000
#define ISRIP 0x00080000
#define ISREG 0x00100000
// Grows the list L (a struct with members `n` and `p`) by one element.
// The new element is uninitialized and the realloc() result is not
// checked. L is pasted unparenthesized, so callers pass e.g. `(*l)`.
#define APPEND(L) L.p = realloc(L.p, ++L.n * sizeof(*L.p))
// True if the N-byte buffer P equals string literal S, ignoring case.
#define IS(P, N, S) (N == sizeof(S) - 1 && !strncasecmp(P, S, sizeof(S) - 1))
// Larger of X and Y; each argument may be evaluated twice.
#define MAX(X, Y) ((Y) < (X) ? (X) : (Y))
// Reads 16 bytes at S as one big-endian 128-bit integer.
#define LOAD128BE(S) ((unsigned __int128)LOAD64BE(S) << 64 | LOAD64BE((S) + 8))
// Reads 8 bytes at S as one big-endian 64-bit integer; the shift
// amounts are octal (070 == 56, 060 == 48, ...).
#define LOAD64BE(S) \
((unsigned long)((unsigned char *)(S))[0] << 070 | \
(unsigned long)((unsigned char *)(S))[1] << 060 | \
(unsigned long)((unsigned char *)(S))[2] << 050 | \
(unsigned long)((unsigned char *)(S))[3] << 040 | \
(unsigned long)((unsigned char *)(S))[4] << 030 | \
(unsigned long)((unsigned char *)(S))[5] << 020 | \
(unsigned long)((unsigned char *)(S))[6] << 010 | \
(unsigned long)((unsigned char *)(S))[7] << 000)
// Whole state of one assembler run. Every list below is a growable
// array (`n` elements at `p`) extended through APPEND(). The trailing
// comment on an integer member names the list that the member indexes.
struct As {
int i; // things
int section; // sections
int previous; // sections
int inpath; // strings
int outpath; // strings
// Incremented for every local label to build its ".Label.%d" name.
int counter;
int pcrelative;
// Set by the -Z and -W command line flags respectively.
bool inhibiterr;
bool inhibitwarn;
// Integer literal values, referenced from TT_INT, TT_FORWARD and
// TT_BACKWARD things.
struct Ints {
unsigned long n;
long *p;
} ints;
// Floating point literal values, referenced from TT_FLOAT things.
struct Floats {
unsigned long n;
long double *p;
} floats;
// Length-delimited text (identifiers, string literals). Element 0 is
// an empty slice appended by NewAssembler().
struct Slices {
unsigned long n;
struct Slice {
unsigned long n;
char *p;
} * p;
} slices;
// Source locations; consecutive tokens on one line share an entry.
struct Sauces {
unsigned long n;
struct Sauce {
unsigned path; // strings
unsigned line; // 1-indexed
} * p;
} sauces;
// Token stream produced by Tokenize().
struct Things {
unsigned long n;
struct Thing {
enum ThingType {
TT_INT,
TT_FLOAT,
TT_SLICE,
TT_PUNCT,
TT_FORWARD,
TT_BACKWARD,
} t : 4;
unsigned s : 28; // sauces
// For TT_PUNCT this holds the punctuation characters themselves,
// packed one per byte with the first character in the highest byte.
unsigned i; // identity,ints,floats,slices
} * p;
} things;
// Output sections. NewAssembler() creates the null section, .text,
// .data and .bss at indexes 0 through 3.
struct Sections {
unsigned long n;
struct Section {
unsigned name; // strings
int flags;
int type;
int align;
struct Slice binary;
} * p;
} sections;
// Symbol table. A `section` of zero is what OnSymbol() treats as
// "not defined yet".
struct Symbols {
unsigned long n;
struct Symbol {
bool isused;
unsigned char stb; // STB_*
unsigned char stv; // STV_*
unsigned char type; // STT_*
unsigned name; // slices
unsigned section; // sections
long offset;
long size;
struct ElfWriterSymRef ref;
} * p;
} symbols;
// Open-addressing hash index over `symbols`, maintained by GetSymbol().
// `i` counts insertions, `n` is the slot count, and a slot whose `h`
// is zero is empty.
struct HashTable {
unsigned i, n;
struct HashEntry {
unsigned h;
unsigned i;
} * p;
} symbolindex;
// Numeric local labels, recorded by OnLocalLabel().
struct Labels {
unsigned long n;
struct Label {
unsigned id;
unsigned tok; // things
unsigned symbol; // symbols
} * p;
} labels;
// Pending relocations.
struct Relas {
unsigned long n;
struct Rela {
bool isdead;
int kind; // R_X86_64_{16,32,64,PC8,PC32,PLT32,GOTPCRELX,...}
unsigned expr; // exprs
unsigned section; // sections
long offset;
long addend;
} * p;
} relas;
// Expression tree nodes. `lhs` and `rhs` index this same list and are
// initialized to -1 by AppendExpr(); `tok` records the value of `As.i`
// at the time the node was created.
struct Exprs {
unsigned long n;
struct Expr {
enum ExprKind {
EX_INT, // integer
EX_SYM, // things (then symbols after eval)
EX_NEG, // unary -
EX_NOT, // unary !
EX_BITNOT, // unary ~
EX_ADD, // +
EX_SUB, // -
EX_MUL, // *
EX_DIV, // /
EX_REM, // %
EX_AND, // &
EX_OR, // |
EX_XOR, // ^
EX_SHL, // <<
EX_SHR, // >>
EX_EQ, // ==
EX_NE, // !=
EX_LT, // <
EX_LE, // <=
} kind;
// Relocation modifier chosen by an "@gotpcrel", "@dtpoff" or "@tpoff"
// suffix (see ParsePostfix).
enum ExprMod {
EM_NORMAL,
EM_GOTPCREL,
EM_DTPOFF,
EM_TPOFF,
} em;
unsigned tok;
int lhs;
int rhs;
long x;
bool isvisited;
bool isevaluated;
} * p;
} exprs;
// `strings` owns heap strings (file contents, paths, generated names);
// `incpaths` holds the -I search directories. FreeAssembler() frees
// every element of both.
struct Strings {
unsigned long n;
char **p;
} strings, incpaths;
// NOTE(review): presumably the .pushsection/.popsection stack; its
// users are outside this chunk.
struct SectionStack {
unsigned long n;
int *p;
} sectionstack;
};
// Encoded byte for each prefix mnemonic; kPrefixByte[k] belongs to
// kPrefix[k]. Values above 0x7F (e.g. 0xF0 for "lock") are stored in
// plain `char`, so mask with 0xff before widening them.
static const char kPrefixByte[30] = {
0x67, 0x2E, 0x66, 0x3E, 0x26, 0x64, 0x65, 0xF0, 0xF3, 0xF3,
0xF2, 0xF2, 0xF3, 0x40, 0x41, 0x44, 0x45, 0x46, 0x47, 0x48,
0x49, 0x4C, 0x4D, 0x4E, 0x4F, 0x4A, 0x4B, 0x42, 0x43, 0x36,
};
// Prefix mnemonics in ascending strcmp() order (presumably so they can
// be binary searched -- the lookup is outside this chunk). "rex.wrxb"
// fills all 8 bytes and is therefore NOT NUL-terminated; compare with
// a length bound.
static const char kPrefix[30][8] = {
"addr32", "cs", "data16", "ds", "es", "fs",
"gs", "lock", "rep", "repe", "repne", "repnz",
"repz", "rex", "rex.b", "rex.r", "rex.rb", "rex.rx",
"rex.rxb", "rex.w", "rex.wb", "rex.wr", "rex.wrb", "rex.wrx",
"rex.wrxb", "rex.wx", "rex.wxb", "rex.x", "rex.xb", "ss",
};
// Segment override prefix byte for each entry of kSegment, in the same
// order; the values agree with the cs/ds/es/fs/gs/ss rows of kPrefixByte.
static const char kSegmentByte[6] = {0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36};
// Segment register names. Each name is exactly two characters, so the
// rows carry no NUL terminator.
static const char kSegment[6][2] = {"cs", "ds", "es", "fs", "gs", "ss"};
/**
* Context-sensitive register encoding information.
*
* Each row gives the encoding of one register name (written without
* the leading '%') for four operand roles: `reg`, `rm`, `base` and
* `index`. A value packs three fields as drawn below: the low three
* bits are the register number, the next bits hold log₂ of the operand
* size in bytes (0 for 8-bit names, 1<<3 for "ax", 2<<3 for "eax",
* 3<<3 for "rax"; the mm, st and xmm rows use 3<<3 and 4<<3), and bits
* 8 and up hold any REX prefix byte the encoding requires.
*
* ┌rex
* │ ┌log₂size
* │ │ ┌reg
* ├──────┐ ├─┐├─┐
* 0b0000000000000000
*
* A field of -1 presumably means the register cannot be used in that
* role (the consumers are outside this chunk). Rows are kept in
* ascending strcmp() order of `s`; keep it that way when adding rows
* in case the lookup is a binary search.
*/
static const struct Reg {
char s[8];
short reg;
short rm;
short base;
short index;
} kRegs[] = /* clang-format off */ {
{"ah", 4, 4, -1, -1 },
{"al", 0, 0, -1, -1 },
{"ax", 0 | 1<<3, 0 | 1<<3, -1, -1 },
{"bh", 7, 7, -1, -1 },
{"bl", 3, 3, -1, -1 },
{"bp", 5 | 1<<3, 5 | 1<<3, -1, -1 },
{"bpl", 5 | REX<<8, 5 | REX<<8, -1, -1 },
{"bx", 3 | 1<<3, 3 | 1<<3, -1, -1 },
{"ch", 5, 5, -1, -1 },
{"cl", 1, 1, -1, -1 },
{"cx", 1 | 1<<3, 1 | 1<<3, -1, -1 },
{"dh", 6, 6, -1, -1 },
{"di", 7 | 1<<3, 7 | 1<<3, -1, -1 },
{"dil", 7 | REX<<8, 7 | REX<<8, -1, -1 },
{"dl", 2, 2, -1, -1 },
{"dx", 2 | 1<<3, 2 | 1<<3, -1, -1 },
{"eax", 0 | 2<<3, 0 | 2<<3, 0 | 2<<3, 0 | 2<<3 },
{"ebp", 5 | 2<<3, 5 | 2<<3, 5 | 2<<3, 5 | 2<<3 },
{"ebx", 3 | 2<<3, 3 | 2<<3, 3 | 2<<3, 3 | 2<<3 },
{"ecx", 1 | 2<<3, 1 | 2<<3, 1 | 2<<3, 1 | 2<<3 },
{"edi", 7 | 2<<3, 7 | 2<<3, 7 | 2<<3, 7 | 2<<3 },
{"edx", 2 | 2<<3, 2 | 2<<3, 2 | 2<<3, 2 | 2<<3 },
{"eiz", -1, -1, -1, 4 | 2<<3 },
{"esi", 6 | 2<<3, 6 | 2<<3, 6 | 2<<3, 6 | 2<<3 },
{"esp", 4 | 2<<3, 4 | 2<<3, 4 | 2<<3, 4 | 2<<3 },
{"mm0", 0 | 3<<3, 0 | 3<<3, -1, -1 },
{"mm1", 1 | 3<<3, 1 | 3<<3, -1, -1 },
{"mm2", 2 | 3<<3, 2 | 3<<3, -1, -1 },
{"mm3", 3 | 3<<3, 3 | 3<<3, -1, -1 },
{"mm4", 4 | 3<<3, 4 | 3<<3, -1, -1 },
{"mm5", 5 | 3<<3, 5 | 3<<3, -1, -1 },
{"mm6", 6 | 3<<3, 6 | 3<<3, -1, -1 },
{"mm7", 7 | 3<<3, 7 | 3<<3, -1, -1 },
{"mm8", 0 | 3<<3 | REXR<<8, 0 | 3<<3 | REXB<<8, -1, -1 },
{"mm9", 1 | 3<<3 | REXR<<8, 1 | 3<<3 | REXB<<8, -1, -1 },
{"r10", 2 | 3<<3 | REXR<<8 | REXW<<8, 2 | 3<<3 | REXB<<8 | REXW<<8, 2 | 3<<3 | REXB<<8, 2 | 3<<3 | REXX<<8 },
{"r10b", 2 | REXR<<8, 2 | REXB<<8, -1, -1 },
{"r10d", 2 | 2<<3 | REXR<<8, 2 | 2<<3 | REXB<<8, 2 | 2<<3 | REXB<<8, 2 | 2<<3 | REXX<<8 },
{"r10w", 2 | 1<<3 | REXR<<8, 2 | 1<<3 | REXB<<8, -1, -1 },
{"r11", 3 | 3<<3 | REXR<<8 | REXW<<8, 3 | 3<<3 | REXB<<8 | REXW<<8, 3 | 3<<3 | REXB<<8, 3 | 3<<3 | REXX<<8 },
{"r11b", 3 | REXR<<8, 3 | REXB<<8, -1, -1 },
{"r11d", 3 | 2<<3 | REXR<<8, 3 | 2<<3 | REXB<<8, 3 | 2<<3 | REXB<<8, 3 | 2<<3 | REXX<<8 },
{"r11w", 3 | 1<<3 | REXR<<8, 3 | 1<<3 | REXB<<8, -1, -1 },
{"r12", 4 | 3<<3 | REXR<<8 | REXW<<8, 4 | 3<<3 | REXB<<8 | REXW<<8, 4 | 3<<3 | REXB<<8, 4 | 3<<3 | REXX<<8 },
{"r12b", 4 | REXR<<8, 4 | REXB<<8, -1, -1 },
{"r12d", 4 | 2<<3 | REXR<<8, 4 | 2<<3 | REXB<<8, 4 | 2<<3 | REXB<<8, 4 | 2<<3 | REXX<<8 },
{"r12w", 4 | 1<<3 | REXR<<8, 4 | 1<<3 | REXB<<8, -1, -1 },
{"r13", 5 | 3<<3 | REXR<<8 | REXW<<8, 5 | 3<<3 | REXB<<8 | REXW<<8, 5 | 3<<3 | REXB<<8, 5 | 3<<3 | REXX<<8 },
{"r13b", 5 | REXR<<8, 5 | REXB<<8, -1, -1 },
{"r13d", 5 | 2<<3 | REXR<<8, 5 | 2<<3 | REXB<<8, 5 | 2<<3 | REXB<<8, 5 | 2<<3 | REXX<<8 },
{"r13w", 5 | 1<<3 | REXR<<8, 5 | 1<<3 | REXB<<8, -1, -1 },
{"r14", 6 | 3<<3 | REXR<<8 | REXW<<8, 6 | 3<<3 | REXB<<8 | REXW<<8, 6 | 3<<3 | REXB<<8, 6 | 3<<3 | REXX<<8 },
{"r14b", 6 | REXR<<8, 6 | REXB<<8, -1, -1 },
{"r14d", 6 | 2<<3 | REXR<<8, 6 | 2<<3 | REXB<<8, 6 | 2<<3 | REXB<<8, 6 | 2<<3 | REXX<<8 },
{"r14w", 6 | 1<<3 | REXR<<8, 6 | 1<<3 | REXB<<8, -1, -1 },
{"r15", 7 | 3<<3 | REXR<<8 | REXW<<8, 7 | 3<<3 | REXB<<8 | REXW<<8, 7 | 3<<3 | REXB<<8, 7 | 3<<3 | REXX<<8 },
{"r15b", 7 | REXR<<8, 7 | REXB<<8, -1, -1 },
{"r15d", 7 | 2<<3 | REXR<<8, 7 | 2<<3 | REXB<<8, 7 | 2<<3 | REXB<<8, 7 | 2<<3 | REXX<<8 },
{"r15w", 7 | 1<<3 | REXR<<8, 7 | 1<<3 | REXB<<8, -1, -1 },
{"r8", 0 | 3<<3 | REXR<<8 | REXW<<8, 0 | 3<<3 | REXB<<8 | REXW<<8, 0 | 3<<3 | REXB<<8, 0 | 3<<3 | REXX<<8 },
{"r8b", 0 | REXR<<8, 0 | REXB<<8, -1, -1 },
{"r8d", 0 | 2<<3 | REXR<<8, 0 | 2<<3 | REXB<<8, 0 | 2<<3 | REXB<<8, 0 | 2<<3 | REXX<<8 },
{"r8w", 0 | 1<<3 | REXR<<8, 0 | 1<<3 | REXB<<8, -1, -1 },
{"r9", 1 | 3<<3 | REXR<<8 | REXW<<8, 1 | 3<<3 | REXB<<8 | REXW<<8, 1 | 3<<3 | REXB<<8, 1 | 3<<3 | REXX<<8 },
{"r9b", 1 | REXR<<8, 1 | REXB<<8, -1, -1 },
{"r9d", 1 | 2<<3 | REXR<<8, 1 | 2<<3 | REXB<<8, 1 | 2<<3 | REXB<<8, 1 | 2<<3 | REXX<<8 },
{"r9w", 1 | 1<<3 | REXR<<8, 1 | 1<<3 | REXB<<8, -1, -1 },
{"rax", 0 | 3<<3 | REXW<<8, 0 | 3<<3 | REXW<<8, 0 | 3<<3, 0 | 3<<3 },
{"rbp", 5 | 3<<3 | REXW<<8, 5 | 3<<3 | REXW<<8, 5 | 3<<3, 5 | 3<<3 },
{"rbx", 3 | 3<<3 | REXW<<8, 3 | 3<<3 | REXW<<8, 3 | 3<<3, 3 | 3<<3 },
{"rcx", 1 | 3<<3 | REXW<<8, 1 | 3<<3 | REXW<<8, 1 | 3<<3, 1 | 3<<3 },
{"rdi", 7 | 3<<3 | REXW<<8, 7 | 3<<3 | REXW<<8, 7 | 3<<3, 7 | 3<<3 },
{"rdx", 2 | 3<<3 | REXW<<8, 2 | 3<<3 | REXW<<8, 2 | 3<<3, 2 | 3<<3 },
{"riz", -1, -1, -1, 4 | 3<<3 },
{"rsi", 6 | 3<<3 | REXW<<8, 6 | 3<<3 | REXW<<8, 6 | 3<<3, 6 | 3<<3 },
{"rsp", 4 | 3<<3 | REXW<<8, 4 | 3<<3 | REXW<<8, 4 | 3<<3, 4 | 3<<3 },
{"si", 6 | 1<<3, 6 | 1<<3, 6 | 1<<3, 6 | 1<<3 },
{"sil", 6 | REX<<8, 6 | REX<<8, 6 | REX<<8, 6 | REX<<8 },
{"sp", 4 | 1<<3, 4 | 1<<3, 4 | 1<<3, 4 | 1<<3 },
{"spl", 4 | REX<<8, 4 | REX<<8, 4 | REX<<8, 4 | REX<<8 },
{"st", 0 | 4<<3, 0 | 4<<3, -1, -1 },
{"st(0)", 0 | 4<<3, 0 | 4<<3, -1, -1 },
{"st(1)", 1 | 4<<3, 1 | 4<<3, -1, -1 },
{"st(2)", 2 | 4<<3, 2 | 4<<3, -1, -1 },
{"st(3)", 3 | 4<<3, 3 | 4<<3, -1, -1 },
{"st(4)", 4 | 4<<3, 4 | 4<<3, -1, -1 },
{"st(5)", 5 | 4<<3, 5 | 4<<3, -1, -1 },
{"st(6)", 6 | 4<<3, 6 | 4<<3, -1, -1 },
{"st(7)", 7 | 4<<3, 7 | 4<<3, -1, -1 },
{"xmm0", 0 | 4<<3, 0 | 4<<3, -1, -1 },
{"xmm1", 1 | 4<<3, 1 | 4<<3, -1, -1 },
{"xmm10", 2 | 4<<3 | REXR<<8, 2 | 4<<3 | REXB<<8, -1, -1 },
{"xmm11", 3 | 4<<3 | REXR<<8, 3 | 4<<3 | REXB<<8, -1, -1 },
{"xmm12", 4 | 4<<3 | REXR<<8, 4 | 4<<3 | REXB<<8, -1, -1 },
{"xmm13", 5 | 4<<3 | REXR<<8, 5 | 4<<3 | REXB<<8, -1, -1 },
{"xmm14", 6 | 4<<3 | REXR<<8, 6 | 4<<3 | REXB<<8, -1, -1 },
{"xmm15", 7 | 4<<3 | REXR<<8, 7 | 4<<3 | REXB<<8, -1, -1 },
{"xmm2", 2 | 4<<3, 2 | 4<<3, -1, -1 },
{"xmm3", 3 | 4<<3, 3 | 4<<3, -1, -1 },
{"xmm4", 4 | 4<<3, 4 | 4<<3, -1, -1 },
{"xmm5", 5 | 4<<3, 5 | 4<<3, -1, -1 },
{"xmm6", 6 | 4<<3, 6 | 4<<3, -1, -1 },
{"xmm7", 7 | 4<<3, 7 | 4<<3, -1, -1 },
{"xmm8", 0 | 4<<3 | REXR<<8, 0 | 4<<3 | REXB<<8, -1, -1 },
{"xmm9", 1 | 4<<3 | REXR<<8, 1 | 4<<3 | REXB<<8, -1, -1 },
} /* clang-format on */;
/**
 * Hashes `n` bytes at `p` with a multiplicative mix.
 *
 * @return nonzero 32-bit hash; zero is remapped to one because the
 *     symbol index uses a zero hash to mark an empty slot
 */
static unsigned Hash(const void *p, unsigned long n) {
  const unsigned char *bytes = (const unsigned char *)p;
  unsigned acc = 0;
  unsigned k;
  for (k = 0; k < n; k++) {
    acc = (acc + bytes[k]) * 0x9e3779b1;
  }
  return MAX(1, acc);
}
/**
 * Tells whether punctuation character `c` may be fused with adjacent
 * punctuation into one multi-character token. Only ';' and '$' always
 * stand alone.
 */
static bool IsPunctMergeable(int c) {
  return !(c == ';' || c == '$');
}
/**
 * Unpacks a punctuation token into a NUL-terminated string.
 *
 * The low three bytes of `p` are emitted from most to least
 * significant; zero bytes are skipped.
 *
 * @param b receives at most three characters plus the terminator
 * @return b
 */
static char *PunctToStr(int p, char b[4]) {
  int shift;
  int len = 0;
  memset(b, 0, 4);
  for (shift = 16; shift >= 0; shift -= 8) {
    int ch = (p >> shift) & 0xff;
    if (ch) {
      b[len++] = ch;
    }
  }
  return b;
}
/**
 * Writes slice `s` followed by a newline to stderr.
 */
static void PrintSlice(struct Slice s) {
  // The '*' precision of %.*s must be an int; s.n is unsigned long.
  fprintf(stderr, "%.*s\n", (int)s.n, s.p);
}
/**
 * Appends pointer `p` to list `l`. The pointer itself is stored; the
 * string is not copied.
 *
 * @return p
 */
static char *SaveString(struct Strings *l, char *p) {
  unsigned long slot;
  APPEND((*l));
  slot = l->n - 1;
  l->p[slot] = p;
  return p;
}
/**
 * Copies NUL-terminated string `s` into the assembler's string list.
 *
 * @return index of the copy in a->strings
 */
static int StrDup(struct As *a, const char *s) {
  char *copy = strdup(s);
  SaveString(&a->strings, copy);
  return a->strings.n - 1;
}
/**
 * Copies slice `s` into the assembler's string list as a
 * NUL-terminated string.
 *
 * @return index of the copy in a->strings
 */
static int SliceDup(struct As *a, struct Slice s) {
  char *copy = strndup(s.p, s.n);
  SaveString(&a->strings, copy);
  return a->strings.n - 1;
}
static int AppendSauce(struct As *a, int path, int line) {
if (!a->sauces.n || (line != a->sauces.p[a->sauces.n - 1].line ||
path != a->sauces.p[a->sauces.n - 1].path)) {
APPEND(a->sauces);
a->sauces.p[a->sauces.n - 1].path = path;
a->sauces.p[a->sauces.n - 1].line = line;
}
return a->sauces.n - 1;
}
static void AppendExpr(struct As *a) {
APPEND(a->exprs);
memset(a->exprs.p + a->exprs.n - 1, 0, sizeof(*a->exprs.p));
a->exprs.p[a->exprs.n - 1].tok = a->i;
a->exprs.p[a->exprs.n - 1].lhs = -1;
a->exprs.p[a->exprs.n - 1].rhs = -1;
}
static void AppendThing(struct As *a) {
APPEND(a->things);
memset(a->things.p + a->things.n - 1, 0, sizeof(*a->things.p));
}
static void AppendRela(struct As *a) {
APPEND(a->relas);
memset(a->relas.p + a->relas.n - 1, 0, sizeof(*a->relas.p));
}
static void AppendSlice(struct As *a) {
APPEND(a->slices);
memset(a->slices.p + a->slices.n - 1, 0, sizeof(*a->slices.p));
}
static int AppendSection(struct As *a, int name, int flags, int type) {
int i;
APPEND(a->sections);
i = a->sections.n - 1;
a->sections.p[i].name = name;
a->sections.p[i].flags = flags;
a->sections.p[i].type = type;
a->sections.p[i].align = 1;
a->sections.p[i].binary.p = NULL;
a->sections.p[i].binary.n = 0;
return i;
}
/**
 * Allocates an assembler whose slice list holds one empty slice and
 * whose section list holds the null section, .text, .data and .bss
 * (indexes 0 to 3). The current section starts out as .text.
 *
 * Release the result with FreeAssembler().
 */
static struct As *NewAssembler(void) {
  struct As *as;
  as = calloc(1, sizeof(*as));
  AppendSlice(as);
  AppendSection(as, StrDup(as, ""), 0, SHT_NULL);
  AppendSection(as, StrDup(as, ".text"), SHF_ALLOC | SHF_EXECINSTR,
                SHT_PROGBITS);
  AppendSection(as, StrDup(as, ".data"), SHF_ALLOC | SHF_WRITE, SHT_PROGBITS);
  AppendSection(as, StrDup(as, ".bss"), SHF_ALLOC | SHF_WRITE, SHT_NOBITS);
  as->section = 1;  // .text
  return as;
}
/**
 * Releases an assembler: first the memory owned by list elements
 * (section contents, saved strings, include paths), then the lists
 * themselves, then the object.
 */
static void FreeAssembler(struct As *a) {
  int k;
  k = 0;
  while (k < a->sections.n) {
    free(a->sections.p[k++].binary.p);
  }
  k = 0;
  while (k < a->strings.n) {
    free(a->strings.p[k++]);
  }
  k = 0;
  while (k < a->incpaths.n) {
    free(a->incpaths.p[k++]);
  }
  free(a->ints.p);
  free(a->floats.p);
  free(a->slices.p);
  free(a->sauces.p);
  free(a->things.p);
  free(a->sections.p);
  free(a->symbols.p);
  free(a->symbolindex.p);
  free(a->labels.p);
  free(a->relas.p);
  free(a->exprs.p);
  free(a->strings.p);
  free(a->incpaths.p);
  free(a->sectionstack.p);
  free(a);
}
/**
 * Parses the command line into `a`.
 *
 *     -o FILE / -oFILE   output path (initially "a.out")
 *     -I DIR  / -IDIR    appends an include search path
 *     -Z                 sets inhibiterr
 *     -W                 sets inhibitwarn
 *     FILE               input path (initially "-"); the last one wins
 *
 * Other arguments starting with '-' are ignored. A trailing -o or -I
 * without its operand is reported on stderr and exits with status 1;
 * previously argv[argc] (NULL) was handed to strdup().
 */
static void ReadFlags(struct As *a, int argc, char *argv[]) {
  int i;
  a->inpath = StrDup(a, "-");
  a->outpath = StrDup(a, "a.out");
  for (i = 1; i < argc; ++i) {
    if (!strcmp(argv[i], "-o") || !strcmp(argv[i], "-I")) {
      if (i + 1 >= argc) {
        fprintf(stderr, "%s: option requires an argument\n", argv[i]);
        exit(1);
      }
      if (argv[i][1] == 'o') {
        a->outpath = StrDup(a, argv[++i]);
      } else {
        SaveString(&a->incpaths, strdup(argv[++i]));
      }
    } else if (startswith(argv[i], "-o")) {
      a->outpath = StrDup(a, argv[i] + 2);
    } else if (startswith(argv[i], "-I")) {
      SaveString(&a->incpaths, strdup(argv[i] + 2));
    } else if (!strcmp(argv[i], "-Z")) {
      a->inhibiterr = true;
    } else if (!strcmp(argv[i], "-W")) {
      a->inhibitwarn = true;
    } else if (argv[i][0] != '-') {
      a->inpath = StrDup(a, argv[i]);
    }
  }
}
/**
 * Decodes one character of a string or character literal.
 *
 * @param buf unused
 * @param c character that was just read from p
 * @param p text being scanned
 * @param i in/out index of the character following `c`; advanced past
 *     whatever an escape sequence consumes
 * @return `c` itself unless it is a backslash; otherwise the value of
 *     the escape: \a \b \t \n \v \f \r \e, \x plus up to two hex
 *     digits, or up to three octal digits. Any other escaped character
 *     stands for itself (so does 'x' with no hex digit after it).
 */
static int ReadCharLiteral(struct Slice *buf, int c, char *p, int *i) {
  if (c != '\\') return c;
  // A backslash right before the terminator is kept literally. Reading
  // on would move *i past the NUL and send the caller out of bounds.
  if (!p[*i]) return c;
  switch ((c = p[(*i)++])) {
    case 'a':
      return '\a';
    case 'b':
      return '\b';
    case 't':
      return '\t';
    case 'n':
      return '\n';
    case 'v':
      return '\v';
    case 'f':
      return '\f';
    case 'r':
      return '\r';
    case 'e':
      return 033;
    case 'x':
      // <ctype.h> functions need an unsigned char value.
      if (isxdigit((unsigned char)p[*i])) {
        c = hextoint(p[(*i)++]);
        if (isxdigit((unsigned char)p[*i])) {
          c = c * 16 + hextoint(p[(*i)++]);
        }
      }
      return c;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      c -= '0';
      if ('0' <= p[*i] && p[*i] <= '7') {
        c = c * 8 + (p[(*i)++] - '0');
        if ('0' <= p[*i] && p[*i] <= '7') {
          c = c * 8 + (p[(*i)++] - '0');
        }
      }
      return c;
    default:
      return c;
  }
}
static void PrintLocation(struct As *a) {
fprintf(stderr,
"%s:%d:: ", a->strings.p[a->sauces.p[a->things.p[a->i].s].path],
a->sauces.p[a->things.p[a->i].s].line);
}
/**
 * Reports a printf-style error at the current token's source location
 * on stderr, then terminates through __die(). Never returns.
 */
static wontreturn void Fail(struct As *a, const char *fmt, ...) {
  va_list args;
  va_start(args, fmt);
  PrintLocation(a);
  vfprintf(stderr, fmt, args);
  fputc('\n', stderr);
  va_end(args);
  __die();
}
// Fails with "invalid register" at the current token; never returns.
static wontreturn void InvalidRegister(struct As *a) {
Fail(a, "invalid register");
}
/**
 * Searches the -I directories, in order, for regular file `file`.
 *
 * @return newly allocated path of the first match, which the caller
 *     frees, or NULL if no directory contains it
 */
static char *FindInclude(struct As *a, const char *file) {
  struct stat st;
  char *candidate;
  int k = 0;
  while (k < a->incpaths.n) {
    candidate = xjoinpaths(a->incpaths.p[k++], file);
    if (stat(candidate, &st) == -1 || !S_ISREG(st.st_mode)) {
      free(candidate);
      continue;
    }
    return candidate;
  }
  return NULL;
}
/**
 * Reads the file named by a->strings.p[path] and appends its tokens to
 * a->things.
 *
 * Token kinds produced:
 *
 * - newline: a ';' punctuation token
 * - identifiers, directives, %registers and @suffixes: TT_SLICE
 *   pointing into the file text
 * - numbers: TT_INT or TT_FLOAT; an integer immediately followed by
 *   f/F or b/B becomes TT_FORWARD or TT_BACKWARD
 * - 'c' character literals: TT_INT
 * - "..." strings: TT_SLICE holding the decoded bytes, except directly
 *   after `.include`, where the named file is tokenized in place
 * - anything else: punctuation, fused with preceding mergeable
 *   punctuation
 *
 * Comments (block comments, '#', '//', and '/' at the start of a line)
 * are skipped, as are blanks and commas. `.end` stops tokenizing this
 * file.
 *
 * Changes from the previous revision:
 *
 * - a NULL result from read_file() is reported instead of being passed
 *   on to skip_bom()
 * - a line comment that reaches the end of the text without a newline
 *   ends tokenization rather than dereferencing strchr()'s NULL
 * - unterminated string and character literals stop at the NUL
 *   terminator rather than stepping over it
 * - '+' and '-' are part of a number only right after an exponent
 *   marker; "1+2" used to be consumed as one token of value 1
 *
 * @param path index into a->strings of the file to read
 */
static void Tokenize(struct As *a, int path) {
  int c, i, line;
  char *p, *path2;
  struct Slice buf;
  bool bol, isfloat, isfpu;
  if (!(p = read_file(a->strings.p[path]))) {
    // Fail() is unusable here: it needs a token to report a location.
    fprintf(stderr, "%s: failed to read file\n", a->strings.p[path]);
    exit(1);
  }
  SaveString(&a->strings, p);
  p = skip_bom(p);
  canonicalize_newline(p);
  remove_backslash_newline(p);
  line = 1;
  bol = true;
  while ((c = *p)) {
    // block comment
    if (c == '/' && p[1] == '*') {
      for (i = 2; p[i]; ++i) {
        if (p[i] == '\n') {
          ++line;
          bol = true;
        } else {
          bol = false;
          if (p[i] == '*' && p[i + 1] == '/') {
            i += 2;
            break;
          }
        }
      }
      p += i;
      continue;
    }
    // line comment: skip to the newline, which is tokenized next
    if (c == '#' || (c == '/' && bol) || (c == '/' && p[1] == '/')) {
      if (!(p = strchr(p, '\n'))) break;
      continue;
    }
    // newline acts as a statement separator
    if (c == '\n') {
      AppendThing(a);
      a->things.p[a->things.n - 1].t = TT_PUNCT;
      a->things.p[a->things.n - 1].s = AppendSauce(a, path, line);
      a->things.p[a->things.n - 1].i = ';';
      ++p;
      bol = true;
      ++line;
      continue;
    }
    bol = false;
    // blanks and commas separate tokens and produce none
    if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f' ||
        c == '\v' || c == ',') {
      ++p;
      continue;
    }
    // identifier, directive, %register or @suffix
    if ((c & 0x80) || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') ||
        c == '_' || c == '%' || c == '@' ||
        (c == '.' && !('0' <= p[1] && p[1] <= '9'))) {
      isfpu = false;
      for (i = 1;; ++i) {
        if (!((p[i] & 0x80) || ('a' <= p[i] && p[i] <= 'z') ||
              ('A' <= p[i] && p[i] <= 'Z') || ('0' <= p[i] && p[i] <= '9') ||
              p[i] == '.' || p[i] == '_' || p[i] == '$' ||
              (isfpu && (p[i] == '(' || p[i] == ')')))) {
          break;
        }
        // after "%st", parentheses belong to the name, e.g. %st(1)
        if (i == 2 && p[i - 2] == '%' && p[i - 1] == 's' && p[i] == 't') {
          isfpu = true;
        }
      }
      if (i == 4 && !strncasecmp(p, ".end", 4)) break;
      AppendThing(a);
      a->things.p[a->things.n - 1].t = TT_SLICE;
      a->things.p[a->things.n - 1].s = AppendSauce(a, path, line);
      a->things.p[a->things.n - 1].i = a->slices.n;
      AppendSlice(a);
      a->slices.p[a->slices.n - 1].p = p;
      a->slices.p[a->slices.n - 1].n = i;
      p += i;
      continue;
    }
    // number or numeric label reference
    if (('0' <= c && c <= '9') || (c == '.' && '0' <= p[1] && p[1] <= '9')) {
      isfloat = c == '.';
      if (c == '0' && p[1] != '.') {
        if (p[1] == 'x' || p[1] == 'X') {
          for (i = 2;; ++i) {
            if (!(('0' <= p[i] && p[i] <= '9') ||
                  ('a' <= p[i] && p[i] <= 'f') ||
                  ('A' <= p[i] && p[i] <= 'F'))) {
              break;
            }
          }
        } else if ((p[1] == 'b' || p[1] == 'B') &&
                   ('0' <= p[2] && p[2] <= '9')) {
          for (i = 2;; ++i) {
            if (!(p[i] == '0' || p[i] == '1')) break;
          }
        } else {
          for (i = 1;; ++i) {
            if (!('0' <= p[i] && p[i] <= '7')) break;
          }
        }
      } else {
        for (i = 1;; ++i) {
          if ('0' <= p[i] && p[i] <= '9') {
            continue;
          } else if ((p[i] == '-' || p[i] == '+') &&
                     (p[i - 1] == 'e' || p[i - 1] == 'E')) {
            // a sign is part of the number only as an exponent sign
            continue;
          } else if (p[i] == '.' || p[i] == 'e' || p[i] == 'E') {
            isfloat = true;
            continue;
          }
          break;
        }
      }
      AppendThing(a);
      if (isfloat) {
        APPEND(a->floats);
        a->floats.p[a->floats.n - 1] = strtold(p, NULL);
        a->things.p[a->things.n - 1].i = a->floats.n - 1;
        a->things.p[a->things.n - 1].t = TT_FLOAT;
      } else {
        APPEND(a->ints);
        a->ints.p[a->ints.n - 1] = strtol(p, NULL, 0);
        a->things.p[a->things.n - 1].i = a->ints.n - 1;
        if (p[i] == 'f' || p[i] == 'F') {
          a->things.p[a->things.n - 1].t = TT_FORWARD;
          ++i;
        } else if (p[i] == 'b' || p[i] == 'B') {
          a->things.p[a->things.n - 1].t = TT_BACKWARD;
          ++i;
        } else {
          a->things.p[a->things.n - 1].t = TT_INT;
        }
      }
      a->things.p[a->things.n - 1].s = AppendSauce(a, path, line);
      p += i;
      continue;
    }
    // character literal; the closing quote is optional
    if (c == '\'') {
      i = 1;
      if ((c = p[i])) {
        ++i;
        // a backslash right before the terminator is taken literally
        if (c != '\\' || p[i]) {
          c = ReadCharLiteral(&buf, c, p, &i);
        }
        if (p[i] == '\'') ++i;
      }
      p += i;
      AppendThing(a);
      a->things.p[a->things.n - 1].t = TT_INT;
      a->things.p[a->things.n - 1].s = AppendSauce(a, path, line);
      a->things.p[a->things.n - 1].i = a->ints.n;
      APPEND(a->ints);
      a->ints.p[a->ints.n - 1] = c;
      continue;
    }
    // string literal
    if (c == '"') {
      buf.n = 0;
      buf.p = NULL;
      // stops ON the terminator if the closing quote is missing
      for (i = 1; (c = p[i]);) {
        ++i;
        if (c == '"') break;
        if (c != '\\' || p[i]) {
          c = ReadCharLiteral(&buf, c, p, &i);
        }
        APPEND(buf);
        buf.p[buf.n - 1] = c;
      }
      p += i;
      if (a->things.n && a->things.p[a->things.n - 1].t == TT_SLICE &&
          IS(a->slices.p[a->things.p[a->things.n - 1].i].p,
             a->slices.p[a->things.p[a->things.n - 1].i].n, ".include")) {
        APPEND(buf);
        buf.p[buf.n - 1] = '\0';
        --a->things.n;  // drop the .include token itself
        if ((path2 = FindInclude(a, buf.p))) {
          Tokenize(a, StrDup(a, path2));
          free(path2);
          free(buf.p);
        } else {
          Fail(a, "not found: %s", buf.p);
        }
      } else {
        SaveString(&a->strings, buf.p);
        AppendThing(a);
        a->things.p[a->things.n - 1].t = TT_SLICE;
        a->things.p[a->things.n - 1].s = AppendSauce(a, path, line);
        a->things.p[a->things.n - 1].i = a->slices.n;
        AppendSlice(a);
        a->slices.p[a->slices.n - 1] = buf;
      }
      continue;
    }
    // punctuation: fuse with the previous punctuation token if allowed
    if (IsPunctMergeable(c) && a->things.n &&
        a->things.p[a->things.n - 1].t == TT_PUNCT &&
        IsPunctMergeable(a->things.p[a->things.n - 1].i)) {
      a->things.p[a->things.n - 1].i = a->things.p[a->things.n - 1].i << 8 | c;
    } else {
      AppendThing(a);
      a->things.p[a->things.n - 1].t = TT_PUNCT;
      a->things.p[a->things.n - 1].s = AppendSauce(a, path, line);
      a->things.p[a->things.n - 1].i = c;
    }
    ++p;
  }
}
// Returns the index in a->symbols of the symbol named by slice `name`
// (an index into a->slices), creating a zeroed symbol if none exists.
//
// Lookup goes through a->symbolindex, an open-addressing hash table
// with a power-of-two slot count. A slot with h == 0 is empty, which is
// why Hash() never returns zero. Names are compared by length and
// bytes, so two different slices with equal text map to one symbol.
static int GetSymbol(struct As *a, int name) {
struct HashEntry *p;
// NOTE(review): n2 is never used.
unsigned i, j, k, n, m, h, n2;
h = Hash(a->slices.p[name].p, a->slices.p[name].n);
n = a->symbolindex.n;
i = 0;
if (n) {
// Probe until the name is found or an empty slot is reached. If it
// is not found, `i` is left at that empty slot.
k = 0;
do {
i = (h + k + ((k + 1) >> 1)) & (n - 1);
if (a->symbolindex.p[i].h == h &&
a->slices.p[a->symbols.p[a->symbolindex.p[i].i].name].n ==
a->slices.p[name].n &&
!memcmp(a->slices.p[a->symbols.p[a->symbolindex.p[i].i].name].p,
a->slices.p[name].p, a->slices.p[name].n)) {
return a->symbolindex.p[i].i;
}
++k;
} while (a->symbolindex.p[i].h);
}
// Not found. Count the insertion and, once the count reaches half the
// slot count, rebuild the table at twice the size (16 slots the first
// time), re-inserting every used slot.
if (++a->symbolindex.i >= (n >> 1)) {
m = n ? n << 1 : 16;
p = calloc(m, sizeof(struct HashEntry));
for (j = 0; j < n; ++j) {
if (a->symbolindex.p[j].h) {
k = 0;
do {
i = (a->symbolindex.p[j].h + k + ((k + 1) >> 1)) & (m - 1);
++k;
} while (p[i].h);
p[i].h = a->symbolindex.p[j].h;
p[i].i = a->symbolindex.p[j].i;
}
}
// Find the empty slot for the new name in the rebuilt table.
k = 0;
do {
i = (h + k + ((k + 1) >> 1)) & (m - 1);
++k;
} while (p[i].h);
free(a->symbolindex.p);
a->symbolindex.p = p;
a->symbolindex.n = m;
}
// `i` now designates an empty slot: create the symbol and index it.
APPEND(a->symbols);
memset(a->symbols.p + a->symbols.n - 1, 0, sizeof(*a->symbols.p));
a->symbolindex.p[i].h = h;
a->symbolindex.p[i].i = a->symbols.n - 1;
a->symbols.p[a->symbols.n - 1].name = name;
return a->symbols.n - 1;
}
/**
 * Defines the symbol named by slice `name` at the current offset of the
 * current section, then advances the token cursor by two.
 *
 * Fails with "already defined" if the symbol already has a section.
 */
static void OnSymbol(struct As *a, int name) {
  int i = GetSymbol(a, name);
  if (a->symbols.p[i].section) {
    // The '*' precision of %.*s must be an int; the slice length is
    // unsigned long.
    Fail(a, "already defined: %.*s", (int)a->slices.p[name].n,
         a->slices.p[name].p);
  }
  a->symbols.p[i].section = a->section;
  a->symbols.p[i].offset = a->sections.p[a->section].binary.n;
  a->i += 2;
}
static void OnLocalLabel(struct As *a, int id) {
int i;
char *name;
name = xasprintf(".Label.%d", a->counter++);
SaveString(&a->strings, name);
AppendSlice(a);
a->slices.p[a->slices.n - 1].p = name;
a->slices.p[a->slices.n - 1].n = strlen(name);
i = GetSymbol(a, a->slices.n - 1);
a->symbols.p[i].section = a->section;
a->symbols.p[i].offset = a->sections.p[a->section].binary.n;
APPEND(a->labels);
a->labels.p[a->labels.n - 1].id = id;
a->labels.p[a->labels.n - 1].tok = a->i;
a->labels.p[a->labels.n - 1].symbol = i;
a->i += 2;
}
/**
 * Makes `section` the current section and remembers the one it
 * replaces in a->previous.
 */
static void SetSection(struct As *a, int section) {
  int outgoing = a->section;
  a->section = section;
  a->previous = outgoing;
}
static bool IsInt(struct As *a, int i) {
return a->things.p[i].t == TT_INT;
}
static bool IsFloat(struct As *a, int i) {
return a->things.p[i].t == TT_FLOAT;
}
static bool IsSlice(struct As *a, int i) {
return a->things.p[i].t == TT_SLICE;
}
static bool IsPunct(struct As *a, int i, int c) {
return a->things.p[i].t == TT_PUNCT && a->things.p[i].i == c;
}
static bool IsForward(struct As *a, int i) {
return a->things.p[i].t == TT_FORWARD;
}
static bool IsBackward(struct As *a, int i) {
return a->things.p[i].t == TT_BACKWARD;
}
static bool IsRegister(struct As *a, int i) {
return IsSlice(a, i) && (a->slices.p[a->things.p[i].i].n &&
*a->slices.p[a->things.p[i].i].p == '%');
}
/**
 * Steps over the current token, which must be the punctuation `c`;
 * fails with "expected <c>" otherwise.
 */
static void ConsumePunct(struct As *a, int c) {
  char pb[4];
  if (!IsPunct(a, a->i, c)) {
    Fail(a, "expected %s", PunctToStr(c, pb));
  }
  ++a->i;
}
static int NewPrimary(struct As *a, enum ExprKind k, long x) {
AppendExpr(a);
a->exprs.p[a->exprs.n - 1].kind = k;
a->exprs.p[a->exprs.n - 1].x = x;
return a->exprs.n - 1;
}
static int NewUnary(struct As *a, enum ExprKind k, int lhs) {
AppendExpr(a);
a->exprs.p[a->exprs.n - 1].kind = k;
a->exprs.p[a->exprs.n - 1].lhs = lhs;
return a->exprs.n - 1;
}
static int NewBinary(struct As *a, enum ExprKind k, int lhs, int rhs) {
AppendExpr(a);
a->exprs.p[a->exprs.n - 1].kind = k;
a->exprs.p[a->exprs.n - 1].lhs = lhs;
a->exprs.p[a->exprs.n - 1].rhs = rhs;
return a->exprs.n - 1;
}
// primary = int
// | symbol
// | reference
static int ParsePrimary(struct As *a, int *rest, int i) {
int e;
if (IsInt(a, i)) {
*rest = i + 1;
return NewPrimary(a, EX_INT, a->ints.p[a->things.p[i].i]);
} else if (IsForward(a, i) || IsBackward(a, i) ||
(IsSlice(a, i) && (a->slices.p[a->things.p[i].i].n &&
a->slices.p[a->things.p[i].i].p[0] != '%' &&
a->slices.p[a->things.p[i].i].p[0] != '@'))) {
*rest = i + 1;
return NewPrimary(a, EX_SYM, i);
} else {
Fail(a, "expected int or label");
}
}
// postfix = primary "@gotpcrel"
// | primary "@dtpoff"
// | primary "@tpoff"
// | primary
static int ParsePostfix(struct As *a, int *rest, int i) {
int x;
struct Slice suffix;
x = ParsePrimary(a, &i, i);
if (IsSlice(a, i)) {
suffix = a->slices.p[a->things.p[i].i];
if (suffix.n && suffix.p[0] == '@') {
if (IS(suffix.p, suffix.n, "@gotpcrel")) {
a->exprs.p[x].em = EM_GOTPCREL;
++i;
} else if (IS(suffix.p, suffix.n, "@dtpoff")) {
a->exprs.p[x].em = EM_DTPOFF;
++i;
} else if (IS(suffix.p, suffix.n, "@tpoff")) {
a->exprs.p[x].em = EM_TPOFF;
++i;
}
}
}
*rest = i;
return x;
}
// unary = ("+" | "-" | "!" | "~") unary
// | postfix
static int ParseUnary(struct As *a, int *rest, int i) {
int x;
if (IsPunct(a, i, '+')) {
x = ParseUnary(a, rest, i + 1);
} else if (IsPunct(a, i, '-')) {
x = ParseUnary(a, rest, i + 1);
if (a->exprs.p[x].kind == EX_INT) {
a->exprs.p[x].x = -a->exprs.p[x].x;
} else {
x = NewPrimary(a, EX_NEG, x);
}
} else if (IsPunct(a, i, '!')) {
x = ParseUnary(a, rest, i + 1);
if (a->exprs.p[x].kind == EX_INT) {
a->exprs.p[x].x = !a->exprs.p[x].x;
} else {
x = NewPrimary(a, EX_NOT, x);
}
} else if (IsPunct(a, i, '~')) {
x = ParseUnary(a, rest, i + 1);
if (a->exprs.p[x].kind == EX_INT) {
a->exprs.p[x].x = ~a->exprs.p[x].x;
} else {
x = NewPrimary(a, EX_BITNOT, x);
}
} else {
x = ParsePostfix(a, rest, i);
}
return x;
}
// Folds `n / d` (or `n % d` when `rem` is set) for two literals.
//
// A zero divisor is reported through Fail() instead of trapping inside
// the assembler. A divisor of -1 is handled separately because
// LONG_MIN / -1 overflows: the quotient is the wrapped negation and the
// remainder is zero.
static long ParseMulFoldDiv(struct As *a, long n, long d, bool rem) {
  if (!d) Fail(a, "division by zero");
  if (d == -1) return rem ? 0 : (long)(0 - (unsigned long)n);
  return rem ? n % d : n / d;
}

// mul = unary ("*" unary | "/" unary | "%" unary)*
//
// Parses a left-associative chain of multiplicative operators starting
// at token `i` and stores the index of the token after it in *rest.
// When both operands are integer literals the result is folded into the
// left node; otherwise an EX_MUL, EX_DIV or EX_REM node is created.
static int ParseMul(struct As *a, int *rest, int i) {
  int x, y;
  x = ParseUnary(a, &i, i);
  for (;;) {
    if (IsPunct(a, i, '*')) {
      y = ParseUnary(a, &i, i + 1);
      if (a->exprs.p[x].kind == EX_INT && a->exprs.p[y].kind == EX_INT) {
        a->exprs.p[x].x *= a->exprs.p[y].x;
      } else {
        x = NewBinary(a, EX_MUL, x, y);
      }
    } else if (IsPunct(a, i, '/')) {
      y = ParseUnary(a, &i, i + 1);
      if (a->exprs.p[x].kind == EX_INT && a->exprs.p[y].kind == EX_INT) {
        a->exprs.p[x].x =
            ParseMulFoldDiv(a, a->exprs.p[x].x, a->exprs.p[y].x, false);
      } else {
        x = NewBinary(a, EX_DIV, x, y);
      }
    } else if (IsPunct(a, i, '%')) {
      y = ParseUnary(a, &i, i + 1);
      if (a->exprs.p[x].kind == EX_INT && a->exprs.p[y].kind == EX_INT) {
        a->exprs.p[x].x =
            ParseMulFoldDiv(a, a->exprs.p[x].x, a->exprs.p[y].x, true);
      } else {
        x = NewBinary(a, EX_REM, x, y);
      }
    } else {
      *rest = i;
      return x;
    }
  }
}
// add = mul ("+" mul | "-" mul)*
static int ParseAdd(struct As *a, int *rest, int i) {
int x, y;
x = ParseMul(a, &i, i);
for (;;) {
if (IsPunct(a, i, '+')) {
y = ParseMul(a, &i, i + 1);
if (a->exprs.p[x].kind == EX_INT && a->exprs.p[y].kind == EX_INT) {
a->exprs.p[x].x += a->exprs.p[y].x;
} else {
x = NewBinary(a, EX_ADD, x, y);
}
} else if (IsPunct(a, i, '-')) {
y = ParseMul(a, &i, i + 1);
if (a->exprs.p[x].kind == EX_INT && a->exprs.p[y].kind == EX_INT) {
a->exprs.p[x].x -= a->exprs.p[y].x;
} else if (a->exprs.p[y].kind == EX_INT) {
a->exprs.p[y].x = -a->exprs.p[y].x;
x = NewBinary(a, EX_ADD, x, y);
} else {
x = NewBinary(a, EX_SUB, x, y);
}
} else {
*rest = i;
return x;
}
}
}
// shift = add ("<<" add | ">>" add)*
static int ParseShift(struct As *a, int *rest, int i) {
int x, y;
x = ParseAdd(a, &i, i);
for (;;) {
if (IsPunct(a, i, '<' << 8 | '<')) {
y = ParseAdd(a, &i, i + 1);
if (a->exprs.p[x].kind == EX_INT && a->exprs.p[y].kind == EX_INT) {
a->exprs.p[x].x <<= a->exprs.p[y].x & 63;
} else {
x = NewBinary(a, EX_SHL, x, y);
}
} else if (IsPunct(a, i, '>' << 8 | '>')) {
y = ParseAdd(a, &i, i + 1);
if (a->exprs.p[x].kind == EX_INT && a->exprs.p[y].kind == EX_INT) {
a->exprs.p[x].x >>= a->exprs.p[y].x & 63;
} else {
x = NewBinary(a, EX_SHR, x, y);
}
} else {
*rest = i;
return x;
}
}
}
// relational = shift ("<" shift | "<=" shift | ">" shift | ">=" shift)*
/**
 * Parses a relational expression beginning at token index `i`.
 *
 * Only EX_LT and EX_LE nodes are produced: ">" and ">=" are expressed
 * by swapping the operands. Constant operands are folded in place
 * into the left operand as 0 or 1.
 *
 * @param rest receives the index of the first token not consumed
 * @return index of the resulting expression in a->exprs
 */
static int ParseRelational(struct As *a, int *rest, int i) {
  int lhs, rhs, lo, hi, strict, flip;
  lhs = ParseShift(a, &i, i);
  for (;;) {
    if (IsPunct(a, i, '<')) {
      strict = 1, flip = 0;
    } else if (IsPunct(a, i, '>')) {
      strict = 1, flip = 1;
    } else if (IsPunct(a, i, '<' << 8 | '=')) {
      strict = 0, flip = 0;
    } else if (IsPunct(a, i, '>' << 8 | '=')) {
      strict = 0, flip = 1;
    } else {
      break;
    }
    rhs = ParseShift(a, &i, i + 1);
    // `lo OP hi` is the comparison actually evaluated or emitted
    lo = flip ? rhs : lhs;
    hi = flip ? lhs : rhs;
    if (a->exprs.p[lhs].kind == EX_INT && a->exprs.p[rhs].kind == EX_INT) {
      if (strict) {
        a->exprs.p[lhs].x = a->exprs.p[lo].x < a->exprs.p[hi].x;
      } else {
        a->exprs.p[lhs].x = a->exprs.p[lo].x <= a->exprs.p[hi].x;
      }
    } else if (strict) {
      lhs = NewBinary(a, EX_LT, lo, hi);
    } else {
      lhs = NewBinary(a, EX_LE, lo, hi);
    }
  }
  *rest = i;
  return lhs;
}
// equality = relational ("==" relational | "!=" relational)*
/**
 * Parses an equality expression beginning at token index `i`.
 *
 * Constant operands are folded in place into the left operand as 0 or
 * 1; otherwise an EX_EQ / EX_NE node is created.
 *
 * @param rest receives the index of the first token not consumed
 * @return index of the resulting expression in a->exprs
 */
static int ParseEquality(struct As *a, int *rest, int i) {
  int lhs, rhs, eq;
  lhs = ParseRelational(a, &i, i);
  for (;;) {
    eq = IsPunct(a, i, '=' << 8 | '=');
    if (!eq && !IsPunct(a, i, '!' << 8 | '=')) break;
    rhs = ParseRelational(a, &i, i + 1);
    if (a->exprs.p[lhs].kind == EX_INT && a->exprs.p[rhs].kind == EX_INT) {
      if (eq) {
        a->exprs.p[lhs].x = a->exprs.p[lhs].x == a->exprs.p[rhs].x;
      } else {
        a->exprs.p[lhs].x = a->exprs.p[lhs].x != a->exprs.p[rhs].x;
      }
    } else if (eq) {
      lhs = NewBinary(a, EX_EQ, lhs, rhs);
    } else {
      lhs = NewBinary(a, EX_NE, lhs, rhs);
    }
  }
  *rest = i;
  return lhs;
}
// and = equality ("&" equality)*
/**
 * Parses a bitwise-and expression beginning at token index `i`.
 *
 * @param rest receives the index of the first token not consumed
 * @return index of the resulting expression in a->exprs
 */
static int ParseAnd(struct As *a, int *rest, int i) {
  int lhs, rhs;
  lhs = ParseEquality(a, &i, i);
  while (IsPunct(a, i, '&')) {
    rhs = ParseEquality(a, &i, i + 1);
    if (a->exprs.p[lhs].kind != EX_INT || a->exprs.p[rhs].kind != EX_INT) {
      lhs = NewBinary(a, EX_AND, lhs, rhs);
    } else {
      // both constant: fold into the left operand
      a->exprs.p[lhs].x &= a->exprs.p[rhs].x;
    }
  }
  *rest = i;
  return lhs;
}
// xor = and ("^" and)*
/**
 * Parses a bitwise-xor expression beginning at token index `i`.
 *
 * @param rest receives the index of the first token not consumed
 * @return index of the resulting expression in a->exprs
 */
static int ParseXor(struct As *a, int *rest, int i) {
  int lhs, rhs;
  lhs = ParseAnd(a, &i, i);
  while (IsPunct(a, i, '^')) {
    rhs = ParseAnd(a, &i, i + 1);
    if (a->exprs.p[lhs].kind != EX_INT || a->exprs.p[rhs].kind != EX_INT) {
      lhs = NewBinary(a, EX_XOR, lhs, rhs);
    } else {
      // both constant: fold into the left operand
      a->exprs.p[lhs].x ^= a->exprs.p[rhs].x;
    }
  }
  *rest = i;
  return lhs;
}
// or = xor ("|" xor)*
/**
 * Parses a bitwise-or expression beginning at token index `i`.
 *
 * @param rest receives the index of the first token not consumed
 * @return index of the resulting expression in a->exprs
 */
static int ParseOr(struct As *a, int *rest, int i) {
  int lhs, rhs;
  lhs = ParseXor(a, &i, i);
  while (IsPunct(a, i, '|')) {
    rhs = ParseXor(a, &i, i + 1);
    if (a->exprs.p[lhs].kind != EX_INT || a->exprs.p[rhs].kind != EX_INT) {
      lhs = NewBinary(a, EX_OR, lhs, rhs);
    } else {
      // both constant: fold into the left operand
      a->exprs.p[lhs].x |= a->exprs.p[rhs].x;
    }
  }
  *rest = i;
  return lhs;
}
/**
 * Parses one expression at the current token cursor.
 *
 * Parsing starts at a->i and a->i is updated to the first token that
 * was not consumed.
 *
 * @return index of the resulting expression in a->exprs
 */
static int Parse(struct As *a) {
return ParseOr(a, &a->i, a->i);
}
/**
 * Parses an expression and requires it to be a constant integer.
 *
 * Reports "expected constexpr int" through Fail() when the parsed
 * expression is not EX_INT; Fail() is expected not to return.
 *
 * @return the folded integer value
 */
static long GetInt(struct As *a) {
  int e = Parse(a);
  if (a->exprs.p[e].kind != EX_INT) {
    Fail(a, "expected constexpr int");
  }
  return a->exprs.p[e].x;
}
/**
 * Consumes the float token at a->i and returns its value.
 *
 * Reports "expected float" through Fail() when the current token is
 * not a float; Fail() is expected not to return.
 */
static long double GetFloat(struct As *a) {
  long double value;
  if (!IsFloat(a, a->i)) {
    Fail(a, "expected float");
  }
  value = a->floats.p[a->things.p[a->i].i];
  a->i += 1;
  return value;
}
static struct Slice GetSlice(struct As *a) {
struct Slice res;
if (IsSlice(a, a->i)) {
res = a->slices.p[a->things.p[a->i].i];
++a->i;
return res;
} else {
Fail(a, "expected string");
}
}
/**
 * Appends `n` bytes from `p` to the binary content of the current
 * section, growing the buffer with realloc().
 */
static void EmitData(struct As *a, const void *p, unsigned long n) {
  struct Slice *out = &a->sections.p[a->section].binary;
  out->p = realloc(out->p, out->n + n);
  memcpy(out->p + out->n, p, n);
  out->n += n;
}
/**
 * Appends the low 8 bits of `x` to the current section.
 */
static void EmitByte(struct As *a, unsigned long x) {
  unsigned char byte = x;
  EmitData(a, &byte, 1);
}
/**
 * Appends the low 16 bits of `x` to the current section, least
 * significant byte first.
 */
static void EmitWord(struct As *a, unsigned long x) {
  unsigned char le[2];
  int k;
  for (k = 0; k < 2; ++k) {
    le[k] = x >> (k * 8);
  }
  EmitData(a, le, 2);
}
/**
 * Appends the low 32 bits of `x` to the current section, least
 * significant byte first.
 */
static void EmitLong(struct As *a, unsigned long x) {
  unsigned char le[4];
  int k;
  for (k = 0; k < 4; ++k) {
    le[k] = x >> (k * 8);
  }
  EmitData(a, le, 4);
}
/**
 * Appends eight bytes taken from `x` to the current section, least
 * significant byte first.
 */
void EmitQuad(struct As *a, unsigned long x) {
  unsigned char le[8];
  int k;
  for (k = 0; k < 8; ++k) {
    le[k] = x >> (k * 8);
  }
  EmitData(a, le, 8);
}
/**
 * Appends `x` using as few bytes as needed, most significant byte
 * first. Zero is emitted as a single zero byte.
 */
static void EmitVarword(struct As *a, unsigned long x) {
  int shift = 0;
  // locate the highest byte that still carries a nonzero value
  while ((x >> shift) > 255) {
    shift += 8;
  }
  for (; shift >= 0; shift -= 8) {
    EmitByte(a, x >> shift);
  }
}
/**
 * Handles a signed LEB128 directive: each constant integer operand up
 * to the next ';' token is emitted in signed LEB128 form.
 *
 * Each output byte carries seven payload bits; bit 0x80 marks that
 * more bytes follow. Encoding stops once the remaining value is pure
 * sign extension of bit 0x40 of the byte just produced. That final
 * byte must be emitted as well (without the continuation bit),
 * otherwise small values such as 0 or 2 would produce no output and
 * every larger value would be missing its terminating byte.
 *
 * @param s directive name slice (unused)
 */
static void OnSleb128(struct As *a, struct Slice s) {
  int c;
  long x;
  while (!IsPunct(a, a->i, ';')) {
    x = GetInt(a);
    for (;;) {
      c = x & 0x7f;
      x >>= 7;  // relies on arithmetic shift for negative values
      if ((x == 0 && !(c & 0x40)) || (x == -1 && (c & 0x40))) {
        EmitByte(a, c);  // last byte: continuation bit clear
        break;
      }
      EmitByte(a, c | 0x80);
    }
  }
}
/**
 * Handles an unsigned LEB128 directive: each constant integer operand
 * up to the next ';' token is emitted seven bits at a time, low bits
 * first, with bit 0x80 set on every byte except the last.
 *
 * @param s directive name slice (unused)
 */
static void OnUleb128(struct As *a, struct Slice s) {
  int septet;
  unsigned long value;
  while (!IsPunct(a, a->i, ';')) {
    value = GetInt(a);
    for (;;) {
      septet = value & 0x7f;
      value >>= 7;
      if (!value) {
        EmitByte(a, septet);
        break;
      }
      EmitByte(a, septet | 0x80);
    }
  }
}
/**
 * Handles a zero-fill directive: for every integer operand that
 * follows, emits that many zero bytes into the current section.
 *
 * @param s directive name slice (unused)
 */
static void OnZero(struct As *a, struct Slice s) {
  long count;
  char *zeros;
  for (;;) {
    if (!IsInt(a, a->i)) break;
    count = GetInt(a);
    zeros = calloc(count, 1);
    EmitData(a, zeros, count);
    free(zeros);
  }
}
/**
 * Handles a space directive: emits `size` bytes, filled with the
 * optional second integer operand or with zero when it is absent.
 *
 * @param s directive name slice (unused)
 */
static void OnSpace(struct As *a, struct Slice s) {
  long size, fill;
  char *buf;
  size = GetInt(a);
  buf = malloc(size);
  fill = 0;
  if (IsInt(a, a->i)) {
    fill = GetInt(a);
  }
  memset(buf, fill, size);
  EmitData(a, buf, size);
  free(buf);
}
/**
 * Returns the initial addend used for a relocation of type `kind`.
 *
 * The PC8, PC16 and PC32 / PLT32 / GOTPCRELX kinds get -1, -2 and -4
 * respectively; every other kind gets 0.
 */
static long GetRelaAddend(int kind) {
  if (kind == R_X86_64_PC8) {
    return -1;
  }
  if (kind == R_X86_64_PC16) {
    return -2;
  }
  if (kind == R_X86_64_PC32 || kind == R_X86_64_PLT32 ||
      kind == R_X86_64_GOTPCRELX) {
    return -4;
  }
  return 0;
}
/**
 * Emits the value of an expression through `emitter`.
 *
 * A constant (EX_INT) expression is emitted directly. An `expr` of -1
 * emits zero. Any other expression records a relocation of type
 * `kind` at the current offset of the current section and emits zero
 * as a placeholder.
 *
 * @param expr index into a->exprs, or -1 for none
 * @param kind relocation type to record for non-constant expressions
 * @param emitter function that writes the field, e.g. EmitLong
 */
static void EmitExpr(struct As *a, int expr, int kind,
                     void emitter(struct As *, unsigned long)) {
  if (expr != -1) {
    if (a->exprs.p[expr].kind == EX_INT) {
      emitter(a, a->exprs.p[expr].x);
      return;
    }
    // not resolvable yet: remember where the field lives
    AppendRela(a);
    a->relas.p[a->relas.n - 1].kind = kind;
    a->relas.p[a->relas.n - 1].expr = expr;
    a->relas.p[a->relas.n - 1].section = a->section;
    a->relas.p[a->relas.n - 1].offset = a->sections.p[a->section].binary.n;
    a->relas.p[a->relas.n - 1].addend = GetRelaAddend(kind);
  }
  emitter(a, 0);
}
/**
 * Handles a byte directive: emits each operand expression as one byte
 * (relocation type R_X86_64_8) until the next token is ';'. At least
 * one expression is always parsed.
 *
 * @param s directive name slice (unused)
 */
static void OnByte(struct As *a, struct Slice s) {
  for (;;) {
    EmitExpr(a, Parse(a), R_X86_64_8, EmitByte);
    if (IsPunct(a, a->i, ';')) break;
  }
}
/**
 * Handles a word directive: emits each operand expression as two
 * bytes (relocation type R_X86_64_16) until the next token is ';'. At
 * least one expression is always parsed.
 *
 * @param s directive name slice (unused)
 */
static void OnWord(struct As *a, struct Slice s) {
  for (;;) {
    EmitExpr(a, Parse(a), R_X86_64_16, EmitWord);
    if (IsPunct(a, a->i, ';')) break;
  }
}
/**
 * Handles a long directive: emits each operand expression as four
 * bytes (relocation type R_X86_64_32) until the next token is ';'. At
 * least one expression is always parsed.
 *
 * @param s directive name slice (unused)
 */
static void OnLong(struct As *a, struct Slice s) {
  for (;;) {
    EmitExpr(a, Parse(a), R_X86_64_32, EmitLong);
    if (IsPunct(a, a->i, ';')) break;
  }
}
/**
 * Handles a quad directive: emits each operand expression as eight
 * bytes (relocation type R_X86_64_64) until the next token is ';'. At
 * least one expression is always parsed.
 *
 * @param s directive name slice (unused)
 */
static void OnQuad(struct As *a, struct Slice s) {
  for (;;) {
    EmitExpr(a, Parse(a), R_X86_64_64, EmitQuad);
    if (IsPunct(a, a->i, ';')) break;
  }
}
/**
 * Handles a single-precision float directive: every float or integer
 * operand that follows is converted to `float` and its four bytes are
 * appended to the current section.
 *
 * @param s directive name slice (unused)
 */
static void OnFloat(struct As *a, struct Slice s) {
  float value;
  for (;;) {
    if (IsFloat(a, a->i)) {
      value = GetFloat(a);
    } else if (IsInt(a, a->i)) {
      value = GetInt(a);
    } else {
      return;
    }
    // EmitData copies the bytes, so the object can be passed directly
    EmitData(a, &value, 4);
  }
}
/**
 * Handles a double-precision float directive: every float or integer
 * operand that follows is converted to `double` and its eight bytes
 * are appended to the current section.
 *
 * @param s directive name slice (unused)
 */
static void OnDouble(struct As *a, struct Slice s) {
  double value;
  for (;;) {
    if (IsFloat(a, a->i)) {
      value = GetFloat(a);
    } else if (IsInt(a, a->i)) {
      value = GetInt(a);
    } else {
      return;
    }
    // EmitData copies the bytes, so the object can be passed directly
    EmitData(a, &value, 8);
  }
}
/**
 * Emits every float or integer operand that follows as a
 * `long double`, writing the first `n` bytes of a zero-padded 16-byte
 * image of the value.
 *
 * @param n number of bytes to emit per value (callers pass 10 or 16)
 */
static void OnLongDouble(struct As *a, int n) {
  long double value;
  char image[16];
  int more = 1;
  while (more) {
    if (IsFloat(a, a->i)) {
      value = GetFloat(a);
    } else if (IsInt(a, a->i)) {
      value = GetInt(a);
    } else {
      more = 0;
      continue;
    }
    memset(image, 0, 16);
    memcpy(image, &value, sizeof(value));
    EmitData(a, image, n);
  }
}
/**
 * Directive handler that emits the following float/integer operands
 * as long doubles, ten bytes each.
 */
static void OnFloat80(struct As *a, struct Slice s) {
OnLongDouble(a, 10);
}
/**
 * Directive handler that emits the following float/integer operands
 * as long doubles, sixteen bytes each.
 */
static void OnLdbl(struct As *a, struct Slice s) {
OnLongDouble(a, 16);
}
static void OnAscii(struct As *a, struct Slice s) {
struct Slice arg;
while (IsSlice(a, a->i)) {
arg = GetSlice(a);
EmitData(a, arg.p, arg.n);
}
}
static void OnAsciz(struct As *a, struct Slice s) {
struct Slice arg;
while (IsSlice(a, a->i)) {
arg = GetSlice(a);
EmitData(a, arg.p, arg.n);
EmitByte(a, 0);
}
}
/**
 * Directive handler that unconditionally reports "aborted" through
 * Fail().
 */
static void OnAbort(struct As *a, struct Slice s) {
Fail(a, "aborted");
}
/**
 * Directive handler that reports "error" through Fail(), unless
 * a->inhibiterr is set.
 *
 * @param s directive name slice (unused)
 */
static void OnErr(struct As *a, struct Slice s) {
  if (!a->inhibiterr) {
    Fail(a, "error");
  }
}
static void OnError(struct As *a, struct Slice s) {
struct Slice msg = GetSlice(a);
if (a->inhibiterr) return;
Fail(a, "%.*s", msg.n, msg.p);
}
static void OnWarning(struct As *a, struct Slice s) {
struct Slice msg = GetSlice(a);
if (a->inhibitwarn) return;
PrintLocation(a);
fprintf(stderr, "%.*s\n", msg.n, msg.p);
}
/**
 * Directive handler that switches to section index 1.
 *
 * NOTE(review): presumably index 1 is the text section; the section
 * table is set up outside this view.
 */
static void OnText(struct As *a, struct Slice s) {
SetSection(a, 1);
}
/**
 * Directive handler that switches to section index 2.
 *
 * NOTE(review): presumably index 2 is the data section; the section
 * table is set up outside this view.
 */
static void OnData(struct As *a, struct Slice s) {
SetSection(a, 2);
}
/**
 * Directive handler that switches to section index 3.
 *
 * NOTE(review): presumably index 3 is the bss section; the section
 * table is set up outside this view.
 */
static void OnBss(struct As *a, struct Slice s) {
SetSection(a, 3);
}
/**
 * Directive handler that switches to the section index stored in
 * a->previous.
 */
static void OnPrevious(struct As *a, struct Slice s) {
SetSection(a, a->previous);
}
/**
 * Handles an alignment directive.
 *
 * Operands: a power-of-two alignment, then an optional fill byte,
 * then an optional maximum number of padding bytes. The default fill
 * is 0x90 in sections flagged SHF_EXECINSTR and 0 elsewhere. When the
 * padding required exceeds the maximum, nothing is emitted and the
 * section alignment is left untouched.
 *
 * @param s directive name slice (unused)
 */
static void OnAlign(struct As *a, struct Slice s) {
  long pos, pad, boundary, filler, limit;
  boundary = GetInt(a);
  if (__builtin_popcountl(boundary) != 1) {
    Fail(a, "alignment not power of 2");
  }
  if (a->sections.p[a->section].flags & SHF_EXECINSTR) {
    filler = 0x90;
  } else {
    filler = 0;
  }
  limit = 268435456;
  if (IsInt(a, a->i)) {
    filler = GetInt(a);
    if (IsInt(a, a->i)) {
      limit = GetInt(a);
    }
  }
  pos = a->sections.p[a->section].binary.n;
  pad = ROUNDUP(pos, boundary) - pos;
  if (pad <= limit) {
    a->sections.p[a->section].align =
        MAX(a->sections.p[a->section].align, boundary);
    while (pad-- > 0) {
      EmitByte(a, filler);
    }
  }
}
/**
 * Maps one section flag letter to its SHF_* bit.
 *
 * Recognised letters are a, w, x, g, M, S and T. Anything else is
 * reported through Fail(), which is expected not to return.
 */
static int SectionFlag(struct As *a, int c) {
  if (c == 'a') return SHF_ALLOC;
  if (c == 'w') return SHF_WRITE;
  if (c == 'x') return SHF_EXECINSTR;
  if (c == 'g') return SHF_GROUP;
  if (c == 'M') return SHF_MERGE;
  if (c == 'S') return SHF_STRINGS;
  if (c == 'T') return SHF_TLS;
  Fail(a, "unknown section flag: %`'c", c);
}
/**
 * Converts a string of section flag letters into the bitwise OR of
 * the corresponding SHF_* bits. An empty string yields 0.
 */
static int SectionFlags(struct As *a, struct Slice s) {
  int k = 0;
  int mask = 0;
  while (k < s.n) {
    mask |= SectionFlag(a, s.p[k]);
    ++k;
  }
  return mask;
}
/**
 * Maps a section type name to its SHT_* value.
 *
 * Both the "@name" spelling and the "SHT_NAME" spelling are accepted.
 * Unknown names are reported through Fail(), which is expected not to
 * return.
 */
static int SectionType(struct As *a, struct Slice s) {
  if (IS(s.p, s.n, "@progbits") || IS(s.p, s.n, "SHT_PROGBITS")) {
    return SHT_PROGBITS;
  }
  if (IS(s.p, s.n, "@note") || IS(s.p, s.n, "SHT_NOTE")) {
    return SHT_NOTE;
  }
  if (IS(s.p, s.n, "@nobits") || IS(s.p, s.n, "SHT_NOBITS")) {
    return SHT_NOBITS;
  }
  if (IS(s.p, s.n, "@preinit_array") || IS(s.p, s.n, "SHT_PREINIT_ARRAY")) {
    return SHT_PREINIT_ARRAY;
  }
  if (IS(s.p, s.n, "@init_array") || IS(s.p, s.n, "SHT_INIT_ARRAY")) {
    return SHT_INIT_ARRAY;
  }
  if (IS(s.p, s.n, "@fini_array") || IS(s.p, s.n, "SHT_FINI_ARRAY")) {
    return SHT_FINI_ARRAY;
  }
  Fail(a, "unknown section type: %.*s", s.n, s.p);
}
/**
 * Maps a symbol type name to its STT_* value.
 *
 * Both the "@name" spelling and the "STT_NAME" spelling are accepted.
 * Unknown names are reported through Fail(), which is expected not to
 * return.
 */
static int SymbolType(struct As *a, struct Slice s) {
  if (IS(s.p, s.n, "@object") || IS(s.p, s.n, "STT_OBJECT")) {
    return STT_OBJECT;
  }
  if (IS(s.p, s.n, "@function") || IS(s.p, s.n, "STT_FUNC")) {
    return STT_FUNC;
  }
  if (IS(s.p, s.n, "@common") || IS(s.p, s.n, "STT_COMMON")) {
    return STT_COMMON;
  }
  if (IS(s.p, s.n, "@notype") || IS(s.p, s.n, "STT_NOTYPE")) {
    return STT_NOTYPE;
  }
  if (IS(s.p, s.n, "@tls_object") || IS(s.p, s.n, "STT_TLS")) {
    return STT_TLS;
  }
  Fail(a, "unknown symbol type: %.*s", s.n, s.p);
}
/**
 * Returns the index of the section whose name equals string `name`,
 * creating it with AppendSection() when no such section exists.
 *
 * @param name index into a->strings
 * @param flags section flags, used only when a section is created
 * @param type section type, used only when a section is created
 */
static int GrabSection(struct As *a, int name, int flags, int type) {
  int k = 0;
  while (k < a->sections.n) {
    if (strcmp(a->strings.p[name], a->strings.p[a->sections.p[k].name]) == 0) {
      return k;
    }
    ++k;
  }
  return AppendSection(a, name, flags, type);
}
/**
 * Handles a section directive: name, then optional flags string, then
 * optional type string.
 *
 * Default flags and type are picked from the name prefix (".text",
 * ".data", ".bss"); any other name defaults to alloc+exec+write
 * progbits. An explicit flags operand replaces the defaults, and a
 * type operand is only looked for after a flags operand.
 *
 * @param s directive name slice (unused)
 */
static void OnSection(struct As *a, struct Slice s) {
  int name, flags, type;
  name = SliceDup(a, GetSlice(a));
  // fallback for names with no recognised prefix
  flags = SHF_ALLOC | SHF_EXECINSTR | SHF_WRITE;
  type = SHT_PROGBITS;
  if (startswith(a->strings.p[name], ".text")) {
    flags = SHF_ALLOC | SHF_EXECINSTR;
  } else if (startswith(a->strings.p[name], ".data")) {
    flags = SHF_ALLOC | SHF_WRITE;
  } else if (startswith(a->strings.p[name], ".bss")) {
    flags = SHF_ALLOC | SHF_WRITE;
    type = SHT_NOBITS;
  }
  if (IsSlice(a, a->i)) {
    flags = SectionFlags(a, GetSlice(a));
    if (IsSlice(a, a->i)) {
      type = SectionType(a, GetSlice(a));
    }
  }
  SetSection(a, GrabSection(a, name, flags, type));
}
/**
 * Directive handler that saves the current section index on
 * a->sectionstack and then processes its operands exactly like
 * OnSection().
 */
static void OnPushsection(struct As *a, struct Slice s) {
// grow the stack by one slot, then store the current section in it
APPEND(a->sectionstack);
a->sectionstack.p[a->sectionstack.n - 1] = a->section;
OnSection(a, s);
}
/**
 * Directive handler that restores the section index most recently
 * saved on a->sectionstack. An empty stack is reported through Fail().
 *
 * @param s directive name slice (unused)
 */
static void OnPopsection(struct As *a, struct Slice s) {
  if (a->sectionstack.n == 0) {
    Fail(a, "stack smashed");
  }
  a->sectionstack.n -= 1;
  a->section = a->sectionstack.p[a->sectionstack.n];
}
static void OnIdent(struct As *a, struct Slice s) {
struct Slice arg;
int comment, oldsection;
comment = GrabSection(a, StrDup(a, ".comment"), SHF_MERGE | SHF_STRINGS,
SHT_PROGBITS);
oldsection = a->section;
a->section = comment;
arg = GetSlice(a);
EmitData(a, arg.p, arg.n);
EmitByte(a, 0);
a->section = oldsection;
}
/**
 * Handles an incbin directive: appends the raw content of a file to
 * the current section.
 *
 * The slice operand names the file, which is resolved through
 * FindInclude(). Failure to find, open, stat or fully read the file
 * is reported through Fail().
 *
 * @param s directive name slice (unused)
 */
static void OnIncbin(struct As *a, struct Slice s) {
  int fd;
  struct stat st;
  char *path, *path2;
  struct Slice *data, arg;
  arg = GetSlice(a);
  path = strndup(arg.p, arg.n);
  if ((path2 = FindInclude(a, path))) {
    if ((fd = open(path2, O_RDONLY)) == -1 || fstat(fd, &st) == -1) {
      Fail(a, "open failed: %s", path2);
    }
    data = &a->sections.p[a->section].binary;
    data->p = realloc(data->p, data->n + st.st_size);
    // Read into the newly added tail. Reading at data->p would clobber
    // the bytes already emitted into this section and leave the tail
    // uninitialised.
    if (read(fd, data->p + data->n, st.st_size) != st.st_size) {
      Fail(a, "read failed: %s", path2);
    }
    data->n += st.st_size;
    close(fd);
    free(path2);
  } else {
    Fail(a, "not found: %s", path);
  }
  free(path);
}
/**
 * Handles a type directive: the first token names a symbol and the
 * following slice operand names its type (see SymbolType()).
 *
 * @param s directive name slice (unused)
 */
static void OnType(struct As *a, struct Slice s) {
  int sym, kind;
  sym = GetSymbol(a, a->things.p[a->i++].i);
  kind = SymbolType(a, GetSlice(a));
  a->symbols.p[sym].type = kind;
}
/**
 * Handles a size directive: the first token names a symbol and the
 * following constant integer expression becomes its size.
 *
 * @param s directive name slice (unused)
 */
static void OnSize(struct As *a, struct Slice s) {
  int sym;
  long size;
  sym = GetSymbol(a, a->things.p[a->i++].i);
  size = GetInt(a);
  a->symbols.p[sym].size = size;
}
/**
 * Sets the `stv` field of every symbol named by the slice tokens that
 * follow to `visibility`.
 */
static void OpVisibility(struct As *a, int visibility) {
  int sym;
  for (;;) {
    if (!IsSlice(a, a->i)) return;
    sym = GetSymbol(a, a->things.p[a->i++].i);
    a->symbols.p[sym].stv = visibility;
  }
}
/**
 * Directive handler that gives the listed symbols STV_INTERNAL
 * visibility.
 */
static void OnInternal(struct As *a, struct Slice s) {
OpVisibility(a, STV_INTERNAL);
}
/**
 * Directive handler that gives the listed symbols STV_HIDDEN
 * visibility.
 */
static void OnHidden(struct As *a, struct Slice s) {
OpVisibility(a, STV_HIDDEN);
}
/**
 * Directive handler that gives the listed symbols STV_PROTECTED
 * visibility.
 */
static void OnProtected(struct As *a, struct Slice s) {
OpVisibility(a, STV_PROTECTED);
}
/**
 * Sets the `stb` field of every symbol named by the slice tokens that
 * follow to `bind`.
 */
static void OpBind(struct As *a, int bind) {
  int sym;
  for (;;) {
    if (!IsSlice(a, a->i)) return;
    sym = GetSymbol(a, a->things.p[a->i++].i);
    a->symbols.p[sym].stb = bind;
  }
}
/**
 * Directive handler that gives the listed symbols STB_LOCAL binding.
 */
static void OnLocal(struct As *a, struct Slice s) {
OpBind(a, STB_LOCAL);
}
/**
 * Directive handler that gives the listed symbols STB_WEAK binding.
 */
static void OnWeak(struct As *a, struct Slice s) {
OpBind(a, STB_WEAK);
}
/**
 * Directive handler that gives the listed symbols STB_GLOBAL binding.
 */
static void OnGlobal(struct As *a, struct Slice s) {
OpBind(a, STB_GLOBAL);
}
/**
 * Determines the operand size code (0=byte, 1=word, 2=long, 3=quad)
 * of an instruction.
 *
 * When `modrm` has ISREG set the code is taken from bits 3..5 of
 * `modrm`. Otherwise it comes from the mnemonic's suffix letter,
 * located `i` characters before the end of `s` (b, w, l or q in
 * either case). Any other letter is reported through Fail(), which is
 * expected not to return.
 */
static int GetOpSize(struct As *a, struct Slice s, int modrm, int i) {
  int suffix;
  if (modrm & ISREG) {
    return (modrm & 070) >> 3;
  }
  suffix = s.p[s.n - i];
  if (suffix == 'b' || suffix == 'B') return 0;
  if (suffix == 'w' || suffix == 'W') return 1;
  if (suffix == 'l' || suffix == 'L') return 2;
  if (suffix == 'q' || suffix == 'Q') return 3;
  Fail(a, "could not size instruction");
}
static bool ConsumeSegment(struct As *a) {
int i;
struct Slice s;
if (IsSlice(a, a->i)) {
s = a->slices.p[a->things.p[a->i].i];
if (s.n == 3 && *s.p == '%') {
for (i = 0; i < ARRAYLEN(kSegment); ++i) {
if (s.p[1] == kSegment[i][0] && s.p[2] == kSegment[i][1]) {
++a->i;
EmitByte(a, kSegmentByte[i]);
ConsumePunct(a, ':');
return true;
}
}
}
}
return false;
}
/**
 * Copies `n` bytes from `p` to `k`, lower-casing each one.
 *
 * The byte is converted to `unsigned char` before it is handed to
 * tolower(): passing a negative `char` (any byte >= 0x80 where `char`
 * is signed) is undefined behaviour for the <ctype.h> functions.
 *
 * @param k destination, must have room for `n` bytes
 * @param p source bytes, need not be NUL-terminated
 * @param n number of bytes to copy; nothing is written when n <= 0
 */
static void CopyLower(char *k, const char *p, int n) {
  int i;
  for (i = 0; i < n; ++i) {
    k[i] = tolower((unsigned char)p[i]);
  }
}
/**
 * Builds a 64-bit lookup key from up to eight characters: the text is
 * lower-cased, zero-padded on the right to eight bytes and loaded as
 * a big-endian integer.
 *
 * @param n number of characters; callers must ensure n <= 8
 */
static unsigned long MakeKey64(const char *p, int n) {
  char key[8];
  memset(key, 0, sizeof(key));
  CopyLower(key, p, n);
  return LOAD64BE(key);
}
/**
 * Builds a 128-bit lookup key from up to sixteen characters: the text
 * is lower-cased, zero-padded on the right to sixteen bytes and
 * loaded as a big-endian integer.
 *
 * @param n number of characters; callers must ensure n <= 16
 */
static unsigned __int128 MakeKey128(const char *p, int n) {
  char key[16];
  memset(key, 0, sizeof(key));
  CopyLower(key, p, n);
  return LOAD128BE(key);
}
/**
 * Emits the prefix byte for an instruction-prefix mnemonic.
 *
 * The name (1..8 characters, case-insensitive) is looked up in
 * kPrefix by binary search on its big-endian 64-bit key, so kPrefix
 * must be sorted by that key.
 *
 * @return true if the name was a prefix and its byte was emitted
 */
static bool Prefix(struct As *a, const char *p, int n) {
  int lo, hi, mid;
  unsigned long want, have;
  if (!n || n > 8) return false;
  want = MakeKey64(p, n);
  lo = 0;
  hi = ARRAYLEN(kPrefix) - 1;
  while (lo <= hi) {
    mid = (lo + hi) >> 1;
    have = LOAD64BE(kPrefix[mid]);
    if (want == have) {
      EmitByte(a, kPrefixByte[mid]);
      return true;
    }
    if (want < have) {
      hi = mid - 1;
    } else {
      lo = mid + 1;
    }
  }
  return false;
}
/**
 * Looks up a register by name.
 *
 * The name must start with '%' and be at most eight characters long
 * including the '%'. The remainder is matched case-insensitively
 * against kRegs by binary search on its big-endian 64-bit key, so
 * kRegs must be sorted by that key.
 *
 * @param out_reg receives a copy of the table entry on success
 * @return true if the register was found
 */
static bool FindReg(const char *p, int n, struct Reg *out_reg) {
  int lo, hi, mid;
  unsigned long want, have;
  if (!n || n > 8 || *p != '%') return false;
  want = MakeKey64(p + 1, n - 1);  // key excludes the leading '%'
  lo = 0;
  hi = ARRAYLEN(kRegs) - 1;
  while (lo <= hi) {
    mid = (lo + hi) >> 1;
    have = LOAD64BE(kRegs[mid].s);
    if (want == have) {
      *out_reg = kRegs[mid];
      return true;
    }
    if (want < have) {
      hi = mid - 1;
    } else {
      lo = mid + 1;
    }
  }
  return false;
}
static int FindRegReg(struct Slice s) {
struct Reg reg;
if (!FindReg(s.p, s.n, &reg)) return -1;
return reg.reg;
}
static int FindRegRm(struct Slice s) {
struct Reg reg;
if (!FindReg(s.p, s.n, &reg)) return -1;
return reg.rm;
}
static int FindRegBase(struct Slice s) {
struct Reg reg;
if (!FindReg(s.p, s.n, &reg)) return -1;
return reg.base;
}
static int FindRegIndex(struct Slice s) {
struct Reg reg;
if (!FindReg(s.p, s.n, &reg)) return -1;
return reg.index;
}
/**
 * Clears bit 0x0800 of an encoded register value and, if the byte in
 * bits 8..15 then equals REX, clears that whole byte as well. The
 * sentinel -1 is passed through unchanged.
 *
 * NOTE(review): bit 0x0800 is presumably REX.W within the rex byte
 * held in bits 8..15 -- confirm against the kRegs encoding.
 */
static int RemoveRexw(int x) {
  if (x != -1) {
    x &= ~0x0800;
    if (((x & 0xff00) >> 8) == REX) {
      x &= ~0xff00;
    }
  }
  return x;
}
/**
 * Consumes a register token and returns its `reg` field encoding.
 *
 * Calls InvalidRegister() when the token does not name a register.
 */
static int GetRegisterReg(struct As *a) {
  int reg;
  if ((reg = FindRegReg(GetSlice(a))) == -1) InvalidRegister(a);
  return reg;
}
/**
 * Consumes a register token and returns its `rm` field encoding.
 *
 * Calls InvalidRegister() when the token does not name a register.
 */
static int GetRegisterRm(struct As *a) {
  int reg;
  if ((reg = FindRegRm(GetSlice(a))) == -1) InvalidRegister(a);
  return reg;
}
static int ParseModrm(struct As *a, int *disp) {
/* ┌isreg
│┌isrip
││┌hasindex
│││┌hasbase
││││┌hasasz
│││││┌rex
││││││ ┌scale
││││││ │ ┌index or size
││││││ │ │ ┌base or reg
│││││├──────┐├┐├─┐├─┐
0b00000000000000000000000000000000*/
struct Slice str;
int reg, scale, modrm = 0;
if (!ConsumeSegment(a) && IsRegister(a, a->i)) {
*disp = 0;
modrm = GetRegisterRm(a) | ISREG;
} else {
if (!IsPunct(a, a->i, '(')) {
*disp = Parse(a);
} else {
*disp = -1;
}
if (IsPunct(a, a->i, '(')) {
++a->i;
if ((str = GetSlice(a)).n) {
modrm |= HASBASE;
if (!strncasecmp(str.p, "%rip", str.n)) {
modrm |= ISRIP;
} else {
reg = FindRegBase(str);
if (reg == -1) InvalidRegister(a);
modrm |= reg & 007; // reg
modrm |= reg & 0xff00; // rex
if (((reg & 070) >> 3) == 2) modrm |= HASASZ; // asz
}
}
if (!IsPunct(a, a->i, ')')) {
modrm |= HASINDEX;
reg = FindRegIndex(GetSlice(a));
if (reg == -1) InvalidRegister(a);
modrm |= (reg & 007) << 3; // index
modrm |= reg & 0xff00; // rex
if (((reg & 070) >> 3) == 2) modrm |= HASASZ; // asz
if (!IsPunct(a, a->i, ')')) {