/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ │ Copyright 2020 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ │ above copyright notice and this permission notice appear in all copies. │ │ │ │ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ │ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ │ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ │ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ │ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ │ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" #include "libc/bits/popcnt.h" #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/elf/def.h" #include "libc/fmt/conv.h" #include "libc/log/log.h" #include "libc/macros.h" #include "libc/mem/mem.h" #include "libc/nexgen32e/bsr.h" #include "libc/runtime/runtime.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "libc/sysv/consts/o.h" #include "libc/x/x.h" #include "third_party/gdtoa/gdtoa.h" #include "tool/build/lib/elfwriter.h" /* WORK IN PROGRESS */ #define OSZ 0x66 #define ASZ 0x67 #define REX 0x40 // byte #define REXB 0x41 // src #define REXX 0x42 // index #define REXR 0x44 // dest #define REXW 0x48 // quad #define HASASZ 0x00010000 #define HASBASE 0x00020000 #define HASINDEX 0x00040000 #define ISRIP 0x00080000 #define ISREG 0x00100000 #define APPEND(L) L.p = realloc(L.p, ++L.n * sizeof(*L.p)) #define IS(P, N, S) (N == strlen(S) && !strncasecmp(P, S, strlen(S))) struct Strings { size_t n; char **p; }; struct Assembler { int i; int section; int previous; struct Ints { size_t n; long *p; } ints; struct Floats { size_t n; long double *p; } floats; struct Slices { size_t n; struct Slice { size_t n; char *p; } * p; } slices; struct Sauces { size_t n; struct Sauce { const char *path; int line; } * p; } sauces; struct Things { size_t n; struct Thing { enum ThingType { TT_INT, TT_FLOAT, TT_SLICE, TT_PUNCT, TT_FORWARD, TT_BACKWARD, } t; int i; int s; } * p; } things; struct Sections { size_t n; struct Section { const char *name; int flags; int type; int align; struct Slice binary; } * p; } sections; struct Symbols { size_t n; struct Symbol { int name; int section; int stb; int stv; int type; long location; long size; } * p; } symbols; struct Labels { size_t n; struct Label { int s; int id; int section; long location; } * p; } labels; }; static const char kPrefixByte[30] = { 0xf3, 0xf3, 0xf3, 0xf2, 0xf2, 0xf0, 0x26, 0x2e, 0x36, 0x3e, 0x64, 0x65, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x66, 0x67, }; static const char kPrefix[30][8] = { "rep", "repz", "repe", "repne", "repnz", "lock", "es", "cs", "ss", "ds", "fs", "gs", "rex", "rex.b", "rex.x", "rex.xb", "rex.r", "rex.rb", "rex.rx", "rex.rxb", "rex.w", "rex.wb", "rex.wx", "rex.wxb", "rex.wr", "rex.wrb", "rex.wrx", "rex.wrxb", "data16", "addr32", }; /** * Context-sensitive register encoding information. * * ┌rex * │ ┌log₂size * │ │ ┌reg * ├──────┐ ├─┐├─┐ * 0b0000000000000000 */ static const struct Reg { char s[8]; short reg; short rm; short base; short index; } kRegs[] = /* clang-format off */ { {"ah", 4, 4, -1, -1 }, {"al", 0, 0, -1, -1 }, {"ax", 0 | 1<<3, 0 | 1<<3, -1, -1 }, {"bh", 7, 7, -1, -1 }, {"bl", 3, 3, -1, -1 }, {"bp", 5 | 1<<3, 5 | 1<<3, -1, -1 }, {"bpl", 5 | REX<<8, 5 | REX<<8, -1, -1 }, {"bx", 3 | 1<<3, 3 | 1<<3, -1, -1 }, {"ch", 5, 5, -1, -1 }, {"cl", 1, 1, -1, -1 }, {"cx", 1 | 1<<3, 1 | 1<<3, -1, -1 }, {"dh", 6, 6, -1, -1 }, {"di", 7 | 1<<3, 7 | 1<<3, -1, -1 }, {"dil", 7 | REX<<8, 7 | REX<<8, -1, -1 }, {"dl", 2, 2, -1, -1 }, {"dx", 2 | 1<<3, 2 | 1<<3, -1, -1 }, {"eax", 0 | 2<<3, 0 | 2<<3, 0 | 2<<3, 0 | 2<<3 }, {"ebp", 5 | 2<<3, 5 | 2<<3, 5 | 2<<3, 5 | 2<<3 }, {"ebx", 3 | 2<<3, 3 | 2<<3, 3 | 2<<3, 3 | 2<<3 }, {"ecx", 1 | 2<<3, 1 | 2<<3, 1 | 2<<3, 1 | 2<<3 }, {"edi", 7 | 2<<3, 7 | 2<<3, 7 | 2<<3, 7 | 2<<3 }, {"edx", 2 | 2<<3, 2 | 2<<3, 2 | 2<<3, 2 | 2<<3 }, {"esi", 6 | 2<<3, 6 | 2<<3, 6 | 2<<3, 6 | 2<<3 }, {"esp", 4 | 2<<3, 4 | 2<<3, 4 | 2<<3, 4 | 2<<3 }, {"r10", 2 | 3<<3 | REXR<<8 | REXW<<8, 2 | 3<<3 | REXB<<8 | REXW<<8, 2 | 3<<3 | REXB<<8, 2 | 3<<3 | REXX<<8 }, {"r10b", 2 | REXR<<8, 2 | REXB<<8, -1, -1 }, {"r10d", 2 | 2<<3 | REXR<<8, 2 | 2<<3 | REXB<<8, 2 | 2<<3 | REXB<<8, 2 | 2<<3 | REXX<<8 }, {"r10w", 2 | 1<<3 | REXR<<8, 2 | 1<<3 | REXB<<8, -1, -1 }, {"r11", 3 | 3<<3 | REXR<<8 | REXW<<8, 3 | 3<<3 | REXB<<8 | REXW<<8, 3 | 3<<3 | REXB<<8, 3 | 3<<3 | REXX<<8 }, {"r11b", 3 | REXR<<8, 3 | REXB<<8, -1, -1 }, {"r11d", 3 | 2<<3 | REXR<<8, 3 | 2<<3 | REXB<<8, 3 | 2<<3 | REXB<<8, 3 | 2<<3 | REXX<<8 }, {"r11w", 3 | 1<<3 | REXR<<8, 3 | 1<<3 | REXB<<8, -1, -1 }, {"r12", 4 | 3<<3 | REXR<<8 | REXW<<8, 4 | 3<<3 | REXB<<8 | REXW<<8, 4 | 3<<3 | REXB<<8, 4 | 3<<3 | REXX<<8 }, {"r12b", 4 | REXR<<8, 4 | REXB<<8, -1, -1 }, {"r12d", 4 | 2<<3 | REXR<<8, 4 | 2<<3 | REXB<<8, 4 | 2<<3 | REXB<<8, 4 | 2<<3 | REXX<<8 }, {"r12w", 4 | 1<<3 | REXR<<8, 4 | 1<<3 | REXB<<8, -1, -1 }, {"r13", 5 | 3<<3 | REXR<<8 | REXW<<8, 5 | 3<<3 | REXB<<8 | REXW<<8, 5 | 3<<3 | REXB<<8, 5 | 3<<3 | REXX<<8 }, {"r13b", 5 | REXR<<8, 5 | REXB<<8, -1, -1 }, {"r13d", 5 | 2<<3 | REXR<<8, 5 | 2<<3 | REXB<<8, 5 | 2<<3 | REXB<<8, 5 | 2<<3 | REXX<<8 }, {"r13w", 5 | 1<<3 | REXR<<8, 5 | 1<<3 | REXB<<8, -1, -1 }, {"r14", 6 | 3<<3 | REXR<<8 | REXW<<8, 6 | 3<<3 | REXB<<8 | REXW<<8, 6 | 3<<3 | REXB<<8, 6 | 3<<3 | REXX<<8 }, {"r14b", 6 | REXR<<8, 6 | REXB<<8, -1, -1 }, {"r14d", 6 | 2<<3 | REXR<<8, 6 | 2<<3 | REXB<<8, 6 | 2<<3 | REXB<<8, 6 | 2<<3 | REXX<<8 }, {"r14w", 6 | 1<<3 | REXR<<8, 6 | 1<<3 | REXB<<8, -1, -1 }, {"r15", 7 | 3<<3 | REXR<<8 | REXW<<8, 7 | 3<<3 | REXB<<8 | REXW<<8, 7 | 3<<3 | REXB<<8, 7 | 3<<3 | REXX<<8 }, {"r15b", 7 | REXR<<8, 7 | REXB<<8, -1, -1 }, {"r15d", 7 | 2<<3 | REXR<<8, 7 | 2<<3 | REXB<<8, 7 | 2<<3 | REXB<<8, 7 | 2<<3 | REXX<<8 }, {"r15w", 7 | 1<<3 | REXR<<8, 7 | 1<<3 | REXB<<8, -1, -1 }, {"r8", 0 | 3<<3 | REXR<<8 | REXW<<8, 0 | 3<<3 | REXB<<8 | REXW<<8, 0 | 3<<3 | REXB<<8, 0 | 3<<3 | REXX<<8 }, {"r8b", 0 | REXR<<8, 0 | REXB<<8, -1, -1 }, {"r8d", 0 | 2<<3 | REXR<<8, 0 | 2<<3 | REXB<<8, 0 | 2<<3 | REXB<<8, 0 | 2<<3 | REXX<<8 }, {"r8w", 0 | 1<<3 | REXR<<8, 0 | 1<<3 | REXB<<8, -1, -1 }, {"r9", 1 | 3<<3 | REXR<<8 | REXW<<8, 1 | 3<<3 | REXB<<8 | REXW<<8, 1 | 3<<3 | REXB<<8, 1 | 3<<3 | REXX<<8 }, {"r9b", 1 | REXR<<8, 1 | REXB<<8, -1, -1 }, {"r9d", 1 | 2<<3 | REXR<<8, 1 | 2<<3 | REXB<<8, 1 | 2<<3 | REXB<<8, 1 | 2<<3 | REXX<<8 }, {"r9w", 1 | 1<<3 | REXR<<8, 1 | 1<<3 | REXB<<8, -1, -1 }, {"rax", 0 | 3<<3 | REXW<<8, 0 | 3<<3 | REXW<<8, 0 | 3<<3, 0 | 3<<3 }, {"rbp", 5 | 3<<3 | REXW<<8, 5 | 3<<3 | REXW<<8, 5 | 3<<3, 5 | 3<<3 }, {"rbx", 3 | 3<<3 | REXW<<8, 3 | 3<<3 | REXW<<8, 3 | 3<<3, 3 | 3<<3 }, {"rcx", 1 | 3<<3 | REXW<<8, 1 | 3<<3 | REXW<<8, 1 | 3<<3, 1 | 3<<3 }, {"rdi", 7 | 3<<3 | REXW<<8, 7 | 3<<3 | REXW<<8, 7 | 3<<3, 7 | 3<<3 }, {"rdx", 2 | 3<<3 | REXW<<8, 2 | 3<<3 | REXW<<8, 2 | 3<<3, 2 | 3<<3 }, {"rsi", 6 | 3<<3 | REXW<<8, 6 | 3<<3 | REXW<<8, 6 | 3<<3, 6 | 3<<3 }, {"rsp", 4 | 3<<3 | REXW<<8, 4 | 3<<3 | REXW<<8, 4 | 3<<3, 4 | 3<<3 }, {"si", 6 | 1<<3, 6 | 1<<3, 6 | 1<<3, 6 | 1<<3 }, {"sil", 6 | REX<<8, 6 | REX<<8, 6 | REX<<8, 6 | REX<<8 }, {"sp", 4 | 1<<3, 4 | 1<<3, 4 | 1<<3, 4 | 1<<3 }, {"spl", 4 | REX<<8, 4 | REX<<8, 4 | REX<<8, 4 | REX<<8 }, {"xmm0", 0 | 4<<3, 0 | 4<<3, -1, -1 }, {"xmm1", 1 | 4<<3, 1 | 4<<3, -1, -1 }, {"xmm10", 2 | 4<<3 | REXR<<8, 2 | 4<<3 | REXB<<8, -1, -1 }, {"xmm11", 3 | 4<<3 | REXR<<8, 3 | 4<<3 | REXB<<8, -1, -1 }, {"xmm12", 4 | 4<<3 | REXR<<8, 4 | 4<<3 | REXB<<8, -1, -1 }, {"xmm13", 5 | 4<<3 | REXR<<8, 5 | 4<<3 | REXB<<8, -1, -1 }, {"xmm14", 6 | 4<<3 | REXR<<8, 6 | 4<<3 | REXB<<8, -1, -1 }, {"xmm15", 7 | 4<<3 | REXR<<8, 7 | 4<<3 | REXB<<8, -1, -1 }, {"xmm2", 2 | 4<<3, 2 | 4<<3, -1, -1 }, {"xmm3", 3 | 4<<3, 3 | 4<<3, -1, -1 }, {"xmm4", 4 | 4<<3, 4 | 4<<3, -1, -1 }, {"xmm5", 5 | 4<<3, 5 | 4<<3, -1, -1 }, {"xmm6", 6 | 4<<3, 6 | 4<<3, -1, -1 }, {"xmm7", 7 | 4<<3, 7 | 4<<3, -1, -1 }, {"xmm8", 0 | 4<<3 | REXR<<8, 0 | 4<<3 | REXB<<8, -1, -1 }, {"xmm9", 1 | 4<<3 | REXR<<8, 1 | 4<<3 | REXB<<8, -1, -1 }, } /* clang-format on */; static bool g_ignore_err; static const char *g_input_path; static const char *g_output_path; static struct Strings g_include_paths; static void PrintSlice(struct Slice s) { fprintf(stderr, "%.*s\n", s.n, s.p); } static void AppendString(struct Strings *l, const char *p) { l->p = realloc(l->p, ++l->n * sizeof(*l->p)); l->p[l->n - 1] = p; } static void ReadFlags(int argc, char *argv[]) { int i; g_input_path = "-"; g_output_path = "a.out"; for (i = 1; i < argc; ++i) { if (!strcmp(argv[i], "-o")) { g_output_path = argv[++i]; } else if (startswith(argv[i], "-o")) { g_output_path = argv[i] + 2; } else if (!strcmp(argv[i], "-I")) { AppendString(&g_include_paths, argv[++i]); } else if (startswith(argv[i], "-I")) { AppendString(&g_include_paths, argv[i] + 2); } else if (!strcmp(argv[i], "-Z")) { g_ignore_err = true; } else if (argv[i][0] != '-') { g_input_path = argv[i]; } } } static int AppendSection(struct Assembler *a, const char *name, int flags, int type) { int i; APPEND(a->sections); i = a->sections.n - 1; a->sections.p[i].name = name; a->sections.p[i].flags = flags; a->sections.p[i].type = type; a->sections.p[i].align = 1; a->sections.p[i].binary.p = NULL; a->sections.p[i].binary.n = 0; return i; } static struct Assembler *NewAssembler(void) { struct Assembler *a = calloc(1, sizeof(struct Assembler)); APPEND(a->slices); a->slices.p[0].p = NULL; a->slices.p[0].n = 0; AppendSection(a, "", 0, SHT_NULL); AppendSection(a, ".text", SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS); AppendSection(a, ".data", SHF_ALLOC | SHF_WRITE, SHT_PROGBITS); AppendSection(a, ".bss", SHF_ALLOC | SHF_WRITE, SHT_NOBITS); a->section = 1; return a; } static int AppendLine(struct Assembler *a, char *path, int line) { if (!a->sauces.n || (line != a->sauces.p[a->sauces.n - 1].line || path != a->sauces.p[a->sauces.n - 1].path)) { APPEND(a->sauces); a->sauces.p[a->sauces.n - 1].path = path; a->sauces.p[a->sauces.n - 1].line = line; } return a->sauces.n - 1; } static int ReadCharLiteral(struct Slice *buf, int c, char *p, int *i) { if (c != '\\') return c; switch ((c = p[(*i)++])) { case 'a': return '\a'; case 'b': return '\b'; case 't': return '\t'; case 'n': return '\n'; case 'v': return '\v'; case 'f': return '\f'; case 'r': return '\r'; case 'e': return 033; case 'x': if (isxdigit(p[*i])) { c = hextoint(p[(*i)++]); if (isxdigit(p[*i])) { c = c * 16 + hextoint(p[(*i)++]); } } return c; case '0' ... '7': c -= '0'; if ('0' <= p[*i] && p[*i] <= '7') { c = c * 8 + (p[(*i)++] - '0'); if ('0' <= p[*i] && p[*i] <= '7') { c = c * 8 + (p[(*i)++] - '0'); } } return c; default: return c; } } static void CanonicalizeNewline(char *p) { int i = 0, j = 0; while (p[i]) { if (p[i] == '\r' && p[i + 1] == '\n') { i += 2; p[j++] = '\n'; } else if (p[i] == '\r') { i++; p[j++] = '\n'; } else { p[j++] = p[i++]; } } p[j] = '\0'; } static void RemoveBackslashNewline(char *p) { int i, j, n; for (i = j = n = 0; p[i];) { if (p[i] == '\\' && p[i + 1] == '\n') { i += 2; n++; } else if (p[i] == '\n') { p[j++] = p[i++]; for (; n > 0; n--) p[j++] = '\n'; } else { p[j++] = p[i++]; } } for (; n > 0; n--) p[j++] = '\n'; p[j] = '\0'; } static char *ReadFile(const char *path) { char *p; FILE *fp; int buflen, nread, end, n; if (strcmp(path, "-") == 0) { fp = stdin; } else { fp = fopen(path, "r"); if (!fp) return NULL; } buflen = 4096; nread = 0; p = calloc(1, buflen); for (;;) { end = buflen - 2; n = fread(p + nread, 1, end - nread, fp); if (n == 0) break; nread += n; if (nread == end) { buflen *= 2; p = realloc(p, buflen); } } if (fp != stdin) fclose(fp); if (nread > 0 && p[nread - 1] == '\\') { p[nread - 1] = '\n'; } else if (nread == 0 || p[nread - 1] != '\n') { p[nread++] = '\n'; } p[nread] = '\0'; return p; } static void Tokenize(struct Assembler *a, char *path) { bool bol; int c, i, line; char *p, *path2; struct Slice buf; if (!(p = ReadFile(path))) return; if (!memcmp(p, "\357\273\277", 3)) p += 3; CanonicalizeNewline(p); RemoveBackslashNewline(p); line = 1; bol = true; while ((c = *p)) { if (c == '#' || (c == '/' && bol) || (c == '/' && p[1] == '/')) { p = strchr(p, '\n'); continue; } if (c == '\n') { APPEND(a->things); a->things.p[a->things.n - 1].t = TT_PUNCT; a->things.p[a->things.n - 1].s = AppendLine(a, path, line); a->things.p[a->things.n - 1].i = ';'; ++p; bol = true; ++line; continue; } bol = false; if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == '\v' || c == ',') { ++p; continue; } if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_' || c == '%' || c == '@' || (c == '.' && !('0' <= p[1] && p[1] <= '9'))) { for (i = 1;; ++i) { if (!(('a' <= p[i] && p[i] <= 'z') || ('A' <= p[i] && p[i] <= 'Z') || ('0' <= p[i] && p[i] <= '9') || p[i] == '.' || p[i] == '_' || p[i] == '$')) { break; } } APPEND(a->things); a->things.p[a->things.n - 1].t = TT_SLICE; a->things.p[a->things.n - 1].s = AppendLine(a, path, line); a->things.p[a->things.n - 1].i = a->slices.n; APPEND(a->slices); a->slices.p[a->slices.n - 1].p = p; a->slices.p[a->slices.n - 1].n = i; p += i; continue; } if (('0' <= c && c <= '9') || (c == '.' && '0' <= p[1] && p[1] <= '9')) { bool isfloat = c == '.'; if (c == '0' && p[1] != '.') { if (p[1] == 'x' || p[1] == 'X') { for (i = 2;; ++i) { if (!(('0' <= p[i] && p[i] <= '9') || ('a' <= p[i] && p[i] <= 'f') || ('A' <= p[i] && p[i] <= 'F'))) { break; } } } else if (p[1] == 'b' || p[1] == 'B') { for (i = 2;; ++i) { if (!(p[i] == '0' || p[i] == '1')) break; } } else { for (i = 1;; ++i) { if (!('0' <= p[i] && p[i] <= '7')) break; } } } else { for (i = 1;; ++i) { if (('0' <= p[i] && p[i] <= '9') || p[i] == '-' || p[i] == '+') { continue; } else if (p[i] == '.' || p[i] == 'e' || p[i] == 'E' || p[i] == 'e') { isfloat = true; continue; } break; } } APPEND(a->things); if (isfloat) { APPEND(a->floats); a->floats.p[a->floats.n - 1] = strtold(p, NULL); a->things.p[a->things.n - 1].i = a->floats.n - 1; a->things.p[a->things.n - 1].t = TT_FLOAT; } else { APPEND(a->ints); a->ints.p[a->ints.n - 1] = strtol(p, NULL, 0); a->things.p[a->things.n - 1].i = a->ints.n - 1; if (p[i] == 'f' || p[i] == 'F') { a->things.p[a->things.n - 1].t = TT_FORWARD; } else if (p[i] == 'b' || p[i] == 'B') { a->things.p[a->things.n - 1].t = TT_BACKWARD; } else { a->things.p[a->things.n - 1].t = TT_INT; } } a->things.p[a->things.n - 1].s = AppendLine(a, path, line); p += i; continue; } if (c == '\'') { i = 1; c = p[i++]; c = ReadCharLiteral(&buf, c, p, &i); if (p[i] == '\'') ++i; p += i; APPEND(a->things); a->things.p[a->things.n - 1].t = TT_INT; a->things.p[a->things.n - 1].s = AppendLine(a, path, line); a->things.p[a->things.n - 1].i = a->ints.n; APPEND(a->ints); a->ints.p[a->ints.n - 1] = c; continue; } if (c == '"') { buf.n = 0; buf.p = NULL; for (i = 1; (c = p[i++]);) { if (c == '"') break; c = ReadCharLiteral(&buf, c, p, &i); APPEND(buf); buf.p[buf.n - 1] = c; } p += i; if (a->things.n && a->things.p[a->things.n - 1].t == TT_SLICE && IS(a->slices.p[a->things.p[a->things.n - 1].i].p, a->slices.p[a->things.p[a->things.n - 1].i].n, ".include")) { APPEND(buf); buf.p[buf.n - 1] = '\0'; --a->things.n; if (fileexists(buf.p)) { Tokenize(a, buf.p); } else { for (i = 0; i < g_include_paths.n; ++i) { path2 = xstrcat(g_include_paths.p[i], '/', buf.p); if (fileexists(path2)) { Tokenize(a, path2); free(path2); break; } else { free(path2); } } } free(buf.p); } else { APPEND(a->things); a->things.p[a->things.n - 1].t = TT_SLICE; a->things.p[a->things.n - 1].s = AppendLine(a, path, line); a->things.p[a->things.n - 1].i = a->slices.n; APPEND(a->slices); a->slices.p[a->slices.n - 1] = buf; } continue; } APPEND(a->things); a->things.p[a->things.n - 1].t = TT_PUNCT; a->things.p[a->things.n - 1].s = AppendLine(a, path, line); a->things.p[a->things.n - 1].i = c; ++p; } } static bool IsLocal(struct Assembler *a, int name) { if (name < 0) return true; return a->slices.p[name].n >= 2 && !memcmp(a->slices.p[name].p, ".L", 2); } static int GetSymbol(struct Assembler *a, int name) { int i; for (i = 0; i < a->symbols.n; ++i) { if (a->slices.p[a->symbols.p[i].name].n == a->slices.p[name].n && !memcmp(a->slices.p[a->symbols.p[i].name].p, a->slices.p[name].p, a->slices.p[name].n)) { return i; } } APPEND(a->symbols); i = a->symbols.n - 1; memset(&a->symbols.p[i], 0, sizeof(a->symbols.p[i])); a->symbols.p[i].name = name; return i; } static wontreturn void Fail(struct Assembler *a, const char *fmt, ...) { va_list va; fprintf(stderr, "%s:%d:: ", a->sauces.p[a->things.p[a->i].s].path, a->sauces.p[a->things.p[a->i].s].line); va_start(va, fmt); vfprintf(stderr, fmt, va); va_end(va); fputc('\n', stderr); __die(); } static void Label(struct Assembler *a, int name) { int i = GetSymbol(a, name); if (a->symbols.p[i].section) { Fail(a, "already defined: %.*s", a->slices.p[name].n, a->slices.p[name].p); } a->symbols.p[i].section = a->section; a->symbols.p[i].location = a->sections.p[a->section].binary.n; a->i += 2; } static void LocalLabel(struct Assembler *a, int id) { APPEND(a->labels); a->labels.p[a->labels.n - 1].s = a->things.p[a->i].s; a->labels.p[a->labels.n - 1].id = id; a->labels.p[a->labels.n - 1].section = a->section; a->labels.p[a->labels.n - 1].location = a->sections.p[a->section].binary.n; a->i += 2; } static void SetSection(struct Assembler *a, int section) { a->previous = a->section; a->section = section; } static bool IsInt(struct Assembler *a, int i) { return a->things.p[i].t == TT_INT; } static bool IsFloat(struct Assembler *a, int i) { return a->things.p[i].t == TT_FLOAT; } static bool IsSlice(struct Assembler *a, int i) { return a->things.p[i].t == TT_SLICE; } static bool IsPunct(struct Assembler *a, int i, int c) { return a->things.p[i].t == TT_PUNCT && a->things.p[i].i == c; } static void ConsumePunct(struct Assembler *a, int c) { if (IsPunct(a, a->i, c)) { ++a->i; } else { Fail(a, "expected %`'c", c); } } static long GetInt(struct Assembler *a) { long res; if (IsInt(a, a->i)) { res = a->ints.p[a->things.p[a->i].i]; ++a->i; return res; } else { Fail(a, "expected int"); } } static long double GetFloat(struct Assembler *a) { long double res; if (IsFloat(a, a->i)) { res = a->floats.p[a->things.p[a->i].i]; ++a->i; return res; } else { Fail(a, "expected float"); } } static struct Slice GetString(struct Assembler *a) { struct Slice res; if (IsSlice(a, a->i)) { res = a->slices.p[a->things.p[a->i].i]; ++a->i; return res; } else { Fail(a, "expected string"); } } static void Emit(struct Assembler *a, int c) { APPEND(a->sections.p[a->section].binary); a->sections.p[a->section].binary.p[a->sections.p[a->section].binary.n - 1] = c; } static void EmitWord(struct Assembler *a, long x) { Emit(a, x >> 000); Emit(a, x >> 010); } static void EmitLong(struct Assembler *a, long x) { Emit(a, x >> 000); Emit(a, x >> 010); Emit(a, x >> 020); Emit(a, x >> 030); } static void EmitQuad(struct Assembler *a, long x) { EmitLong(a, x >> 000); EmitLong(a, x >> 040); } static void OnZero(struct Assembler *a) { long i, n; while (IsInt(a, a->i)) { n = GetInt(a); for (i = 0; i < n; ++i) { Emit(a, 0); } } } static void OnFill(struct Assembler *a) { int x; long i, n; n = GetInt(a); if (IsInt(a, a->i)) { x = GetInt(a); } else { x = 0; } for (i = 0; i < n; ++i) { Emit(a, x); } } static void OnByte(struct Assembler *a) { while (IsInt(a, a->i)) { Emit(a, GetInt(a)); } } static void OnWord(struct Assembler *a) { long x; while (IsInt(a, a->i)) { x = GetInt(a); Emit(a, x >> 000); Emit(a, x >> 010); } } static void OnLong(struct Assembler *a) { while (IsInt(a, a->i)) { EmitLong(a, GetInt(a)); } } static void OnQuad(struct Assembler *a) { while (IsInt(a, a->i)) { EmitQuad(a, GetInt(a)); } } static void OnFloat(struct Assembler *a) { float f; char b[4]; while (IsFloat(a, a->i)) { f = GetFloat(a); memcpy(b, &f, 4); Emit(a, b[0]); Emit(a, b[1]); Emit(a, b[2]); Emit(a, b[3]); } } static void OnDouble(struct Assembler *a) { double f; char b[8]; while (IsFloat(a, a->i)) { f = GetFloat(a); memcpy(b, &f, 8); Emit(a, b[0]); Emit(a, b[1]); Emit(a, b[2]); Emit(a, b[3]); Emit(a, b[4]); Emit(a, b[5]); Emit(a, b[6]); Emit(a, b[7]); } } static void OnAscii(struct Assembler *a) { size_t i; struct Slice arg; while (IsSlice(a, a->i)) { arg = GetString(a); for (i = 0; i < arg.n; ++i) { Emit(a, arg.p[i]); } } } static void OnAsciz(struct Assembler *a) { size_t i; struct Slice arg; while (IsSlice(a, a->i)) { arg = GetString(a); for (i = 0; i < arg.n; ++i) { Emit(a, arg.p[i]); } Emit(a, 0); } } static void OnAbort(struct Assembler *a) { Fail(a, "aborted"); } static void OnErr(struct Assembler *a) { if (g_ignore_err) return; Fail(a, "error"); } static void OnError(struct Assembler *a) { struct Slice msg = GetString(a); if (g_ignore_err) return; Fail(a, "%.*s", msg.n, msg.p); } static void OnText(struct Assembler *a) { SetSection(a, 0); } static void OnData(struct Assembler *a) { SetSection(a, 1); } static void OnBss(struct Assembler *a) { SetSection(a, 2); } static void OnPrevious(struct Assembler *a) { SetSection(a, a->previous); } static void OnAlign(struct Assembler *a) { long i, n, align, fill, maxskip; align = GetInt(a); if (popcnt(align) != 1) Fail(a, "alignment not power of 2"); fill = (a->sections.p[a->section].flags & SHF_EXECINSTR) ? 0x90 : 0; maxskip = 268435456; if (IsInt(a, a->i)) { fill = GetInt(a); if (IsInt(a, a->i)) { maxskip = GetInt(a); } } i = a->sections.p[a->section].binary.n; n = ROUNDUP(i, align) - i; if (n > maxskip) return; a->sections.p[a->section].align = MAX(a->sections.p[a->section].align, align); for (i = 0; i < n; ++i) { Emit(a, fill); } } static int SectionFlag(struct Assembler *a, int c) { switch (c) { case 'a': return SHF_ALLOC; case 'w': return SHF_WRITE; case 'x': return SHF_EXECINSTR; case 'g': return SHF_GROUP; case 'M': return SHF_MERGE; case 'S': return SHF_STRINGS; case 'T': return SHF_TLS; default: Fail(a, "unknown section flag: %`'c", c); } } static int SectionFlags(struct Assembler *a, struct Slice s) { int i, flags; for (flags = i = 0; i < s.n; ++i) { flags |= SectionFlag(a, s.p[i]); } return flags; } static int SectionType(struct Assembler *a, struct Slice s) { if (IS(s.p, s.n, "@progbits")) { return SHT_PROGBITS; } else if (IS(s.p, s.n, "@note")) { return SHT_NOTE; } else if (IS(s.p, s.n, "@nobits")) { return SHT_NOBITS; } else if (IS(s.p, s.n, "@preinit_array")) { return SHT_PREINIT_ARRAY; } else if (IS(s.p, s.n, "@init_array")) { return SHT_INIT_ARRAY; } else if (IS(s.p, s.n, "@fini_array")) { return SHT_FINI_ARRAY; } else { Fail(a, "unknown section type: %.*s", s.n, s.p); } } static void OnSection(struct Assembler *a) { char *name; int flags, type; struct Slice arg; arg = GetString(a); name = strndup(arg.p, arg.n); if (startswith(name, ".text")) { flags = SHF_ALLOC | SHF_EXECINSTR; type = SHT_PROGBITS; } else if (startswith(name, ".data")) { flags = SHF_ALLOC | SHF_WRITE; type = SHT_PROGBITS; } else if (startswith(name, ".bss")) { flags = SHF_ALLOC | SHF_WRITE; type = SHT_NOBITS; } else { flags = SHF_ALLOC | SHF_EXECINSTR | SHF_WRITE; type = SHT_PROGBITS; } if (IsSlice(a, a->i)) { flags = SectionFlags(a, GetString(a)); if (IsSlice(a, a->i)) { type = SectionType(a, GetString(a)); } } SetSection(a, AppendSection(a, name, flags, type)); } static void OnIncbin(struct Assembler *a) { int fd; char *path; struct stat st; struct Slice arg; arg = GetString(a); path = strndup(arg.p, arg.n); if ((fd = open(path, O_RDONLY)) == -1 || fstat(fd, &st) == -1) { Fail(a, "open failed: %s", path); } a->sections.p[a->section].binary.p = realloc(a->sections.p[a->section].binary.p, a->sections.p[a->section].binary.n + st.st_size); if (read(fd, a->sections.p[a->section].binary.p, st.st_size) != st.st_size) { Fail(a, "read failed: %s", path); } a->sections.p[a->section].binary.n += st.st_size; close(fd); free(path); } static int SymbolType(struct Assembler *a, struct Slice s) { if (IS(s.p, s.n, "@object") || IS(s.p, s.n, "STT_OBJECT")) { return STT_OBJECT; } else if (IS(s.p, s.n, "@function") || IS(s.p, s.n, "STT_FUNC")) { return STT_FUNC; } else if (IS(s.p, s.n, "@common") || IS(s.p, s.n, "STT_COMMON")) { return STT_COMMON; } else if (IS(s.p, s.n, "@notype") || IS(s.p, s.n, "STT_NOTYPE")) { return STT_NOTYPE; } else if (IS(s.p, s.n, "@tls_object") || IS(s.p, s.n, "STT_TLS")) { return STT_TLS; } else { Fail(a, "unknown symbol type: %.*s", s.n, s.p); } } static void OnType(struct Assembler *a) { int i; i = GetSymbol(a, a->things.p[a->i++].i); a->symbols.p[i].type = SymbolType(a, GetString(a)); } static void OnSize(struct Assembler *a) { int i; i = GetSymbol(a, a->things.p[a->i++].i); a->symbols.p[i].size = GetInt(a); } static void OnInternal(struct Assembler *a) { int i; while (IsSlice(a, a->i)) { i = GetSymbol(a, a->things.p[a->i++].i); a->symbols.p[i].stv = STV_INTERNAL; } } static void OnHidden(struct Assembler *a) { int i; while (IsSlice(a, a->i)) { i = GetSymbol(a, a->things.p[a->i++].i); a->symbols.p[i].stv = STV_HIDDEN; } } static void OnProtected(struct Assembler *a) { int i; while (IsSlice(a, a->i)) { i = GetSymbol(a, a->things.p[a->i++].i); a->symbols.p[i].stv = STV_PROTECTED; } } static void OnLocal(struct Assembler *a) { int i; while (IsSlice(a, a->i)) { i = GetSymbol(a, a->things.p[a->i++].i); a->symbols.p[i].stb = STB_LOCAL; } } static void OnWeak(struct Assembler *a) { int i; while (IsSlice(a, a->i)) { i = GetSymbol(a, a->things.p[a->i++].i); a->symbols.p[i].stb = STB_WEAK; } } static void OnGlobal(struct Assembler *a) { int i; while (IsSlice(a, a->i)) { i = GetSymbol(a, a->things.p[a->i++].i); a->symbols.p[i].stb = STB_GLOBAL; } } static bool IsSizableOp(const char *op, struct Slice s) { size_t n = strlen(op); if (n == s.n) return !memcmp(op, s.p, n); if (n + 1 == s.n && !memcmp(op, s.p, n)) { switch (s.p[n]) { case 'b': case 'B': case 'w': case 'W': case 'l': case 'L': case 'q': case 'Q': return true; default: break; } } return false; } static int GetOpSize(struct Assembler *a, struct Slice s) { switch (s.p[s.n - 1]) { case 'b': case 'B': return 0; case 'w': case 'W': return 1; case 'l': case 'L': return 2; case 'q': case 'Q': return 3; default: Fail(a, "could not size instruction"); } } static int CompareString8(const char a[8], const char b[8]) { uint64_t x, y; x = READ64BE(a); y = READ64BE(b); return x > y ? 1 : x < y ? -1 : 0; } static bool FindReg(const char *p, size_t n, struct Reg *out_reg) { char key[8]; int i, c, m, l, r; if (n && n <= 8) { if (*p == '%') ++p, --n; memset(key, 0, sizeof(key)); for (i = 0; i < n; ++i) { key[i] = tolower(p[i]); } l = 0; r = ARRAYLEN(kRegs) - 1; while (l <= r) { m = (l + r) >> 1; c = CompareString8(kRegs[m].s, key); if (c < 0) { l = m + 1; } else if (c > 0) { r = m - 1; } else { *out_reg = kRegs[m]; return true; } } } return false; } static int RemoveRexw(int x) { if (x == -1) return x; x &= ~0x0800; if (((x & 0xff00) >> 8) == REX) x &= ~0xff00; return x; } static int FindRegReg(struct Slice s) { struct Reg reg; if (!FindReg(s.p, s.n, ®)) return -1; return reg.reg; } static int FindRegRm(struct Slice s) { struct Reg reg; if (!FindReg(s.p, s.n, ®)) return -1; return reg.rm; } static int FindRegBase(struct Slice s) { struct Reg reg; if (!FindReg(s.p, s.n, ®)) return -1; return reg.base; } static int FindRegIndex(struct Slice s) { struct Reg reg; if (!FindReg(s.p, s.n, ®)) return -1; return reg.index; } static wontreturn void InvalidRegister(struct Assembler *a) { Fail(a, "invalid register"); } static int GetRegisterReg(struct Assembler *a) { int reg; struct Slice wut; if ((reg = FindRegReg(GetString(a))) == -1) InvalidRegister(a); return reg; } static int GetRegisterRm(struct Assembler *a) { int reg; struct Slice wut; if ((reg = FindRegRm(GetString(a))) == -1) InvalidRegister(a); return reg; } static int ParseMemory(struct Assembler *a, long *disp) { /* ┌isrip │┌hasindex ││┌hasbase │││┌hasasz ││││┌rex │││││ ┌scale │││││ │ ┌index │││││ │ │ ┌base ││││├──────┐├┐├─┐├─┐ 0b00000000000000000000000000000000*/ struct Slice str; int reg, scale, mem = 0; *disp = IsInt(a, a->i) ? GetInt(a) : 0; if (IsPunct(a, a->i, '(')) { ++a->i; if ((str = GetString(a)).n) { mem |= HASBASE; if (!strncasecmp(str.p, "%rip", str.n)) { mem |= ISRIP; } else { reg = FindRegBase(str); if (reg == -1) InvalidRegister(a); mem |= reg & 007; // reg mem |= reg & 0xff00; // rex if (((reg & 070) >> 3) == 2) mem |= HASASZ; // asz } } if (!IsPunct(a, a->i, ')')) { mem |= HASINDEX; reg = FindRegIndex(GetString(a)); if (reg == -1) InvalidRegister(a); mem |= (reg & 007) << 3; // index mem |= reg & 0xff00; // rex if (((reg & 070) >> 3) == 2) mem |= HASASZ; // asz if (!IsPunct(a, a->i, ')')) { mem |= (bsr(GetInt(a)) & 3) << 6; } } else { mem |= 4 << 3; // rsp index (hint: there is none) } ConsumePunct(a, ')'); } return mem; } static void EncodeModrm(struct Assembler *a, int reg, int mem, long disp) { reg &= 7; reg <<= 3; if (mem & (HASBASE | HASINDEX)) { if (mem & ISRIP) { Emit(a, 005 | reg); } else { Emit(a, 0204 | reg); // suboptimal Emit(a, mem); } } else { Emit(a, 004 | reg); Emit(a, 045); } EmitLong(a, disp); } static void OnMov(struct Assembler *a, struct Slice op) { long imm, disp; int reg, rm, mem, modrm; if (IsPunct(a, a->i, '$')) { ++a->i; imm = GetInt(a); if (IsSlice(a, a->i)) { // imm -> reg reg = GetRegisterRm(a); if (reg & 0xff00) { Emit(a, reg >> 8); } switch ((reg & 070) >> 3) { case 0: Emit(a, 0xb0 + (reg & 7)); Emit(a, imm); break; case 1: Emit(a, OSZ); Emit(a, 0xb8 + (reg & 7)); EmitWord(a, imm); break; case 2: Emit(a, 0xb8 + (reg & 7)); EmitLong(a, imm); break; case 3: Emit(a, 0xb8 + (reg & 7)); // suboptimal EmitQuad(a, imm); break; default: Fail(a, "todo movd/movq"); } } else { // imm -> mem mem = ParseMemory(a, &disp); if (mem & 0xff00) { Emit(a, mem >> 8); } switch (GetOpSize(a, op)) { case 0: Emit(a, 0xc6); EncodeModrm(a, 0, mem, disp); Emit(a, imm); break; case 1: Emit(a, OSZ); Emit(a, 0xc7); EncodeModrm(a, 0, mem, disp); EmitWord(a, imm); break; case 2: Emit(a, 0xc7); EncodeModrm(a, 0, mem, disp); EmitLong(a, imm); break; case 3: Emit(a, 0xc7); // suboptimal EncodeModrm(a, 0, mem, disp); EmitQuad(a, imm); break; default: unreachable; } } } else if (IsSlice(a, a->i)) { reg = GetRegisterReg(a); if (IsSlice(a, a->i)) { // reg -> reg rm = GetRegisterRm(a); if (((reg & 070) >> 3) != ((rm & 070) >> 3)) { Fail(a, "size mismatch"); } if ((reg | rm) & 0xff00) { Emit(a, (reg | rm) >> 8); } modrm = 0300 | (reg & 7) << 3 | rm & 7; switch ((reg & 070) >> 3) { case 0: Emit(a, 0x88); Emit(a, modrm); break; case 1: Emit(a, OSZ); Emit(a, 0x89); Emit(a, modrm); break; case 2: case 3: Emit(a, 0x89); Emit(a, modrm); break; case 4: Emit(a, 0x66); Emit(a, 0x0F); Emit(a, 0x6F); Emit(a, modrm); break; default: unreachable; } } else { // reg -> mem mem = ParseMemory(a, &disp); if ((reg | mem) & 0xff00) { Emit(a, (reg | mem) >> 8); } modrm = 0300 | (reg & 7) << 3 | rm & 7; switch ((reg & 070) >> 3) { case 0: Emit(a, 0x88); EncodeModrm(a, reg, mem, 0); break; case 1: Emit(a, OSZ); Emit(a, 0x89); EncodeModrm(a, reg, mem, 0); break; case 2: case 3: Emit(a, 0x89); EncodeModrm(a, reg, mem, 0); break; default: Fail(a, "todo movdqu"); } } } else { // mem -> reg mem = ParseMemory(a, &disp); reg = GetRegisterReg(a); if ((reg | mem) & 0xff00) { Emit(a, (reg | mem) >> 8); } modrm = 0300 | (reg & 7) << 3 | rm & 7; switch ((reg & 070) >> 3) { case 0: Emit(a, 0x8A); EncodeModrm(a, reg, mem, 0); break; case 1: Emit(a, OSZ); Emit(a, 0x8B); EncodeModrm(a, reg, mem, 0); break; case 2: case 3: Emit(a, 0x8B); EncodeModrm(a, reg, mem, 0); break; default: Fail(a, "todo movdqu"); } } } static void OnPush(struct Assembler *a) { long x; int reg; if (IsPunct(a, a->i, '$')) { ++a->i; x = GetInt(a); if (-128 <= x && x <= 127) { Emit(a, 0x6A); Emit(a, x); } else { Emit(a, 0x68); EmitLong(a, x); } } else { reg = RemoveRexw(GetRegisterRm(a)); if (reg & 0xff00) Emit(a, reg >> 8); if (((reg & 070) >> 3) == 1) Emit(a, OSZ); Emit(a, 0x50 + (reg & 7)); } } static void OnPop(struct Assembler *a) { int reg; reg = RemoveRexw(GetRegisterRm(a)); if (reg & 0xff00) Emit(a, reg >> 8); if (((reg & 070) >> 3) == 1) Emit(a, OSZ); Emit(a, 0x58 + (reg & 7)); } static void OnRet(struct Assembler *a) { if (IsPunct(a, a->i, '$')) { ++a->i; Emit(a, 0xC2); EmitWord(a, GetInt(a)); } else { Emit(a, 0xC3); } } static void OnLeave(struct Assembler *a) { Emit(a, 0xC9); } static void OnHlt(struct Assembler *a) { Emit(a, 0xF4); } static void OnCmc(struct Assembler *a) { Emit(a, 0xF5); } static void OnClc(struct Assembler *a) { Emit(a, 0xF8); } static void OnStc(struct Assembler *a) { Emit(a, 0xF9); } static void OnCli(struct Assembler *a) { Emit(a, 0xFA); } static void OnSti(struct Assembler *a) { Emit(a, 0xFB); } static void OnCld(struct Assembler *a) { Emit(a, 0xFC); } static void OnStd(struct Assembler *a) { Emit(a, 0xFD); } static void OnLodsb(struct Assembler *a) { Emit(a, 0xAC); } static void OnLodsw(struct Assembler *a) { Emit(a, OSZ); Emit(a, 0xAD); } static void OnLodsl(struct Assembler *a) { Emit(a, 0xAD); } static void OnLodsq(struct Assembler *a) { Emit(a, REXW); Emit(a, 0xAD); } static void OnStosb(struct Assembler *a) { Emit(a, 0xAA); } static void OnStosw(struct Assembler *a) { Emit(a, OSZ); Emit(a, 0xAB); } static void OnStosl(struct Assembler *a) { Emit(a, 0xAB); } static void OnStosq(struct Assembler *a) { Emit(a, REXW); Emit(a, 0xAB); } static void OnMovsb(struct Assembler *a) { Emit(a, 0xA4); } static void OnMovsw(struct Assembler *a) { Emit(a, OSZ); Emit(a, 0xA5); } static void OnMovsl(struct Assembler *a) { Emit(a, 0xA5); } static void OnMovsq(struct Assembler *a) { Emit(a, REXW); Emit(a, 0xA5); } static bool Prefix(struct Assembler *a, const char *s, size_t n) { int i; char key[8]; if (n <= 8) { memset(key, 0, 8); for (i = 0; i < n; ++i) key[i] = tolower(s[i]); for (i = 0; i < sizeof(kPrefix) / sizeof(*kPrefix); i++) { if (!memcmp(key, kPrefix[i], 8)) { Emit(a, kPrefixByte[i]); return true; } } } return false; } static void Directive(struct Assembler *a) { struct Slice s; for (;;) { s = GetString(a); if (!Prefix(a, s.p, s.n)) break; } if (s.n >= 1 && s.p[0] == '.') { if (IS(s.p, s.n, ".zero")) { OnZero(a); } else if (IS(s.p, s.n, ".align") || IS(s.p, s.n, ".balign")) { OnAlign(a); } else if (IS(s.p, s.n, ".byte")) { OnByte(a); } else if (IS(s.p, s.n, ".word") || IS(s.p, s.n, ".short")) { OnWord(a); } else if (IS(s.p, s.n, ".long")) { OnLong(a); } else if (IS(s.p, s.n, ".quad")) { OnQuad(a); } else if (IS(s.p, s.n, ".float")) { OnFloat(a); } else if (IS(s.p, s.n, ".double")) { OnDouble(a); } else if (IS(s.p, s.n, ".ascii")) { OnAscii(a); } else if (IS(s.p, s.n, ".asciz")) { OnAsciz(a); } else if (IS(s.p, s.n, ".text")) { OnText(a); } else if (IS(s.p, s.n, ".data")) { OnData(a); } else if (IS(s.p, s.n, ".bss")) { OnBss(a); } else if (IS(s.p, s.n, ".previous")) { OnPrevious(a); } else if (IS(s.p, s.n, ".section")) { OnSection(a); } else if (IS(s.p, s.n, ".abort")) { OnAbort(a); } else if (IS(s.p, s.n, ".err")) { OnErr(a); } else if (IS(s.p, s.n, ".error")) { OnError(a); } else if (IS(s.p, s.n, ".fill") || IS(s.p, s.n, ".space")) { OnFill(a); } else if (IS(s.p, s.n, ".type")) { OnType(a); } else if (IS(s.p, s.n, ".size")) { OnSize(a); } else if (IS(s.p, s.n, ".local")) { OnLocal(a); } else if (IS(s.p, s.n, ".internal")) { OnInternal(a); } else if (IS(s.p, s.n, ".weak")) { OnWeak(a); } else if (IS(s.p, s.n, ".hidden")) { OnHidden(a); } else if (IS(s.p, s.n, ".globl") || IS(s.p, s.n, ".global")) { OnGlobal(a); } else if (IS(s.p, s.n, ".protected")) { OnProtected(a); } else if (IS(s.p, s.n, ".incbin")) { OnIncbin(a); } else { Fail(a, "unexpected directive: %.*s", s.n, s.p); } } else if (IS(s.p, s.n, "ret")) { OnRet(a); } else if (IS(s.p, s.n, "leave")) { OnLeave(a); } else if (IS(s.p, s.n, "push")) { OnPush(a); } else if (IS(s.p, s.n, "pop")) { OnPop(a); } else if (IS(s.p, s.n, "hlt")) { OnHlt(a); } else if (IS(s.p, s.n, "cmc")) { OnCmc(a); } else if (IS(s.p, s.n, "clc")) { OnClc(a); } else if (IS(s.p, s.n, "stc")) { OnStc(a); } else if (IS(s.p, s.n, "cli")) { OnCli(a); } else if (IS(s.p, s.n, "sti")) { OnSti(a); } else if (IS(s.p, s.n, "cld")) { OnCld(a); } else if (IS(s.p, s.n, "std")) { OnStd(a); } else if (IS(s.p, s.n, "stosb")) { OnStosb(a); } else if (IS(s.p, s.n, "stosw")) { OnStosw(a); } else if (IS(s.p, s.n, "stosl")) { OnStosl(a); } else if (IS(s.p, s.n, "stosq")) { OnStosq(a); } else if (IS(s.p, s.n, "lodsb")) { OnLodsb(a); } else if (IS(s.p, s.n, "lodsw")) { OnLodsw(a); } else if (IS(s.p, s.n, "lodsl")) { OnLodsl(a); } else if (IS(s.p, s.n, "lodsq")) { OnLodsq(a); } else if (IS(s.p, s.n, "movsb")) { OnMovsb(a); } else if (IS(s.p, s.n, "movsw")) { OnMovsw(a); } else if (IS(s.p, s.n, "movsl")) { OnMovsl(a); } else if (IS(s.p, s.n, "movsq")) { OnMovsq(a); } else if (IsSizableOp("mov", s)) { OnMov(a, s); } else { Fail(a, "unexpected op: %.*s", s.n, s.p); } ConsumePunct(a, ';'); } static void Assemble(struct Assembler *a) { while (a->i < a->things.n) { if (IsPunct(a, a->i, ';')) { ++a->i; continue; } switch (a->things.p[a->i].t) { case TT_SLICE: if (IsPunct(a, a->i + 1, ':')) { Label(a, a->things.p[a->i].i); } else { Directive(a); } break; case TT_INT: if (IsPunct(a, a->i + 1, ':')) { LocalLabel(a, a->ints.p[a->things.p[a->i].i]); } // fallthrough default: Fail(a, "unexpected token"); } } } static void Objectify(struct Assembler *a, const char *path) { size_t i, j; struct ElfWriter *elf; elf = elfwriter_open(path, 0644); for (i = 0; i < a->sections.n; ++i) { elfwriter_align(elf, a->sections.p[i].align, 0); elfwriter_startsection(elf, a->sections.p[i].name, a->sections.p[i].type, a->sections.p[i].flags); for (j = 0; j < a->symbols.n; ++j) { if (a->symbols.p[j].section != i) continue; elfwriter_appendsym( elf, strndup(a->slices.p[a->symbols.p[j].name].p, a->slices.p[a->symbols.p[j].name].n), ELF64_ST_INFO(a->symbols.p[j].stb, a->symbols.p[j].type), a->symbols.p[j].stv, a->symbols.p[j].location, a->symbols.p[j].size); } memcpy(elfwriter_reserve(elf, a->sections.p[i].binary.n), a->sections.p[i].binary.p, a->sections.p[i].binary.n); elfwriter_commit(elf, a->sections.p[i].binary.n); elfwriter_finishsection(elf); } elfwriter_close(elf); } static void PrintThings(struct Assembler *a) { int i; char fbuf[32]; for (i = 0; i < a->things.n; ++i) { printf("%s:%d:: ", a->sauces.p[a->things.p[i].s].path, a->sauces.p[a->things.p[i].s].line); switch (a->things.p[i].t) { case TT_INT: printf("TT_INT %ld\n", a->ints.p[a->things.p[i].i]); break; case TT_FLOAT: g_xfmt_p(fbuf, &a->floats.p[a->things.p[i].i], 19, sizeof(fbuf), 0); printf("TT_FLOAT %s\n", fbuf); break; case TT_SLICE: printf("TT_SLICE %`'.*s\n", a->slices.p[a->things.p[i].i].n, a->slices.p[a->things.p[i].i].p); break; case TT_PUNCT: printf("TT_PUNCT %`'c\n", a->things.p[i].i); break; default: unreachable; } } } void Assembler(int argc, char *argv[]) { struct Assembler *a; showcrashreports(); if (argc == 1) { system("o//third_party/chibicc/as.com -o /tmp/o third_party/chibicc/hog.s"); system("objdump -wxd /tmp/o"); exit(0); } ReadFlags(argc, argv); a = NewAssembler(); Tokenize(a, g_input_path); /* PrintThings(a); */ Assemble(a); Objectify(a, g_output_path); } int main(int argc, char *argv[]) { Assembler(argc, argv); return 0; }