Browse Source

Integrate more chibicc changes

main
Justine Tunney 2 years ago
parent
commit
15280753e2
  1. 44
      third_party/chibicc/alloc.c
  2. 9
      third_party/chibicc/chibicc.c
  3. 37
      third_party/chibicc/chibicc.h
  4. 10
      third_party/chibicc/codegen.c
  5. 14
      third_party/chibicc/hashmap.c
  6. 162
      third_party/chibicc/parse.c
  7. 62
      third_party/chibicc/preprocess.c
  8. 4
      third_party/chibicc/test/hog_test.c
  9. 9
      third_party/chibicc/test/initializer_test.c
  10. 18
      third_party/chibicc/test/struct_test.c
  11. 94
      third_party/chibicc/tokenize.c
  12. 4
      third_party/chibicc/type.c
  13. 8
      third_party/chibicc/unicode.c

44
third_party/chibicc/alloc.c vendored

@ -0,0 +1,44 @@ @@ -0,0 +1,44 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "third_party/chibicc/chibicc.h"
// Running totals of objects handed out by the alloc_*() helpers in this
// file. Each counter is incremented once per call to its matching allocator
// and is never decremented here.
long alloc_node_count;
long alloc_token_count;
long alloc_obj_count;
long alloc_type_count;
// Hands out one zero-filled Node and records the allocation in
// alloc_node_count. The pointer comes straight from calloc(); no NULL
// check is performed here.
Node *alloc_node(void) {
  Node *node = calloc(1, sizeof *node);
  alloc_node_count += 1;
  return node;
}
// Hands out one zero-filled Token and records the allocation in
// alloc_token_count. The pointer comes straight from calloc(); no NULL
// check is performed here.
Token *alloc_token(void) {
  Token *token = calloc(1, sizeof *token);
  alloc_token_count += 1;
  return token;
}
// Hands out one zero-filled Obj and records the allocation in
// alloc_obj_count. The pointer comes straight from calloc(); no NULL
// check is performed here.
Obj *alloc_obj(void) {
  Obj *obj = calloc(1, sizeof *obj);
  alloc_obj_count += 1;
  return obj;
}
// Hands out one zero-filled Type and records the allocation in
// alloc_type_count. The pointer comes straight from calloc(); no NULL
// check is performed here.
Type *alloc_type(void) {
  Type *type = calloc(1, sizeof *type);
  alloc_type_count += 1;
  return type;
}

9
third_party/chibicc/chibicc.c vendored

@ -141,7 +141,16 @@ static char *quote_makefile(char *s) { @@ -141,7 +141,16 @@ static char *quote_makefile(char *s) {
// Prints a memory usage report to stderr: the arena size reported by
// mallinfo(), followed by the number of Nodes, Tokens, Objs and Types counted
// by the alloc_*_count counters together with their total size in bytes
// (count multiplied by sizeof of the respective struct).
//
// NOTE(review): the ',' flag in "%,ld" is not part of ISO C printf;
// presumably the target libc treats it as a thousands separator -- confirm.
static void PrintMemoryUsage(void) {
  struct mallinfo mi = mallinfo();
  fprintf(stderr, "\n");
  // Every value is cast to long so the argument type matches the %ld
  // conversion no matter how mi.arena is declared, and despite the
  // sizeof products being size_t (unsigned).
  fprintf(stderr, "allocated %,ld bytes of memory\n", (long)mi.arena);
  fprintf(stderr, "allocated %,ld nodes (%,ld bytes)\n", alloc_node_count,
          (long)(sizeof(Node) * alloc_node_count));
  fprintf(stderr, "allocated %,ld tokens (%,ld bytes)\n", alloc_token_count,
          (long)(sizeof(Token) * alloc_token_count));
  fprintf(stderr, "allocated %,ld objs (%,ld bytes)\n", alloc_obj_count,
          (long)(sizeof(Obj) * alloc_obj_count));
  fprintf(stderr, "allocated %,ld types (%,ld bytes)\n", alloc_type_count,
          (long)(sizeof(Type) * alloc_type_count));
}
static void strarray_push_comma(StringArray *a, char *s) {

37
third_party/chibicc/chibicc.h vendored

@ -62,14 +62,14 @@ void strarray_push(StringArray *, char *); @@ -62,14 +62,14 @@ void strarray_push(StringArray *, char *);
// tokenize.c
//
// Token
typedef enum {
TK_RESERVED, // Keywords or punctuators
TK_IDENT, // Identifiers
TK_STR, // String literals
TK_NUM, // Numeric literals
TK_PP_NUM, // Preprocessing numbers
TK_EOF, // End-of-file markers
TK_IDENT, // Identifiers
TK_PUNCT, // Punctuators
TK_KEYWORD, // Keywords
TK_STR, // String literals
TK_NUM, // Numeric literals
TK_PP_NUM, // Preprocessing numbers
TK_EOF, // End-of-file markers
} TokenKind;
struct File {
@ -81,15 +81,14 @@ struct File { @@ -81,15 +81,14 @@ struct File {
int line_delta;
};
// Token type
struct Token {
TokenKind kind; // Token kind
struct thatispacked Token {
Token *next; // Next token
int len; // Token length
int line_no; // Line number
int line_delta; // Line number
TokenKind kind; // Token kind
bool at_bol; // True if this token is at beginning of line
bool has_space; // True if this token follows a space character
Token *next; // Next token
char *loc; // Token location
Type *ty; // Used if TK_NUM or TK_STR
File *file; // Source location
@ -518,7 +517,7 @@ int encode_utf8(char *, uint32_t); @@ -518,7 +517,7 @@ int encode_utf8(char *, uint32_t);
uint32_t decode_utf8(char **, char *);
bool is_ident1(uint32_t);
bool is_ident2(uint32_t);
int str_width(char *, int);
int display_width(char *, int);
//
// hashmap.c
@ -564,6 +563,20 @@ extern bool opt_sse4; @@ -564,6 +563,20 @@ extern bool opt_sse4;
extern bool opt_verbose;
extern char *base_file;
//
// alloc.c
//
// Allocation counters, one per object kind (Node, Token, Obj, Type).
extern long alloc_node_count;
extern long alloc_token_count;
extern long alloc_obj_count;
extern long alloc_type_count;
// Allocators, one per object kind; each takes no arguments and returns a
// pointer to an object of that kind.
Node *alloc_node(void);
Token *alloc_token(void);
Obj *alloc_obj(void);
Type *alloc_type(void);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_THIRD_PARTY_CHIBICC_CHIBICC_H_ */

10
third_party/chibicc/codegen.c vendored

@ -2219,7 +2219,7 @@ static void emit_data(Obj *prog) { @@ -2219,7 +2219,7 @@ static void emit_data(Obj *prog) {
int align = (var->ty->kind == TY_ARRAY && var->ty->size >= 16)
? MAX(16, var->align)
: var->align;
if (opt_common && var->is_tentative && !var->is_tls) {
if (opt_common && var->is_tentative) {
println("\t.comm\t%s,%d,%d", nameof(var), var->ty->size, align);
} else {
if (var->section) {
@ -2410,10 +2410,10 @@ static void emit_text(Obj *prog) { @@ -2410,10 +2410,10 @@ static void emit_text(Obj *prog) {
// Emit code
gen_stmt(fn->body);
assert(!depth);
// The C spec defines a special rule for the main function.
// Reaching the end of the main function is equivalent to
// returning 0, even though the behavior is undefined for the
// other functions. See C11 5.1.2.2.3.
// [https://www.sigbus.info/n1570#5.1.2.2.3p1] The C spec defines
// a special rule for the main function. Reaching the end of the
// main function is equivalent to returning 0, even though the
// behavior is undefined for the other functions.
if (strcmp(nameof(fn), "main") == 0) {
emitlin("\txor\t%eax,%eax");
}

14
third_party/chibicc/hashmap.c vendored

@ -2,7 +2,10 @@ @@ -2,7 +2,10 @@
#include "third_party/chibicc/chibicc.h"
#define TOMBSTONE ((void *)-1) // Represents a deleted hash entry
#define INIT_SIZE 16 // initial hash bucket size
#define LOW_WATERMARK 50 // keep usage below 50% after rehashing
#define HIGH_WATERMARK 70 // perform rehash when usage exceeds 70%
#define TOMBSTONE ((void *)-1) // represents deleted hash table entry
static uint64_t fnv_hash(char *s, int len) {
uint64_t hash = 0xcbf29ce484222325;
@ -24,7 +27,8 @@ static void rehash(HashMap *map) { @@ -24,7 +27,8 @@ static void rehash(HashMap *map) {
}
}
size_t cap = map->capacity;
while ((nkeys * 100) / cap >= 50) cap = cap * 2;
while ((nkeys * 100) / cap >= LOW_WATERMARK) cap = cap * 2;
assert(cap > 0);
// Create a new hashmap and copy all key-values.
HashMap map2 = {};
map2.buckets = calloc(cap, sizeof(HashEntry));
@ -56,9 +60,11 @@ static HashEntry *get_entry(HashMap *map, char *key, int keylen) { @@ -56,9 +60,11 @@ static HashEntry *get_entry(HashMap *map, char *key, int keylen) {
static HashEntry *get_or_insert_entry(HashMap *map, char *key, int keylen) {
if (!map->buckets) {
map->buckets = calloc((map->capacity = 16), sizeof(HashEntry));
map->buckets = calloc(INIT_SIZE, sizeof(HashEntry));
map->capacity = INIT_SIZE;
} else if ((map->used * 100) / map->capacity >= HIGH_WATERMARK) {
rehash(map);
}
if ((map->used * 100) / map->capacity >= 70) rehash(map);
uint64_t hash = fnv_hash(key, keylen);
for (int i = 0; i < map->capacity; i++) {
HashEntry *ent = &map->buckets[(hash + i) & (map->capacity - 1)];

162
third_party/chibicc/parse.c vendored

@ -25,21 +25,13 @@ typedef struct Scope Scope; @@ -25,21 +25,13 @@ typedef struct Scope Scope;
// Scope for local variables, global variables, typedefs
// or enum constants
typedef struct {
char *name;
int depth;
Obj *var;
Type *type_def;
Type *enum_ty;
int enum_val;
} VarScope;
// Scope for struct, union or enum tags
typedef struct {
char *name;
int depth;
Type *ty;
} TagScope;
// Represents a block scope.
struct Scope {
Scope *next;
// C has two block scopes; one is for variables/typedefs and
@ -103,10 +95,6 @@ static Obj *globals; @@ -103,10 +95,6 @@ static Obj *globals;
static Scope *scope = &(Scope){};
// scope_depth is incremented by one at the beginning of a block
// scope and decremented by one at the end of a block scope.
static int scope_depth;
// Points to the function object the parser is currently parsing.
static Obj *current_fn;
@ -173,12 +161,10 @@ static void enter_scope(void) { @@ -173,12 +161,10 @@ static void enter_scope(void) {
Scope *sc = calloc(1, sizeof(Scope));
sc->next = scope;
scope = sc;
scope_depth++;
}
static void leave_scope(void) {
scope = scope->next;
scope_depth--;
}
// Find a variable by name.
@ -190,16 +176,16 @@ static VarScope *find_var(Token *tok) { @@ -190,16 +176,16 @@ static VarScope *find_var(Token *tok) {
return NULL;
}
static TagScope *find_tag(Token *tok) {
static Type *find_tag(Token *tok) {
for (Scope *sc = scope; sc; sc = sc->next) {
TagScope *sc2 = hashmap_get2(&sc->tags, tok->loc, tok->len);
if (sc2) return sc2;
Type *ty = hashmap_get2(&sc->tags, tok->loc, tok->len);
if (ty) return ty;
}
return NULL;
}
Node *new_node(NodeKind kind, Token *tok) {
Node *node = calloc(1, sizeof(Node));
Node *node = alloc_node();
node->kind = kind;
node->tok = tok;
return node;
@ -252,7 +238,7 @@ static Node *new_vla_ptr(Obj *var, Token *tok) { @@ -252,7 +238,7 @@ static Node *new_vla_ptr(Obj *var, Token *tok) {
Node *new_cast(Node *expr, Type *ty) {
add_type(expr);
Node *node = calloc(1, sizeof(Node));
Node *node = alloc_node();
node->kind = ND_CAST;
node->tok = expr->tok;
node->lhs = expr;
@ -262,8 +248,6 @@ Node *new_cast(Node *expr, Type *ty) { @@ -262,8 +248,6 @@ Node *new_cast(Node *expr, Type *ty) {
static VarScope *push_scope(char *name) {
VarScope *sc = calloc(1, sizeof(VarScope));
sc->name = name;
sc->depth = scope_depth;
hashmap_put(&scope->vars, name, sc);
return sc;
}
@ -303,7 +287,7 @@ static Initializer *new_initializer(Type *ty, bool is_flexible) { @@ -303,7 +287,7 @@ static Initializer *new_initializer(Type *ty, bool is_flexible) {
}
static Obj *new_var(char *name, Type *ty) {
Obj *var = calloc(1, sizeof(Obj));
Obj *var = alloc_obj();
var->name = name;
var->ty = ty;
var->align = ty->align;
@ -330,9 +314,7 @@ static Obj *new_gvar(char *name, Type *ty) { @@ -330,9 +314,7 @@ static Obj *new_gvar(char *name, Type *ty) {
static char *new_unique_name(void) {
static int id = 0;
char *buf = calloc(1, 20);
sprintf(buf, ".L..%d", id++);
return buf;
return xasprintf(".L..%d", id++);
}
static Obj *new_anon_gvar(Type *ty) {
@ -360,11 +342,7 @@ static Type *find_typedef(Token *tok) { @@ -360,11 +342,7 @@ static Type *find_typedef(Token *tok) {
}
static void push_tag_scope(Token *tok, Type *ty) {
TagScope *sc = calloc(1, sizeof(TagScope));
sc->name = strndup(tok->loc, tok->len);
sc->depth = scope_depth;
sc->ty = ty;
hashmap_put2(&scope->tags, tok->loc, tok->len, sc);
hashmap_put2(&scope->tags, tok->loc, tok->len, ty);
}
// Consumes token if equal to STR or __STR__.
@ -599,9 +577,14 @@ static Token *thing_attributes(Token *tok, void *arg) { @@ -599,9 +577,14 @@ static Token *thing_attributes(Token *tok, void *arg) {
error_tok(tok, "unknown function attribute");
}
// typespec = typename typename*
// typename = "void" | "_Bool" | "char" | "short" | "int" | "long"
// | struct-decl | union-decl | typedef-name
// declspec = ("void" | "_Bool" | "char" | "short" | "int" | "long"
// | "typedef" | "static" | "extern" | "inline"
// | "_Thread_local" | "__thread"
// | "signed" | "unsigned"
// | struct-decl | union-decl | typedef-name
// | enum-specifier | typeof-specifier
// | "const" | "volatile" | "auto" | "register" | "restrict"
// | "__restrict" | "__restrict__" | "_Noreturn")+
//
// The order of typenames in a type-specifier doesn't matter. For
// example, `int long static` means the same as `static long int`.
@ -614,7 +597,7 @@ static Token *thing_attributes(Token *tok, void *arg) { @@ -614,7 +597,7 @@ static Token *thing_attributes(Token *tok, void *arg) {
// while keeping the "current" type object that the typenames up
// until that point represent. When we reach a non-typename token,
// we return the current type object.
static Type *typespec(Token **rest, Token *tok, VarAttr *attr) {
static Type *declspec(Token **rest, Token *tok, VarAttr *attr) {
// We use a single integer as counters for all typenames.
// For example, bits 0 and 1 represent how many times we saw the
// keyword "void" so far. With this, we can use a switch statement
@ -851,7 +834,7 @@ static Token *static_assertion(Token *tok) { @@ -851,7 +834,7 @@ static Token *static_assertion(Token *tok) {
}
// func-params = ("void" | param ("," param)* ("," "...")?)? ")"
// param = typespec declarator
// param = declspec declarator
static Type *func_params(Token **rest, Token *tok, Type *ty) {
if (EQUAL(tok, "void") && EQUAL(tok->next, ")")) {
*rest = tok->next->next;
@ -868,7 +851,7 @@ static Type *func_params(Token **rest, Token *tok, Type *ty) { @@ -868,7 +851,7 @@ static Type *func_params(Token **rest, Token *tok, Type *ty) {
skip(tok, ')');
break;
}
Type *ty2 = typespec(&tok, tok, NULL);
Type *ty2 = declspec(&tok, tok, NULL);
ty2 = declarator(&tok, tok, ty2);
Token *name = ty2->name;
if (ty2->kind == TY_ARRAY) {
@ -935,8 +918,8 @@ static Type *declarator(Token **rest, Token *tok, Type *ty) { @@ -935,8 +918,8 @@ static Type *declarator(Token **rest, Token *tok, Type *ty) {
ty = pointers(&tok, tok, ty);
if (EQUAL(tok, "(")) {
Token *start = tok;
Type ignore = {};
declarator(&tok, tok->next, &ignore);
Type dummy = {};
declarator(&tok, start->next, &dummy);
tok = skip(tok, ')');
ty = type_suffix(rest, tok, ty);
ty = declarator(&tok, start->next, ty);
@ -959,8 +942,8 @@ static Type *abstract_declarator(Token **rest, Token *tok, Type *ty) { @@ -959,8 +942,8 @@ static Type *abstract_declarator(Token **rest, Token *tok, Type *ty) {
ty = pointers(&tok, tok, ty);
if (EQUAL(tok, "(")) {
Token *start = tok;
Type ignore = {};
abstract_declarator(&tok, tok->next, &ignore);
Type dummy = {};
abstract_declarator(&tok, start->next, &dummy);
tok = skip(tok, ')');
ty = type_suffix(rest, tok, ty);
return abstract_declarator(&tok, start->next, ty);
@ -968,9 +951,9 @@ static Type *abstract_declarator(Token **rest, Token *tok, Type *ty) { @@ -968,9 +951,9 @@ static Type *abstract_declarator(Token **rest, Token *tok, Type *ty) {
return type_suffix(rest, tok, ty);
}
// type-name = typespec abstract-declarator
// type-name = declspec abstract-declarator
static Type *typename(Token **rest, Token *tok) {
Type *ty = typespec(&tok, tok, NULL);
Type *ty = declspec(&tok, tok, NULL);
return abstract_declarator(rest, tok, ty);
}
@ -1003,11 +986,11 @@ static Type *enum_specifier(Token **rest, Token *tok) { @@ -1003,11 +986,11 @@ static Type *enum_specifier(Token **rest, Token *tok) {
tok = tok->next;
}
if (tag && !EQUAL(tok, "{")) {
TagScope *sc = find_tag(tag);
if (!sc) error_tok(tag, "unknown enum type");
if (sc->ty->kind != TY_ENUM) error_tok(tag, "not an enum tag");
Type *ty = find_tag(tag);
if (!ty) error_tok(tag, "unknown enum type");
if (ty->kind != TY_ENUM) error_tok(tag, "not an enum tag");
*rest = tok;
return sc->ty;
return ty;
}
tok = skip(tok, '{');
// Read an enum-list.
@ -1070,8 +1053,8 @@ static Node *new_alloca(Node *sz) { @@ -1070,8 +1053,8 @@ static Node *new_alloca(Node *sz) {
return node;
}
// declaration = typespec (declarator ("=" expr)? ("," declarator ("="
// expr)?)*)? ";"
// declaration = declspec (declarator ("=" expr)?
// ("," declarator ("=" expr)?)*)? ";"
static Node *declaration(Token **rest, Token *tok, Type *basety,
VarAttr *attr) {
Node head = {};
@ -1363,9 +1346,11 @@ static void struct_initializer1(Token **rest, Token *tok, Initializer *init) { @@ -1363,9 +1346,11 @@ static void struct_initializer1(Token **rest, Token *tok, Initializer *init) {
// struct-initializer2 = initializer ("," initializer)*
static void struct_initializer2(Token **rest, Token *tok, Initializer *init,
Member *mem) {
bool first = true;
for (; mem && !is_end(tok); mem = mem->next) {
Token *start = tok;
if (mem != init->ty->members) tok = skip(tok, ',');
if (!first) tok = skip(tok, ',');
first = false;
if (EQUAL(tok, "[") || EQUAL(tok, ".")) {
*rest = start;
return;
@ -1389,6 +1374,7 @@ static void union_initializer(Token **rest, Token *tok, Initializer *init) { @@ -1389,6 +1374,7 @@ static void union_initializer(Token **rest, Token *tok, Initializer *init) {
init->mem = init->ty->members;
if (EQUAL(tok, "{")) {
initializer2(&tok, tok->next, init->children[0]);
CONSUME(&tok, tok, ",");
*rest = skip(tok, '}');
} else {
initializer2(rest, tok, init->children[0]);
@ -1769,7 +1755,7 @@ static Node *stmt(Token **rest, Token *tok) { @@ -1769,7 +1755,7 @@ static Node *stmt(Token **rest, Token *tok) {
brk_label = node->brk_label = new_unique_name();
cont_label = node->cont_label = new_unique_name();
if (is_typename(tok)) {
Type *basety = typespec(&tok, tok, NULL);
Type *basety = declspec(&tok, tok, NULL);
node->init = declaration(&tok, tok, basety, NULL);
} else {
node->init = expr_stmt(&tok, tok);
@ -1872,7 +1858,7 @@ static Node *compound_stmt(Token **rest, Token *tok) { @@ -1872,7 +1858,7 @@ static Node *compound_stmt(Token **rest, Token *tok) {
while (!EQUAL(tok, "}")) {
if (is_typename(tok) && !EQUAL(tok->next, ":")) {
VarAttr attr = {};
Type *basety = typespec(&tok, tok, &attr);
Type *basety = declspec(&tok, tok, &attr);
if (attr.is_typedef) {
tok = parse_typedef(tok, basety);
continue;
@ -2565,30 +2551,14 @@ static Node *mul(Token **rest, Token *tok) { @@ -2565,30 +2551,14 @@ static Node *mul(Token **rest, Token *tok) {
}
}
// compound-literal = initializer "}"
static Node *compound_literal(Token **rest, Token *tok, Type *ty,
Token *start) {
if (scope_depth == 0) {
Obj *var = new_anon_gvar(ty);
gvar_initializer(rest, tok, var);
return new_var_node(var, start);
}
Obj *var = new_lvar(new_unique_name(), ty);
Node *lhs = lvar_initializer(rest, tok, var);
Node *rhs = new_var_node(var, tok);
return new_binary(ND_COMMA, lhs, rhs, tok);
}
// cast = "(" type-name ")" "{" compound-literal
// | "(" type-name ")" cast
// | unary
// cast = "(" type-name ")" cast | unary
static Node *cast(Token **rest, Token *tok) {
if (EQUAL(tok, "(") && is_typename(tok->next)) {
Token *start = tok;
Type *ty = typename(&tok, tok->next);
tok = skip(tok, ')');
// compound literal
if (EQUAL(tok, "{")) return compound_literal(rest, tok, ty, start);
if (EQUAL(tok, "{")) return unary(rest, start);
// type cast
Node *node = new_cast(cast(rest, tok), ty);
node->tok = start;
@ -2612,9 +2582,10 @@ static Node *unary(Token **rest, Token *tok) { @@ -2612,9 +2582,10 @@ static Node *unary(Token **rest, Token *tok) {
return new_unary(ND_ADDR, lhs, tok);
}
if (EQUAL(tok, "*")) {
// [C18 6.5.3.2p4] This is an oddity in the C spec, but dereferencing
// a function shouldn't do anything. If foo is a function, `*foo`,
// `**foo` or `*****foo` are all equivalent to just `foo`.
// [https://www.sigbus.info/n1570#6.5.3.2p4] This is an oddity
// in the C spec, but dereferencing a function shouldn't do
// anything. If foo is a function, `*foo`, `**foo` or `*****foo`
// are all equivalent to just `foo`.
Node *node = cast(rest, tok->next);
add_type(node);
if (node->ty->kind == TY_FUNC) return node;
@ -2640,14 +2611,14 @@ static Node *unary(Token **rest, Token *tok) { @@ -2640,14 +2611,14 @@ static Node *unary(Token **rest, Token *tok) {
return postfix(rest, tok);
}
// struct-members = (typespec declarator ("," declarator)* ";")*
// struct-members = (declspec declarator ("," declarator)* ";")*
static void struct_members(Token **rest, Token *tok, Type *ty) {
Member head = {};
Member *cur = &head;
int idx = 0;
while (!EQUAL(tok, "}")) {
VarAttr attr = {};
Type *basety = typespec(&tok, tok, &attr);
Type *basety = declspec(&tok, tok, &attr);
bool first = true;
// Anonymous struct member
if ((basety->kind == TY_STRUCT || basety->kind == TY_UNION) &&
@ -2708,8 +2679,8 @@ static Type *struct_union_decl(Token **rest, Token *tok) { @@ -2708,8 +2679,8 @@ static Type *struct_union_decl(Token **rest, Token *tok) {
}
if (tag && !EQUAL(tok, "{")) {
*rest = tok;
TagScope *sc = find_tag(tag);
if (sc) return sc->ty;
Type *ty2 = find_tag(tag);
if (ty2) return ty2;
ty->size = -1;
push_tag_scope(tag, ty);
return ty;
@ -2721,10 +2692,10 @@ static Type *struct_union_decl(Token **rest, Token *tok) { @@ -2721,10 +2692,10 @@ static Type *struct_union_decl(Token **rest, Token *tok) {
if (tag) {
// If this is a redefinition, overwrite a previous type.
// Otherwise, register the struct type.
TagScope *sc = find_tag(tag);
if (sc && sc->depth == scope_depth) {
*sc->ty = *ty;
return sc->ty;
Type *ty2 = hashmap_get2(&scope->tags, tag->loc, tag->len);
if (ty2) {
*ty2 = *ty;
return ty2;
}
push_tag_scope(tag, ty);
}
@ -2837,7 +2808,8 @@ static Node *new_inc_dec(Node *node, Token *tok, int addend) { @@ -2837,7 +2808,8 @@ static Node *new_inc_dec(Node *node, Token *tok, int addend) {
node->ty);
}
// postfix = ident "(" func-args ")" postfix-tail*
// postfix = "(" type-name ")" "{" initializer-list "}"
// | ident "(" func-args ")" postfix-tail*
// | primary postfix-tail*
//
// postfix-tail = "[" expr "]"
@ -2847,6 +2819,21 @@ static Node *new_inc_dec(Node *node, Token *tok, int addend) { @@ -2847,6 +2819,21 @@ static Node *new_inc_dec(Node *node, Token *tok, int addend) {
// | "++"
// | "--"
static Node *postfix(Token **rest, Token *tok) {
if (EQUAL(tok, "(") && is_typename(tok->next)) {
// Compound literal
Token *start = tok;
Type *ty = typename(&tok, tok->next);
tok = skip(tok, ')');
if (scope->next == NULL) {
Obj *var = new_anon_gvar(ty);
gvar_initializer(rest, tok, var);
return new_var_node(var, start);
}
Obj *var = new_lvar("", ty);
Node *lhs = lvar_initializer(rest, tok, var);
Node *rhs = new_var_node(var, tok);
return new_binary(ND_COMMA, lhs, rhs, start);
}
Node *node = primary(&tok, tok);
for (;;) {
if (EQUAL(tok, "(")) {
@ -2961,7 +2948,7 @@ static Node *generic_selection(Token **rest, Token *tok) { @@ -2961,7 +2948,7 @@ static Node *generic_selection(Token **rest, Token *tok) {
return ret;
}
// primary = "(" "{" stmt stmt* "}" ")"
// primary = "(" "{" stmt+ "}" ")"
// | "(" expr ")"
// | "sizeof" "(" type-name ")"
// | "sizeof" unary
@ -3367,8 +3354,9 @@ static Token *function(Token *tok, Type *basety, VarAttr *attr) { @@ -3367,8 +3354,9 @@ static Token *function(Token *tok, Type *basety, VarAttr *attr) {
fn->va_area = new_lvar("__va_area__", array_of(ty_char, 136));
fn->alloca_bottom = new_lvar("__alloca_size__", pointer_to(ty_char));
tok = skip(tok, '{');
// [C18 6.4.2.2] "__func__" is automatically defined as a
// local variable containing the current function name.
// [https://www.sigbus.info/n1570#6.4.2.2p1] "__func__" is
// automatically defined as a local variable containing the
// current function name.
push_scope("__func__")->var =
new_string_literal(fn->name, array_of(ty_char, strlen(fn->name) + 1));
// [GNU] __FUNCTION__ is yet another name of __func__.
@ -3401,7 +3389,7 @@ static Token *global_variable(Token *tok, Type *basety, VarAttr *attr) { @@ -3401,7 +3389,7 @@ static Token *global_variable(Token *tok, Type *basety, VarAttr *attr) {
if (attr->align) var->align = attr->align;
if (EQUAL(tok, "=")) {
gvar_initializer(&tok, tok->next, var);
} else if (!attr->is_extern) {
} else if (!attr->is_extern && !attr->is_tls) {
var->is_tentative = true;
}
}
@ -3537,7 +3525,7 @@ Obj *parse(Token *tok) { @@ -3537,7 +3525,7 @@ Obj *parse(Token *tok) {
}
VarAttr attr = {};
tok = attribute_list(tok, &attr, thing_attributes);
Type *basety = typespec(&tok, tok, &attr);
Type *basety = declspec(&tok, tok, &attr);
if (attr.is_typedef) {
tok = parse_typedef(tok, basety);
continue;

62
third_party/chibicc/preprocess.c vendored

@ -96,7 +96,7 @@ static Token *skip_line(Token *tok) { @@ -96,7 +96,7 @@ static Token *skip_line(Token *tok) {
}
static Token *copy_token(Token *tok) {
Token *t = calloc(1, sizeof(Token));
Token *t = alloc_token();
*t = *tok;
t->next = NULL;
return t;
@ -234,9 +234,8 @@ static Token *copy_line(Token **rest, Token *tok) { @@ -234,9 +234,8 @@ static Token *copy_line(Token **rest, Token *tok) {
}
static Token *new_num_token(int val, Token *tmpl) {
char buf[30];
sprintf(buf, "%d\n", val);
return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, strdup(buf)));
char *buf = xasprintf("%d\n", val);
return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, buf));
}
static Token *read_const_expr(Token **rest, Token *tok) {
@ -270,10 +269,10 @@ static long eval_const_expr(Token **rest, Token *tok) { @@ -270,10 +269,10 @@ static long eval_const_expr(Token **rest, Token *tok) {
Token *expr = read_const_expr(rest, tok->next);
expr = preprocess2(expr);
if (expr->kind == TK_EOF) error_tok(start, "no expression");
// [C18 6.10.1.4] The standard requires we replace remaining
// non-macro identifiers with "0" before evaluating a constant
// expression. For example, `#if foo` is equivalent to `#if 0`
// if foo is not defined.
// [https://www.sigbus.info/n1570#6.10.1p4] The standard requires
// we replace remaining non-macro identifiers with "0" before
// evaluating a constant expression. For example, `#if foo` is
// equivalent to `#if 0` if foo is not defined.
for (Token *t = expr; t->kind != TK_EOF; t = t->next) {
if (t->kind == TK_IDENT) {
Token *next = t->next;
@ -453,8 +452,7 @@ static Token *stringize(Token *hash, Token *arg) { @@ -453,8 +452,7 @@ static Token *stringize(Token *hash, Token *arg) {
// Concatenate two tokens to create a new token.
static Token *paste(Token *lhs, Token *rhs) {
// Paste the two tokens.
char *buf = calloc(1, lhs->len + rhs->len + 1);
sprintf(buf, "%.*s%.*s", lhs->len, lhs->loc, rhs->len, rhs->loc);
char *buf = xasprintf("%.*s%.*s", lhs->len, lhs->loc, rhs->len, rhs->loc);
// Tokenize the resulting string.
Token *tok = tokenize(new_file(lhs->file->name, lhs->file->file_no, buf));
if (tok->next->kind != TK_EOF)
@ -706,7 +704,7 @@ static char *detect_include_guard(Token *tok) { @@ -706,7 +704,7 @@ static char *detect_include_guard(Token *tok) {
return NULL;
}
static Token *include_file(Token *tok, char *path) {
static Token *include_file(Token *tok, char *path, Token *filename_tok) {
// Check for "#pragma once"
if (hashmap_get(&pragma_once, path)) return tok;
// If we read the same file before, and if the file was guarded
@ -716,7 +714,8 @@ static Token *include_file(Token *tok, char *path) { @@ -716,7 +714,8 @@ static Token *include_file(Token *tok, char *path) {
char *guard_name = hashmap_get(&include_guards, path);
if (guard_name && hashmap_get(&macros, guard_name)) return tok;
Token *tok2 = tokenize_file(path);
if (!tok2) error_tok(tok, "%s: cannot open file: %s", path, strerror(errno));
if (!tok2)
error_tok(filename_tok, "%s: cannot open file: %s", path, strerror(errno));
guard_name = detect_include_guard(tok2);
if (guard_name) hashmap_put(&include_guards, path, guard_name);
return append(tok2, tok);
@ -760,19 +759,19 @@ static Token *preprocess2(Token *tok) { @@ -760,19 +759,19 @@ static Token *preprocess2(Token *tok) {
char *path =
xasprintf("%s/%s", dirname(strdup(start->file->name)), filename);
if (fileexists(path)) {
tok = include_file(tok, path);
tok = include_file(tok, path, start->next->next);
continue;
}
}
char *path = search_include_paths(filename);
tok = include_file(tok, path ? path : filename);
tok = include_file(tok, path ? path : filename, start->next->next);
continue;
}
if (EQUAL(tok, "include_next")) {
bool ignore;
char *filename = read_include_filename(&tok, tok->next, &ignore);
char *path = search_include_next(filename);
tok = include_file(tok, path ? path : filename);
tok = include_file(tok, path ? path : filename, start->next->next);
continue;
}
if (EQUAL(tok, "define")) {
@ -914,17 +913,13 @@ static char *format_date(struct tm *tm) { @@ -914,17 +913,13 @@ static char *format_date(struct tm *tm) {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
};
char buf[30];
sprintf(buf, "\"%s %2d %d\"", mon[tm->tm_mon], tm->tm_mday,
tm->tm_year + 1900);
return strdup(buf);
return xasprintf("\"%s %2d %d\"", mon[tm->tm_mon], tm->tm_mday,
tm->tm_year + 1900);
}
// __TIME__ is expanded to the current time, e.g. "13:34:03".
static char *format_time(struct tm *tm) {
char buf[30];
sprintf(buf, "\"%02d:%02d:%02d\"", tm->tm_hour, tm->tm_min, tm->tm_sec);
return strdup(buf);
return xasprintf("\"%02d:%02d:%02d\"", tm->tm_hour, tm->tm_min, tm->tm_sec);
}
void init_macros(void) {
@ -1302,11 +1297,11 @@ static void join_adjacent_string_literals(Token *tok) { @@ -1302,11 +1297,11 @@ static void join_adjacent_string_literals(Token *tok) {
}
// Second pass: concatenate adjacent string literals.
for (Token *tok1 = tok; tok1->kind != TK_EOF;) {
Token *tok2 = tok1->next;
if (tok1->kind != TK_STR || tok2->kind != TK_STR) {
if (tok1->kind != TK_STR || tok1->next->kind != TK_STR) {
tok1 = tok1->next;
continue;
}
#if 0
assert(tok1->ty->base->size == tok2->ty->base->size);
Token *t = copy_token(tok1);
t->ty =
@ -1317,6 +1312,25 @@ static void join_adjacent_string_literals(Token *tok) { @@ -1317,6 +1312,25 @@ static void join_adjacent_string_literals(Token *tok) {
tok2->str, tok2->ty->size);
t->len = strlen(t->loc);
*tok1 = *t;
#else
Token *tok2 = tok1->next;
while (tok2->kind == TK_STR) tok2 = tok2->next;
int len = tok1->ty->array_len;
for (Token *t = tok1->next; t != tok2; t = t->next) {
len = len + t->ty->array_len - 1;
}
char *buf = calloc(tok1->ty->base->size, len);
int i = 0;
for (Token *t = tok1; t != tok2; t = t->next) {
memcpy(buf + i, t->str, t->ty->size);
i = i + t->ty->size - t->ty->base->size;
}
*tok1 = *copy_token(tok1);
tok1->ty = array_of(tok1->ty->base, len);
tok1->str = buf;
tok1->next = tok2;
tok1 = tok2;
#endif
}
}

4
third_party/chibicc/test/hog_test.c vendored

@ -1,4 +0,0 @@ @@ -1,4 +0,0 @@
main(void) {
void *p;
p = "hello";
}

9
third_party/chibicc/test/initializer_test.c vendored

@ -445,6 +445,15 @@ int main() { @@ -445,6 +445,15 @@ int main() {
};
x.a;
}));
ASSERT(1, ({
union {
int a;
char b;
} x = {
1,
};
x.a;
}));
ASSERT(2, ({
enum {
x,

18
third_party/chibicc/test/struct_test.c vendored

@ -392,6 +392,24 @@ int main() { @@ -392,6 +392,24 @@ int main() {
} x = {1}, y = {2};
(0 ? x : y).a;
}));
ASSERT(2, ({
struct {
int a;
} x = {1}, y = {2};
(x = y).a;
}));
ASSERT(1, ({
struct {
int a;
} x = {1}, y = {2};
(1 ? x : y).a;
}));
ASSERT(2, ({
struct {
int a;
} x = {1}, y = {2};
(0 ? x : y).a;
}));
return 0;
}

94
third_party/chibicc/tokenize.c vendored

@ -38,7 +38,7 @@ static void verror_at(char *filename, char *input, int line_no, char *loc, @@ -38,7 +38,7 @@ static void verror_at(char *filename, char *input, int line_no, char *loc,
int indent = fprintf(stderr, "%s:%d: ", filename, line_no);
fprintf(stderr, "%.*s\n", (int)(end - line), line);
// Show the error message.
int pos = str_width(line, loc - line) + indent;
int pos = display_width(line, loc - line) + indent;
fprintf(stderr, "%*s", pos, ""); // print pos spaces.
fprintf(stderr, "^ ");
vfprintf(stderr, fmt, ap);
@ -53,6 +53,7 @@ void error_at(char *loc, char *fmt, ...) { @@ -53,6 +53,7 @@ void error_at(char *loc, char *fmt, ...) {
va_list ap;
va_start(ap, fmt);
verror_at(current_file->name, current_file->contents, line_no, loc, fmt, ap);
va_end(ap);
exit(1);
}
@ -64,7 +65,7 @@ void error_tok(Token *tok, char *fmt, ...) { @@ -64,7 +65,7 @@ void error_tok(Token *tok, char *fmt, ...) {
verror_at(t->file->name, t->file->contents, t->line_no, t->loc, fmt, ap);
va_end(ap);
}
va_end(va);
va_end(ap);
exit(1);
}
@ -73,6 +74,7 @@ void warn_tok(Token *tok, char *fmt, ...) { @@ -73,6 +74,7 @@ void warn_tok(Token *tok, char *fmt, ...) {
va_start(ap, fmt);
verror_at(tok->file->name, tok->file->contents, tok->line_no, tok->loc, fmt,
ap);
va_end(ap);
}
static int is_space(int c) {
@ -103,9 +105,9 @@ Token *skip(Token *tok, char op) { @@ -103,9 +105,9 @@ Token *skip(Token *tok, char op) {
}
}
// Create a new token and add it as the next token of `cur`.
// Create a new token.
static Token *new_token(TokenKind kind, char *start, char *end) {
Token *tok = calloc(1, sizeof(Token));
Token *tok = alloc_token();
tok->kind = kind;
tok->loc = start;
tok->len = end - start;
@ -117,18 +119,17 @@ static Token *new_token(TokenKind kind, char *start, char *end) { @@ -117,18 +119,17 @@ static Token *new_token(TokenKind kind, char *start, char *end) {
return tok;
}
// Read an identifier and returns a pointer pointing to the end
// of an identifier.
//
// Returns null if p does not point to a valid identifier.
static char *read_ident(char *p) {
// Reads an identifier starting at `start` and returns its length in bytes.
// Returns 0 if `start` does not point to a valid identifier.
static int read_ident(char *start) {
char *p = start;
uint32_t c = decode_utf8(&p, p);
if (!is_ident1(c)) return NULL;
if (!is_ident1(c)) return 0;
for (;;) {
char *q;
c = decode_utf8(&q, p);
if (!('a' <= c && c <= 'f') && !is_ident2(c)) {
return p;
return p - start;
}
p = q;
}
@ -140,6 +141,19 @@ static int from_hex(char c) { @@ -140,6 +141,19 @@ static int from_hex(char c) {
return c - 'A' + 10;
}
// Reads a punctuator token starting at p and returns its length in
// bytes. Returns 0 if p does not start with a punctuator.
//
// Multi-character punctuators are listed longest first so that, for
// example, "<<=" is matched before its prefix "<<".
static int read_punct(char *p) {
  static char *kw[] = {"<<=", ">>=", "...", "==", "!=", "<=", ">=", "->",
                       "+=",  "-=",  "*=",  "/=", "++", "--", "%=", "&=",
                       "|=",  "^=",  "&&",  "||", "<<", ">>", "##"};
  for (size_t i = 0; i < sizeof(kw) / sizeof(*kw); i++) {
    if (startswith(p, kw[i])) {
      return strlen(kw[i]);
    }
  }
  // <ctype.h> classifiers are only defined for EOF and values that are
  // representable as unsigned char. A plain char holding a UTF-8 byte
  // may be negative, so convert before calling ispunct().
  return ispunct((unsigned char)*p) ? 1 : 0;
}
static bool is_keyword(Token *tok) {
static HashMap map;
if (map.capacity == 0) {
@ -190,6 +204,17 @@ static int read_escaped_char(char **new_pos, char *p) { @@ -190,6 +204,17 @@ static int read_escaped_char(char **new_pos, char *p) {
return c;
}
*new_pos = p + 1;
// Escape sequences are defined using themselves here. E.g.
// '\n' is implemented using '\n'. This tautological definition
// works because the compiler that compiles our compiler knows
// what '\n' actually is. In other words, we "inherit" the ASCII
// code of '\n' from the compiler that compiles our compiler,
// so we don't have to teach the actual code here.
//
// This fact has huge implications not only for the correctness
// of the compiler but also for the security of the generated code.
// For more info, read "Reflections on Trusting Trust" by Ken Thompson.
// https://github.com/rui314/chibicc/wiki/thompson1984.pdf
switch (*p) {
case 'a':
return '\a';
@ -217,7 +242,7 @@ static int read_escaped_char(char **new_pos, char *p) { @@ -217,7 +242,7 @@ static int read_escaped_char(char **new_pos, char *p) {
static char *string_literal_end(char *p) {
char *start = p;
for (; *p != '"'; p++) {
if (*p == '\0') error_at(start, "unclosed string literal");
if (*p == '\n' || *p == '\0') error_at(start, "unclosed string literal");
if (*p == '\\') p++;
}
return p;
@ -225,7 +250,7 @@ static char *string_literal_end(char *p) { @@ -225,7 +250,7 @@ static char *string_literal_end(char *p) {
static Token *read_string_literal(char *start, char *quote) {
char *end = string_literal_end(quote + 1);
char *buf = calloc(1, end - quote);
char *buf = calloc(2, end - quote);
int len = 0;
for (char *p = quote + 1; p < end;) {
if (*p == '\\')
@ -409,7 +434,7 @@ static void convert_pp_number(Token *tok) { @@ -409,7 +434,7 @@ static void convert_pp_number(Token *tok) {
void convert_pp_tokens(Token *tok) {
for (Token *t = tok; t->kind != TK_EOF; t = t->next) {
if (is_keyword(t))
t->kind = TK_RESERVED;
t->kind = TK_KEYWORD;
else if (t->kind == TK_PP_NUM)
convert_pp_number(t);
}
@ -546,34 +571,17 @@ Token *tokenize(File *file) { @@ -546,34 +571,17 @@ Token *tokenize(File *file) {
continue;
}
// Identifier or keyword
char *q;
if ((q = read_ident(p)) != NULL) {
cur = cur->next = new_token(TK_IDENT, p, q);
p = q;
continue;
}
// Three-letter punctuators
if (LOOKINGAT(p, "<<=") || LOOKINGAT(p, ">>=") || LOOKINGAT(p, "...")) {
cur = cur->next = new_token(TK_RESERVED, p, p + 3);
p += 3;
continue;
}
// Two-letter punctuators
if (LOOKINGAT(p, "==") || LOOKINGAT(p, "!=") || LOOKINGAT(p, "<=") ||
LOOKINGAT(p, ">=") || LOOKINGAT(p, "->") || LOOKINGAT(p, "+=") ||
LOOKINGAT(p, "-=") || LOOKINGAT(p, "*=") || LOOKINGAT(p, "/=") ||
LOOKINGAT(p, "++") || LOOKINGAT(p, "--") || LOOKINGAT(p, "%=") ||
LOOKINGAT(p, "&=") || LOOKINGAT(p, "|=") || LOOKINGAT(p, "^=") ||
LOOKINGAT(p, "&&") || LOOKINGAT(p, "||") || LOOKINGAT(p, "<<") ||
LOOKINGAT(p, ">>") || LOOKINGAT(p, "##")) {
cur = cur->next = new_token(TK_RESERVED, p, p + 2);
p += 2;
int ident_len = read_ident(p);
if (ident_len) {
cur = cur->next = new_token(TK_IDENT, p, p + ident_len);
p += cur->len;
continue;
}
// Single-letter punctuators
if (ispunct(*p)) {
cur = cur->next = new_token(TK_RESERVED, p, p + 1);
p++;
// Punctuators
int punct_len = read_punct(p);
if (punct_len) {
cur = cur->next = new_token(TK_PUNCT, p, p + punct_len);
p += cur->len;
continue;
}
error_at(p, "invalid token");
@ -665,6 +673,7 @@ static void remove_backslash_newline(char *p) { @@ -665,6 +673,7 @@ static void remove_backslash_newline(char *p) {
p[j++] = p[i++];
}
}
for (; n > 0; n--) p[j++] = '\n';
p[j] = '\0';
}
@ -710,6 +719,11 @@ static void convert_universal_chars(char *p) { @@ -710,6 +719,11 @@ static void convert_universal_chars(char *p) {
Token *tokenize_file(char *path) {
char *p = read_file(path);
if (!p) return NULL;
// UTF-8 text may start with a 3-byte "BOM" marker sequence.
// If it is present, just skip it because it carries no information.
// (Adding a BOM marker to UTF-8 text is actually not recommended,
// but it is not uncommon, particularly on Windows.)
if (!memcmp(p, "\xef\xbb\xbf", 3)) p += 3;
canonicalize_newline(p);
remove_backslash_newline(p);
convert_universal_chars(p);

4
third_party/chibicc/type.c vendored

@ -18,7 +18,7 @@ Type ty_double[1] = {{TY_DOUBLE, 8, 8}}; @@ -18,7 +18,7 @@ Type ty_double[1] = {{TY_DOUBLE, 8, 8}};
Type ty_ldouble[1] = {{TY_LDOUBLE, 16, 16}};
static Type *new_type(TypeKind kind, int size, int align) {
Type *ty = calloc(1, sizeof(Type));
Type *ty = alloc_type();
ty->kind = kind;
ty->size = size;
ty->align = align;
@ -77,7 +77,7 @@ bool is_compatible(Type *t1, Type *t2) { @@ -77,7 +77,7 @@ bool is_compatible(Type *t1, Type *t2) {
}
Type *copy_type(Type *ty) {
Type *ret = calloc(1, sizeof(Type));
Type *ret = alloc_type();
*ret = *ty;
ret->origin = ty;
return ret;

8
third_party/chibicc/unicode.c vendored

@ -66,9 +66,9 @@ static bool in_range(uint32_t *range, uint32_t c) { @@ -66,9 +66,9 @@ static bool in_range(uint32_t *range, uint32_t c) {
return false;
}
// C11 allows not only ASCII but some multibyte characters in certan
// Unicode ranges to be used in an identifier. See C11 Annex D for the
// details.
// [https://www.sigbus.info/n1570#D] C11 allows not only ASCII but
// some multibyte characters in certain Unicode ranges to be used in an
// identifier.
//
// This function returns true if a given character is acceptable as
// the first character of an identifier.
@ -108,7 +108,7 @@ bool is_ident2(uint32_t c) { @@ -108,7 +108,7 @@ bool is_ident2(uint32_t c) {
// Returns the number of columns needed to display a given
// string in a fixed-width font.
int str_width(char *p, int len) {
int display_width(char *p, int len) {
char *start = p;
int w = 0;
while (p - start < len) {

Loading…
Cancel
Save