Integrate more chibicc changes

main
Justine Tunney 2020-12-09 13:53:02 -08:00
parent 2ed7956be4
commit 15280753e2
13 changed files with 293 additions and 182 deletions

44
third_party/chibicc/alloc.c vendored 100644
View File

@ -0,0 +1,44 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "third_party/chibicc/chibicc.h"
long alloc_node_count;
long alloc_token_count;
long alloc_obj_count;
long alloc_type_count;
Node *alloc_node(void) {
++alloc_node_count;
return calloc(1, sizeof(Node));
}
Token *alloc_token(void) {
++alloc_token_count;
return calloc(1, sizeof(Token));
}
Obj *alloc_obj(void) {
++alloc_obj_count;
return calloc(1, sizeof(Obj));
}
Type *alloc_type(void) {
++alloc_type_count;
return calloc(1, sizeof(Type));
}

View File

@ -141,7 +141,16 @@ static char *quote_makefile(char *s) {
static void PrintMemoryUsage(void) {
struct mallinfo mi;
mi = mallinfo();
fprintf(stderr, "\n");
fprintf(stderr, "allocated %,ld bytes of memory\n", mi.arena);
fprintf(stderr, "allocated %,ld nodes (%,ld bytes)\n", alloc_node_count,
sizeof(Node) * alloc_node_count);
fprintf(stderr, "allocated %,ld tokens (%,ld bytes)\n", alloc_token_count,
sizeof(Token) * alloc_token_count);
fprintf(stderr, "allocated %,ld objs (%,ld bytes)\n", alloc_obj_count,
sizeof(Obj) * alloc_obj_count);
fprintf(stderr, "allocated %,ld types (%,ld bytes)\n", alloc_type_count,
sizeof(Type) * alloc_type_count);
}
static void strarray_push_comma(StringArray *a, char *s) {

View File

@ -62,14 +62,14 @@ void strarray_push(StringArray *, char *);
// tokenize.c
//
// Token
typedef enum {
TK_RESERVED, // Keywords or punctuators
TK_IDENT, // Identifiers
TK_STR, // String literals
TK_NUM, // Numeric literals
TK_PP_NUM, // Preprocessing numbers
TK_EOF, // End-of-file markers
TK_IDENT, // Identifiers
TK_PUNCT, // Punctuators
TK_KEYWORD, // Keywords
TK_STR, // String literals
TK_NUM, // Numeric literals
TK_PP_NUM, // Preprocessing numbers
TK_EOF, // End-of-file markers
} TokenKind;
struct File {
@ -81,15 +81,14 @@ struct File {
int line_delta;
};
// Token type
struct Token {
TokenKind kind; // Token kind
struct thatispacked Token {
Token *next; // Next token
int len; // Token length
int line_no; // Line number
int line_delta; // Line number
TokenKind kind; // Token kind
bool at_bol; // True if this token is at beginning of line
bool has_space; // True if this token follows a space character
Token *next; // Next token
char *loc; // Token location
Type *ty; // Used if TK_NUM or TK_STR
File *file; // Source location
@ -518,7 +517,7 @@ int encode_utf8(char *, uint32_t);
uint32_t decode_utf8(char **, char *);
bool is_ident1(uint32_t);
bool is_ident2(uint32_t);
int str_width(char *, int);
int display_width(char *, int);
//
// hashmap.c
@ -564,6 +563,20 @@ extern bool opt_sse4;
extern bool opt_verbose;
extern char *base_file;
//
// alloc.c
//
extern long alloc_node_count;
extern long alloc_token_count;
extern long alloc_obj_count;
extern long alloc_type_count;
Node *alloc_node(void);
Token *alloc_token(void);
Obj *alloc_obj(void);
Type *alloc_type(void);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_THIRD_PARTY_CHIBICC_CHIBICC_H_ */

View File

@ -2219,7 +2219,7 @@ static void emit_data(Obj *prog) {
int align = (var->ty->kind == TY_ARRAY && var->ty->size >= 16)
? MAX(16, var->align)
: var->align;
if (opt_common && var->is_tentative && !var->is_tls) {
if (opt_common && var->is_tentative) {
println("\t.comm\t%s,%d,%d", nameof(var), var->ty->size, align);
} else {
if (var->section) {
@ -2410,10 +2410,10 @@ static void emit_text(Obj *prog) {
// Emit code
gen_stmt(fn->body);
assert(!depth);
// The C spec defines a special rule for the main function.
// Reaching the end of the main function is equivalent to
// returning 0, even though the behavior is undefined for the
// other functions. See C11 5.1.2.2.3.
// [https://www.sigbus.info/n1570#5.1.2.2.3p1] The C spec defines
// a special rule for the main function. Reaching the end of the
// main function is equivalent to returning 0, even though the
// behavior is undefined for the other functions.
if (strcmp(nameof(fn), "main") == 0) {
emitlin("\txor\t%eax,%eax");
}

View File

@ -2,7 +2,10 @@
#include "third_party/chibicc/chibicc.h"
#define TOMBSTONE ((void *)-1) // Represents a deleted hash entry
#define INIT_SIZE 16 // initial hash bucket size
#define LOW_WATERMARK 50 // keep usage below 50% after rehashing
#define HIGH_WATERMARK 70 // perform rehash when usage exceeds 70%
#define TOMBSTONE ((void *)-1) // represents deleted hash table entry
static uint64_t fnv_hash(char *s, int len) {
uint64_t hash = 0xcbf29ce484222325;
@ -24,7 +27,8 @@ static void rehash(HashMap *map) {
}
}
size_t cap = map->capacity;
while ((nkeys * 100) / cap >= 50) cap = cap * 2;
while ((nkeys * 100) / cap >= LOW_WATERMARK) cap = cap * 2;
assert(cap > 0);
// Create a new hashmap and copy all key-values.
HashMap map2 = {};
map2.buckets = calloc(cap, sizeof(HashEntry));
@ -56,9 +60,11 @@ static HashEntry *get_entry(HashMap *map, char *key, int keylen) {
static HashEntry *get_or_insert_entry(HashMap *map, char *key, int keylen) {
if (!map->buckets) {
map->buckets = calloc((map->capacity = 16), sizeof(HashEntry));
map->buckets = calloc(INIT_SIZE, sizeof(HashEntry));
map->capacity = INIT_SIZE;
} else if ((map->used * 100) / map->capacity >= HIGH_WATERMARK) {
rehash(map);
}
if ((map->used * 100) / map->capacity >= 70) rehash(map);
uint64_t hash = fnv_hash(key, keylen);
for (int i = 0; i < map->capacity; i++) {
HashEntry *ent = &map->buckets[(hash + i) & (map->capacity - 1)];

View File

@ -25,21 +25,13 @@ typedef struct Scope Scope;
// Scope for local variables, global variables, typedefs
// or enum constants
typedef struct {
char *name;
int depth;
Obj *var;
Type *type_def;
Type *enum_ty;
int enum_val;
} VarScope;
// Scope for struct, union or enum tags
typedef struct {
char *name;
int depth;
Type *ty;
} TagScope;
// Represents a block scope.
struct Scope {
Scope *next;
// C has two block scopes; one is for variables/typedefs and
@ -103,10 +95,6 @@ static Obj *globals;
static Scope *scope = &(Scope){};
// scope_depth is incremented by one at the beginning of a block
// scope and decremented by one at the end of a block scope.
static int scope_depth;
// Points to the function object the parser is currently parsing.
static Obj *current_fn;
@ -173,12 +161,10 @@ static void enter_scope(void) {
Scope *sc = calloc(1, sizeof(Scope));
sc->next = scope;
scope = sc;
scope_depth++;
}
static void leave_scope(void) {
scope = scope->next;
scope_depth--;
}
// Find a variable by name.
@ -190,16 +176,16 @@ static VarScope *find_var(Token *tok) {
return NULL;
}
static TagScope *find_tag(Token *tok) {
static Type *find_tag(Token *tok) {
for (Scope *sc = scope; sc; sc = sc->next) {
TagScope *sc2 = hashmap_get2(&sc->tags, tok->loc, tok->len);
if (sc2) return sc2;
Type *ty = hashmap_get2(&sc->tags, tok->loc, tok->len);
if (ty) return ty;
}
return NULL;
}
Node *new_node(NodeKind kind, Token *tok) {
Node *node = calloc(1, sizeof(Node));
Node *node = alloc_node();
node->kind = kind;
node->tok = tok;
return node;
@ -252,7 +238,7 @@ static Node *new_vla_ptr(Obj *var, Token *tok) {
Node *new_cast(Node *expr, Type *ty) {
add_type(expr);
Node *node = calloc(1, sizeof(Node));
Node *node = alloc_node();
node->kind = ND_CAST;
node->tok = expr->tok;
node->lhs = expr;
@ -262,8 +248,6 @@ Node *new_cast(Node *expr, Type *ty) {
static VarScope *push_scope(char *name) {
VarScope *sc = calloc(1, sizeof(VarScope));
sc->name = name;
sc->depth = scope_depth;
hashmap_put(&scope->vars, name, sc);
return sc;
}
@ -303,7 +287,7 @@ static Initializer *new_initializer(Type *ty, bool is_flexible) {
}
static Obj *new_var(char *name, Type *ty) {
Obj *var = calloc(1, sizeof(Obj));
Obj *var = alloc_obj();
var->name = name;
var->ty = ty;
var->align = ty->align;
@ -330,9 +314,7 @@ static Obj *new_gvar(char *name, Type *ty) {
static char *new_unique_name(void) {
static int id = 0;
char *buf = calloc(1, 20);
sprintf(buf, ".L..%d", id++);
return buf;
return xasprintf(".L..%d", id++);
}
static Obj *new_anon_gvar(Type *ty) {
@ -360,11 +342,7 @@ static Type *find_typedef(Token *tok) {
}
static void push_tag_scope(Token *tok, Type *ty) {
TagScope *sc = calloc(1, sizeof(TagScope));
sc->name = strndup(tok->loc, tok->len);
sc->depth = scope_depth;
sc->ty = ty;
hashmap_put2(&scope->tags, tok->loc, tok->len, sc);
hashmap_put2(&scope->tags, tok->loc, tok->len, ty);
}
// Consumes token if equal to STR or __STR__.
@ -599,9 +577,14 @@ static Token *thing_attributes(Token *tok, void *arg) {
error_tok(tok, "unknown function attribute");
}
// typespec = typename typename*
// typename = "void" | "_Bool" | "char" | "short" | "int" | "long"
// | struct-decl | union-decl | typedef-name
// declspec = ("void" | "_Bool" | "char" | "short" | "int" | "long"
// | "typedef" | "static" | "extern" | "inline"
// | "_Thread_local" | "__thread"
// | "signed" | "unsigned"
// | struct-decl | union-decl | typedef-name
// | enum-specifier | typeof-specifier
// | "const" | "volatile" | "auto" | "register" | "restrict"
// | "__restrict" | "__restrict__" | "_Noreturn")+
//
// The order of typenames in a type-specifier doesn't matter. For
// example, `int long static` means the same as `static long int`.
@ -614,7 +597,7 @@ static Token *thing_attributes(Token *tok, void *arg) {
// while keeping the "current" type object that the typenames up
// until that point represent. When we reach a non-typename token,
// we returns the current type object.
static Type *typespec(Token **rest, Token *tok, VarAttr *attr) {
static Type *declspec(Token **rest, Token *tok, VarAttr *attr) {
// We use a single integer as counters for all typenames.
// For example, bits 0 and 1 represents how many times we saw the
// keyword "void" so far. With this, we can use a switch statement
@ -851,7 +834,7 @@ static Token *static_assertion(Token *tok) {
}
// func-params = ("void" | param ("," param)* ("," "...")?)? ")"
// param = typespec declarator
// param = declspec declarator
static Type *func_params(Token **rest, Token *tok, Type *ty) {
if (EQUAL(tok, "void") && EQUAL(tok->next, ")")) {
*rest = tok->next->next;
@ -868,7 +851,7 @@ static Type *func_params(Token **rest, Token *tok, Type *ty) {
skip(tok, ')');
break;
}
Type *ty2 = typespec(&tok, tok, NULL);
Type *ty2 = declspec(&tok, tok, NULL);
ty2 = declarator(&tok, tok, ty2);
Token *name = ty2->name;
if (ty2->kind == TY_ARRAY) {
@ -935,8 +918,8 @@ static Type *declarator(Token **rest, Token *tok, Type *ty) {
ty = pointers(&tok, tok, ty);
if (EQUAL(tok, "(")) {
Token *start = tok;
Type ignore = {};
declarator(&tok, tok->next, &ignore);
Type dummy = {};
declarator(&tok, start->next, &dummy);
tok = skip(tok, ')');
ty = type_suffix(rest, tok, ty);
ty = declarator(&tok, start->next, ty);
@ -959,8 +942,8 @@ static Type *abstract_declarator(Token **rest, Token *tok, Type *ty) {
ty = pointers(&tok, tok, ty);
if (EQUAL(tok, "(")) {
Token *start = tok;
Type ignore = {};
abstract_declarator(&tok, tok->next, &ignore);
Type dummy = {};
abstract_declarator(&tok, start->next, &dummy);
tok = skip(tok, ')');
ty = type_suffix(rest, tok, ty);
return abstract_declarator(&tok, start->next, ty);
@ -968,9 +951,9 @@ static Type *abstract_declarator(Token **rest, Token *tok, Type *ty) {
return type_suffix(rest, tok, ty);
}
// type-name = typespec abstract-declarator
// type-name = declspec abstract-declarator
static Type *typename(Token **rest, Token *tok) {
Type *ty = typespec(&tok, tok, NULL);
Type *ty = declspec(&tok, tok, NULL);
return abstract_declarator(rest, tok, ty);
}
@ -1003,11 +986,11 @@ static Type *enum_specifier(Token **rest, Token *tok) {
tok = tok->next;
}
if (tag && !EQUAL(tok, "{")) {
TagScope *sc = find_tag(tag);
if (!sc) error_tok(tag, "unknown enum type");
if (sc->ty->kind != TY_ENUM) error_tok(tag, "not an enum tag");
Type *ty = find_tag(tag);
if (!ty) error_tok(tag, "unknown enum type");
if (ty->kind != TY_ENUM) error_tok(tag, "not an enum tag");
*rest = tok;
return sc->ty;
return ty;
}
tok = skip(tok, '{');
// Read an enum-list.
@ -1070,8 +1053,8 @@ static Node *new_alloca(Node *sz) {
return node;
}
// declaration = typespec (declarator ("=" expr)? ("," declarator ("="
// expr)?)*)? ";"
// declaration = declspec (declarator ("=" expr)?
// ("," declarator ("=" expr)?)*)? ";"
static Node *declaration(Token **rest, Token *tok, Type *basety,
VarAttr *attr) {
Node head = {};
@ -1363,9 +1346,11 @@ static void struct_initializer1(Token **rest, Token *tok, Initializer *init) {
// struct-initializer2 = initializer ("," initializer)*
static void struct_initializer2(Token **rest, Token *tok, Initializer *init,
Member *mem) {
bool first = true;
for (; mem && !is_end(tok); mem = mem->next) {
Token *start = tok;
if (mem != init->ty->members) tok = skip(tok, ',');
if (!first) tok = skip(tok, ',');
first = false;
if (EQUAL(tok, "[") || EQUAL(tok, ".")) {
*rest = start;
return;
@ -1389,6 +1374,7 @@ static void union_initializer(Token **rest, Token *tok, Initializer *init) {
init->mem = init->ty->members;
if (EQUAL(tok, "{")) {
initializer2(&tok, tok->next, init->children[0]);
CONSUME(&tok, tok, ",");
*rest = skip(tok, '}');
} else {
initializer2(rest, tok, init->children[0]);
@ -1769,7 +1755,7 @@ static Node *stmt(Token **rest, Token *tok) {
brk_label = node->brk_label = new_unique_name();
cont_label = node->cont_label = new_unique_name();
if (is_typename(tok)) {
Type *basety = typespec(&tok, tok, NULL);
Type *basety = declspec(&tok, tok, NULL);
node->init = declaration(&tok, tok, basety, NULL);
} else {
node->init = expr_stmt(&tok, tok);
@ -1872,7 +1858,7 @@ static Node *compound_stmt(Token **rest, Token *tok) {
while (!EQUAL(tok, "}")) {
if (is_typename(tok) && !EQUAL(tok->next, ":")) {
VarAttr attr = {};
Type *basety = typespec(&tok, tok, &attr);
Type *basety = declspec(&tok, tok, &attr);
if (attr.is_typedef) {
tok = parse_typedef(tok, basety);
continue;
@ -2565,30 +2551,14 @@ static Node *mul(Token **rest, Token *tok) {
}
}
// compound-literal = initializer "}"
static Node *compound_literal(Token **rest, Token *tok, Type *ty,
Token *start) {
if (scope_depth == 0) {
Obj *var = new_anon_gvar(ty);
gvar_initializer(rest, tok, var);
return new_var_node(var, start);
}
Obj *var = new_lvar(new_unique_name(), ty);
Node *lhs = lvar_initializer(rest, tok, var);
Node *rhs = new_var_node(var, tok);
return new_binary(ND_COMMA, lhs, rhs, tok);
}
// cast = "(" type-name ")" "{" compound-literal
// | "(" type-name ")" cast
// | unary
// cast = "(" type-name ")" cast | unary
static Node *cast(Token **rest, Token *tok) {
if (EQUAL(tok, "(") && is_typename(tok->next)) {
Token *start = tok;
Type *ty = typename(&tok, tok->next);
tok = skip(tok, ')');
// compound literal
if (EQUAL(tok, "{")) return compound_literal(rest, tok, ty, start);
if (EQUAL(tok, "{")) return unary(rest, start);
// type cast
Node *node = new_cast(cast(rest, tok), ty);
node->tok = start;
@ -2612,9 +2582,10 @@ static Node *unary(Token **rest, Token *tok) {
return new_unary(ND_ADDR, lhs, tok);
}
if (EQUAL(tok, "*")) {
// [C18 6.5.3.2p4] This is an oddity in the C spec, but dereferencing
// a function shouldn't do anything. If foo is a function, `*foo`,
// `**foo` or `*****foo` are all equivalent to just `foo`.
// [https://www.sigbus.info/n1570#6.5.3.2p4] This is an oddity
// in the C spec, but dereferencing a function shouldn't do
// anything. If foo is a function, `*foo`, `**foo` or `*****foo`
// are all equivalent to just `foo`.
Node *node = cast(rest, tok->next);
add_type(node);
if (node->ty->kind == TY_FUNC) return node;
@ -2640,14 +2611,14 @@ static Node *unary(Token **rest, Token *tok) {
return postfix(rest, tok);
}
// struct-members = (typespec declarator ("," declarator)* ";")*
// struct-members = (declspec declarator ("," declarator)* ";")*
static void struct_members(Token **rest, Token *tok, Type *ty) {
Member head = {};
Member *cur = &head;
int idx = 0;
while (!EQUAL(tok, "}")) {
VarAttr attr = {};
Type *basety = typespec(&tok, tok, &attr);
Type *basety = declspec(&tok, tok, &attr);
bool first = true;
// Anonymous struct member
if ((basety->kind == TY_STRUCT || basety->kind == TY_UNION) &&
@ -2708,8 +2679,8 @@ static Type *struct_union_decl(Token **rest, Token *tok) {
}
if (tag && !EQUAL(tok, "{")) {
*rest = tok;
TagScope *sc = find_tag(tag);
if (sc) return sc->ty;
Type *ty2 = find_tag(tag);
if (ty2) return ty2;
ty->size = -1;
push_tag_scope(tag, ty);
return ty;
@ -2721,10 +2692,10 @@ static Type *struct_union_decl(Token **rest, Token *tok) {
if (tag) {
// If this is a redefinition, overwrite a previous type.
// Otherwise, register the struct type.
TagScope *sc = find_tag(tag);
if (sc && sc->depth == scope_depth) {
*sc->ty = *ty;
return sc->ty;
Type *ty2 = hashmap_get2(&scope->tags, tag->loc, tag->len);
if (ty2) {
*ty2 = *ty;
return ty2;
}
push_tag_scope(tag, ty);
}
@ -2837,7 +2808,8 @@ static Node *new_inc_dec(Node *node, Token *tok, int addend) {
node->ty);
}
// postfix = ident "(" func-args ")" postfix-tail*
// postfix = "(" type-name ")" "{" initializer-list "}"
// | ident "(" func-args ")" postfix-tail*
// | primary postfix-tail*
//
// postfix-tail = "[" expr "]"
@ -2847,6 +2819,21 @@ static Node *new_inc_dec(Node *node, Token *tok, int addend) {
// | "++"
// | "--"
static Node *postfix(Token **rest, Token *tok) {
if (EQUAL(tok, "(") && is_typename(tok->next)) {
// Compound literal
Token *start = tok;
Type *ty = typename(&tok, tok->next);
tok = skip(tok, ')');
if (scope->next == NULL) {
Obj *var = new_anon_gvar(ty);
gvar_initializer(rest, tok, var);
return new_var_node(var, start);
}
Obj *var = new_lvar("", ty);
Node *lhs = lvar_initializer(rest, tok, var);
Node *rhs = new_var_node(var, tok);
return new_binary(ND_COMMA, lhs, rhs, start);
}
Node *node = primary(&tok, tok);
for (;;) {
if (EQUAL(tok, "(")) {
@ -2961,7 +2948,7 @@ static Node *generic_selection(Token **rest, Token *tok) {
return ret;
}
// primary = "(" "{" stmt stmt* "}" ")"
// primary = "(" "{" stmt+ "}" ")"
// | "(" expr ")"
// | "sizeof" "(" type-name ")"
// | "sizeof" unary
@ -3367,8 +3354,9 @@ static Token *function(Token *tok, Type *basety, VarAttr *attr) {
fn->va_area = new_lvar("__va_area__", array_of(ty_char, 136));
fn->alloca_bottom = new_lvar("__alloca_size__", pointer_to(ty_char));
tok = skip(tok, '{');
// [C18 6.4.2.2] "__func__" is automatically defined as a
// local variable containing the current function name.
// [https://www.sigbus.info/n1570#6.4.2.2p1] "__func__" is
// automatically defined as a local variable containing the
// current function name.
push_scope("__func__")->var =
new_string_literal(fn->name, array_of(ty_char, strlen(fn->name) + 1));
// [GNU] __FUNCTION__ is yet another name of __func__.
@ -3401,7 +3389,7 @@ static Token *global_variable(Token *tok, Type *basety, VarAttr *attr) {
if (attr->align) var->align = attr->align;
if (EQUAL(tok, "=")) {
gvar_initializer(&tok, tok->next, var);
} else if (!attr->is_extern) {
} else if (!attr->is_extern && !attr->is_tls) {
var->is_tentative = true;
}
}
@ -3537,7 +3525,7 @@ Obj *parse(Token *tok) {
}
VarAttr attr = {};
tok = attribute_list(tok, &attr, thing_attributes);
Type *basety = typespec(&tok, tok, &attr);
Type *basety = declspec(&tok, tok, &attr);
if (attr.is_typedef) {
tok = parse_typedef(tok, basety);
continue;

View File

@ -96,7 +96,7 @@ static Token *skip_line(Token *tok) {
}
static Token *copy_token(Token *tok) {
Token *t = calloc(1, sizeof(Token));
Token *t = alloc_token();
*t = *tok;
t->next = NULL;
return t;
@ -234,9 +234,8 @@ static Token *copy_line(Token **rest, Token *tok) {
}
static Token *new_num_token(int val, Token *tmpl) {
char buf[30];
sprintf(buf, "%d\n", val);
return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, strdup(buf)));
char *buf = xasprintf("%d\n", val);
return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, buf));
}
static Token *read_const_expr(Token **rest, Token *tok) {
@ -270,10 +269,10 @@ static long eval_const_expr(Token **rest, Token *tok) {
Token *expr = read_const_expr(rest, tok->next);
expr = preprocess2(expr);
if (expr->kind == TK_EOF) error_tok(start, "no expression");
// [C18 6.10.1.4] The standard requires we replace remaining
// non-macro identifiers with "0" before evaluating a constant
// expression. For example, `#if foo` is equivalent to `#if 0`
// if foo is not defined.
// [https://www.sigbus.info/n1570#6.10.1p4] The standard requires
// we replace remaining non-macro identifiers with "0" before
// evaluating a constant expression. For example, `#if foo` is
// equivalent to `#if 0` if foo is not defined.
for (Token *t = expr; t->kind != TK_EOF; t = t->next) {
if (t->kind == TK_IDENT) {
Token *next = t->next;
@ -453,8 +452,7 @@ static Token *stringize(Token *hash, Token *arg) {
// Concatenate two tokens to create a new token.
static Token *paste(Token *lhs, Token *rhs) {
// Paste the two tokens.
char *buf = calloc(1, lhs->len + rhs->len + 1);
sprintf(buf, "%.*s%.*s", lhs->len, lhs->loc, rhs->len, rhs->loc);
char *buf = xasprintf("%.*s%.*s", lhs->len, lhs->loc, rhs->len, rhs->loc);
// Tokenize the resulting string.
Token *tok = tokenize(new_file(lhs->file->name, lhs->file->file_no, buf));
if (tok->next->kind != TK_EOF)
@ -706,7 +704,7 @@ static char *detect_include_guard(Token *tok) {
return NULL;
}
static Token *include_file(Token *tok, char *path) {
static Token *include_file(Token *tok, char *path, Token *filename_tok) {
// Check for "#pragma once"
if (hashmap_get(&pragma_once, path)) return tok;
// If we read the same file before, and if the file was guarded
@ -716,7 +714,8 @@ static Token *include_file(Token *tok, char *path) {
char *guard_name = hashmap_get(&include_guards, path);
if (guard_name && hashmap_get(&macros, guard_name)) return tok;
Token *tok2 = tokenize_file(path);
if (!tok2) error_tok(tok, "%s: cannot open file: %s", path, strerror(errno));
if (!tok2)
error_tok(filename_tok, "%s: cannot open file: %s", path, strerror(errno));
guard_name = detect_include_guard(tok2);
if (guard_name) hashmap_put(&include_guards, path, guard_name);
return append(tok2, tok);
@ -760,19 +759,19 @@ static Token *preprocess2(Token *tok) {
char *path =
xasprintf("%s/%s", dirname(strdup(start->file->name)), filename);
if (fileexists(path)) {
tok = include_file(tok, path);
tok = include_file(tok, path, start->next->next);
continue;
}
}
char *path = search_include_paths(filename);
tok = include_file(tok, path ? path : filename);
tok = include_file(tok, path ? path : filename, start->next->next);
continue;
}
if (EQUAL(tok, "include_next")) {
bool ignore;
char *filename = read_include_filename(&tok, tok->next, &ignore);
char *path = search_include_next(filename);
tok = include_file(tok, path ? path : filename);
tok = include_file(tok, path ? path : filename, start->next->next);
continue;
}
if (EQUAL(tok, "define")) {
@ -914,17 +913,13 @@ static char *format_date(struct tm *tm) {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
};
char buf[30];
sprintf(buf, "\"%s %2d %d\"", mon[tm->tm_mon], tm->tm_mday,
tm->tm_year + 1900);
return strdup(buf);
return xasprintf("\"%s %2d %d\"", mon[tm->tm_mon], tm->tm_mday,
tm->tm_year + 1900);
}
// __TIME__ is expanded to the current time, e.g. "13:34:03".
static char *format_time(struct tm *tm) {
char buf[30];
sprintf(buf, "\"%02d:%02d:%02d\"", tm->tm_hour, tm->tm_min, tm->tm_sec);
return strdup(buf);
return xasprintf("\"%02d:%02d:%02d\"", tm->tm_hour, tm->tm_min, tm->tm_sec);
}
void init_macros(void) {
@ -1302,11 +1297,11 @@ static void join_adjacent_string_literals(Token *tok) {
}
// Second pass: concatenate adjacent string literals.
for (Token *tok1 = tok; tok1->kind != TK_EOF;) {
Token *tok2 = tok1->next;
if (tok1->kind != TK_STR || tok2->kind != TK_STR) {
if (tok1->kind != TK_STR || tok1->next->kind != TK_STR) {
tok1 = tok1->next;
continue;
}
#if 0
assert(tok1->ty->base->size == tok2->ty->base->size);
Token *t = copy_token(tok1);
t->ty =
@ -1317,6 +1312,25 @@ static void join_adjacent_string_literals(Token *tok) {
tok2->str, tok2->ty->size);
t->len = strlen(t->loc);
*tok1 = *t;
#else
Token *tok2 = tok1->next;
while (tok2->kind == TK_STR) tok2 = tok2->next;
int len = tok1->ty->array_len;
for (Token *t = tok1->next; t != tok2; t = t->next) {
len = len + t->ty->array_len - 1;
}
char *buf = calloc(tok1->ty->base->size, len);
int i = 0;
for (Token *t = tok1; t != tok2; t = t->next) {
memcpy(buf + i, t->str, t->ty->size);
i = i + t->ty->size - t->ty->base->size;
}
*tok1 = *copy_token(tok1);
tok1->ty = array_of(tok1->ty->base, len);
tok1->str = buf;
tok1->next = tok2;
tok1 = tok2;
#endif
}
}

View File

@ -1,4 +0,0 @@
main(void) {
void *p;
p = "hello";
}

View File

@ -445,6 +445,15 @@ int main() {
};
x.a;
}));
ASSERT(1, ({
union {
int a;
char b;
} x = {
1,
};
x.a;
}));
ASSERT(2, ({
enum {
x,

View File

@ -392,6 +392,24 @@ int main() {
} x = {1}, y = {2};
(0 ? x : y).a;
}));
ASSERT(2, ({
struct {
int a;
} x = {1}, y = {2};
(x = y).a;
}));
ASSERT(1, ({
struct {
int a;
} x = {1}, y = {2};
(1 ? x : y).a;
}));
ASSERT(2, ({
struct {
int a;
} x = {1}, y = {2};
(0 ? x : y).a;
}));
return 0;
}

View File

@ -38,7 +38,7 @@ static void verror_at(char *filename, char *input, int line_no, char *loc,
int indent = fprintf(stderr, "%s:%d: ", filename, line_no);
fprintf(stderr, "%.*s\n", (int)(end - line), line);
// Show the error message.
int pos = str_width(line, loc - line) + indent;
int pos = display_width(line, loc - line) + indent;
fprintf(stderr, "%*s", pos, ""); // print pos spaces.
fprintf(stderr, "^ ");
vfprintf(stderr, fmt, ap);
@ -53,6 +53,7 @@ void error_at(char *loc, char *fmt, ...) {
va_list ap;
va_start(ap, fmt);
verror_at(current_file->name, current_file->contents, line_no, loc, fmt, ap);
va_end(ap);
exit(1);
}
@ -64,7 +65,7 @@ void error_tok(Token *tok, char *fmt, ...) {
verror_at(t->file->name, t->file->contents, t->line_no, t->loc, fmt, ap);
va_end(ap);
}
va_end(va);
va_end(ap);
exit(1);
}
@ -73,6 +74,7 @@ void warn_tok(Token *tok, char *fmt, ...) {
va_start(ap, fmt);
verror_at(tok->file->name, tok->file->contents, tok->line_no, tok->loc, fmt,
ap);
va_end(ap);
}
static int is_space(int c) {
@ -103,9 +105,9 @@ Token *skip(Token *tok, char op) {
}
}
// Create a new token and add it as the next token of `cur`.
// Create a new token.
static Token *new_token(TokenKind kind, char *start, char *end) {
Token *tok = calloc(1, sizeof(Token));
Token *tok = alloc_token();
tok->kind = kind;
tok->loc = start;
tok->len = end - start;
@ -117,18 +119,17 @@ static Token *new_token(TokenKind kind, char *start, char *end) {
return tok;
}
// Read an identifier and returns a pointer pointing to the end
// of an identifier.
//
// Returns null if p does not point to a valid identifier.
static char *read_ident(char *p) {
// Read an identifier and returns the length of it.
// If p does not point to a valid identifier, 0 is returned.
static int read_ident(char *start) {
char *p = start;
uint32_t c = decode_utf8(&p, p);
if (!is_ident1(c)) return NULL;
if (!is_ident1(c)) return 0;
for (;;) {
char *q;
c = decode_utf8(&q, p);
if (!('a' <= c && c <= 'f') && !is_ident2(c)) {
return p;
return p - start;
}
p = q;
}
@ -140,6 +141,19 @@ static int from_hex(char c) {
return c - 'A' + 10;
}
// Read a punctuator token from p and returns its length.
static int read_punct(char *p) {
static char *kw[] = {"<<=", ">>=", "...", "==", "!=", "<=", ">=", "->",
"+=", "-=", "*=", "/=", "++", "--", "%=", "&=",
"|=", "^=", "&&", "||", "<<", ">>", "##"};
for (int i = 0; i < sizeof(kw) / sizeof(*kw); i++) {
if (startswith(p, kw[i])) {
return strlen(kw[i]);
}
}
return ispunct(*p) ? 1 : 0;
}
static bool is_keyword(Token *tok) {
static HashMap map;
if (map.capacity == 0) {
@ -190,6 +204,17 @@ static int read_escaped_char(char **new_pos, char *p) {
return c;
}
*new_pos = p + 1;
// Escape sequences are defined using themselves here. E.g.
// '\n' is implemented using '\n'. This tautological definition
// works because the compiler that compiles our compiler knows
// what '\n' actually is. In other words, we "inherit" the ASCII
// code of '\n' from the compiler that compiles our compiler,
// so we don't have to teach the actual code here.
//
// This fact has huge implications not only for the correctness
// of the compiler but also for the security of the generated code.
// For more info, read "Reflections on Trusting Trust" by Ken Thompson.
// https://github.com/rui314/chibicc/wiki/thompson1984.pdf
switch (*p) {
case 'a':
return '\a';
@ -217,7 +242,7 @@ static int read_escaped_char(char **new_pos, char *p) {
static char *string_literal_end(char *p) {
char *start = p;
for (; *p != '"'; p++) {
if (*p == '\0') error_at(start, "unclosed string literal");
if (*p == '\n' || *p == '\0') error_at(start, "unclosed string literal");
if (*p == '\\') p++;
}
return p;
@ -225,7 +250,7 @@ static char *string_literal_end(char *p) {
static Token *read_string_literal(char *start, char *quote) {
char *end = string_literal_end(quote + 1);
char *buf = calloc(1, end - quote);
char *buf = calloc(2, end - quote);
int len = 0;
for (char *p = quote + 1; p < end;) {
if (*p == '\\')
@ -409,7 +434,7 @@ static void convert_pp_number(Token *tok) {
void convert_pp_tokens(Token *tok) {
for (Token *t = tok; t->kind != TK_EOF; t = t->next) {
if (is_keyword(t))
t->kind = TK_RESERVED;
t->kind = TK_KEYWORD;
else if (t->kind == TK_PP_NUM)
convert_pp_number(t);
}
@ -546,34 +571,17 @@ Token *tokenize(File *file) {
continue;
}
// Identifier or keyword
char *q;
if ((q = read_ident(p)) != NULL) {
cur = cur->next = new_token(TK_IDENT, p, q);
p = q;
int ident_len = read_ident(p);
if (ident_len) {
cur = cur->next = new_token(TK_IDENT, p, p + ident_len);
p += cur->len;
continue;
}
// Three-letter punctuators
if (LOOKINGAT(p, "<<=") || LOOKINGAT(p, ">>=") || LOOKINGAT(p, "...")) {
cur = cur->next = new_token(TK_RESERVED, p, p + 3);
p += 3;
continue;
}
// Two-letter punctuators
if (LOOKINGAT(p, "==") || LOOKINGAT(p, "!=") || LOOKINGAT(p, "<=") ||
LOOKINGAT(p, ">=") || LOOKINGAT(p, "->") || LOOKINGAT(p, "+=") ||
LOOKINGAT(p, "-=") || LOOKINGAT(p, "*=") || LOOKINGAT(p, "/=") ||
LOOKINGAT(p, "++") || LOOKINGAT(p, "--") || LOOKINGAT(p, "%=") ||
LOOKINGAT(p, "&=") || LOOKINGAT(p, "|=") || LOOKINGAT(p, "^=") ||
LOOKINGAT(p, "&&") || LOOKINGAT(p, "||") || LOOKINGAT(p, "<<") ||
LOOKINGAT(p, ">>") || LOOKINGAT(p, "##")) {
cur = cur->next = new_token(TK_RESERVED, p, p + 2);
p += 2;
continue;
}
// Single-letter punctuators
if (ispunct(*p)) {
cur = cur->next = new_token(TK_RESERVED, p, p + 1);
p++;
// Punctuators
int punct_len = read_punct(p);
if (punct_len) {
cur = cur->next = new_token(TK_PUNCT, p, p + punct_len);
p += cur->len;
continue;
}
error_at(p, "invalid token");
@ -665,6 +673,7 @@ static void remove_backslash_newline(char *p) {
p[j++] = p[i++];
}
}
for (; n > 0; n--) p[j++] = '\n';
p[j] = '\0';
}
@ -710,6 +719,11 @@ static void convert_universal_chars(char *p) {
Token *tokenize_file(char *path) {
char *p = read_file(path);
if (!p) return NULL;
// UTF-8 texts may start with a 3-byte "BOM" marker sequence.
// If exists, just skip them because they are useless bytes.
// (It is actually not recommended to add BOM markers to UTF-8
// texts, but it's not uncommon particularly on Windows.)
if (!memcmp(p, "\xef\xbb\xbf", 3)) p += 3;
canonicalize_newline(p);
remove_backslash_newline(p);
convert_universal_chars(p);

View File

@ -18,7 +18,7 @@ Type ty_double[1] = {{TY_DOUBLE, 8, 8}};
Type ty_ldouble[1] = {{TY_LDOUBLE, 16, 16}};
static Type *new_type(TypeKind kind, int size, int align) {
Type *ty = calloc(1, sizeof(Type));
Type *ty = alloc_type();
ty->kind = kind;
ty->size = size;
ty->align = align;
@ -77,7 +77,7 @@ bool is_compatible(Type *t1, Type *t2) {
}
Type *copy_type(Type *ty) {
Type *ret = calloc(1, sizeof(Type));
Type *ret = alloc_type();
*ret = *ty;
ret->origin = ty;
return ret;

View File

@ -66,9 +66,9 @@ static bool in_range(uint32_t *range, uint32_t c) {
return false;
}
// C11 allows not only ASCII but some multibyte characters in certan
// Unicode ranges to be used in an identifier. See C11 Annex D for the
// details.
// [https://www.sigbus.info/n1570#D] C11 allows not only ASCII but
// some multibyte characters in certan Unicode ranges to be used in an
// identifier.
//
// This function returns true if a given character is acceptable as
// the first character of an identifier.
@ -108,7 +108,7 @@ bool is_ident2(uint32_t c) {
// Returns the number of columns needed to display a given
// string in a fixed-width font.
int str_width(char *p, int len) {
int display_width(char *p, int len) {
char *start = p;
int w = 0;
while (p - start < len) {