Integrate more chibicc changes

main
Justine Tunney 2020-12-09 13:53:02 -08:00
parent 2ed7956be4
commit 15280753e2
13 changed files with 293 additions and 182 deletions

44
third_party/chibicc/alloc.c vendored 100644
View File

@ -0,0 +1,44 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "third_party/chibicc/chibicc.h"
// Allocation counters, one per object kind. Each is incremented by the
// matching alloc_*() function below on every call, before calloc() runs,
// so a count also includes any request whose allocation failed.
long alloc_node_count;   // bumped by alloc_node()
long alloc_token_count;  // bumped by alloc_token()
long alloc_obj_count;    // bumped by alloc_obj()
long alloc_type_count;   // bumped by alloc_type()
// Hands out one zero-filled Node obtained from calloc() and records the
// request in alloc_node_count. The counter advances before allocating,
// and the calloc() result is returned unchecked (may be NULL).
Node *alloc_node(void) {
  alloc_node_count += 1;
  Node *node = calloc(1, sizeof(*node));
  return node;
}
// Hands out one zero-filled Token obtained from calloc() and records the
// request in alloc_token_count. The counter advances before allocating,
// and the calloc() result is returned unchecked (may be NULL).
Token *alloc_token(void) {
  alloc_token_count += 1;
  Token *token = calloc(1, sizeof(*token));
  return token;
}
// Hands out one zero-filled Obj obtained from calloc() and records the
// request in alloc_obj_count. The counter advances before allocating,
// and the calloc() result is returned unchecked (may be NULL).
Obj *alloc_obj(void) {
  alloc_obj_count += 1;
  Obj *obj = calloc(1, sizeof(*obj));
  return obj;
}
// Hands out one zero-filled Type obtained from calloc() and records the
// request in alloc_type_count. The counter advances before allocating,
// and the calloc() result is returned unchecked (may be NULL).
Type *alloc_type(void) {
  alloc_type_count += 1;
  Type *type = calloc(1, sizeof(*type));
  return type;
}

View File

@ -141,7 +141,16 @@ static char *quote_makefile(char *s) {
static void PrintMemoryUsage(void) { static void PrintMemoryUsage(void) {
struct mallinfo mi; struct mallinfo mi;
mi = mallinfo(); mi = mallinfo();
fprintf(stderr, "\n");
fprintf(stderr, "allocated %,ld bytes of memory\n", mi.arena); fprintf(stderr, "allocated %,ld bytes of memory\n", mi.arena);
fprintf(stderr, "allocated %,ld nodes (%,ld bytes)\n", alloc_node_count,
sizeof(Node) * alloc_node_count);
fprintf(stderr, "allocated %,ld tokens (%,ld bytes)\n", alloc_token_count,
sizeof(Token) * alloc_token_count);
fprintf(stderr, "allocated %,ld objs (%,ld bytes)\n", alloc_obj_count,
sizeof(Obj) * alloc_obj_count);
fprintf(stderr, "allocated %,ld types (%,ld bytes)\n", alloc_type_count,
sizeof(Type) * alloc_type_count);
} }
static void strarray_push_comma(StringArray *a, char *s) { static void strarray_push_comma(StringArray *a, char *s) {

View File

@ -62,14 +62,14 @@ void strarray_push(StringArray *, char *);
// tokenize.c // tokenize.c
// //
// Token
typedef enum { typedef enum {
TK_RESERVED, // Keywords or punctuators TK_IDENT, // Identifiers
TK_IDENT, // Identifiers TK_PUNCT, // Punctuators
TK_STR, // String literals TK_KEYWORD, // Keywords
TK_NUM, // Numeric literals TK_STR, // String literals
TK_PP_NUM, // Preprocessing numbers TK_NUM, // Numeric literals
TK_EOF, // End-of-file markers TK_PP_NUM, // Preprocessing numbers
TK_EOF, // End-of-file markers
} TokenKind; } TokenKind;
struct File { struct File {
@ -81,15 +81,14 @@ struct File {
int line_delta; int line_delta;
}; };
// Token type struct thatispacked Token {
struct Token { Token *next; // Next token
TokenKind kind; // Token kind
int len; // Token length int len; // Token length
int line_no; // Line number int line_no; // Line number
int line_delta; // Line number int line_delta; // Line number
TokenKind kind; // Token kind
bool at_bol; // True if this token is at beginning of line bool at_bol; // True if this token is at beginning of line
bool has_space; // True if this token follows a space character bool has_space; // True if this token follows a space character
Token *next; // Next token
char *loc; // Token location char *loc; // Token location
Type *ty; // Used if TK_NUM or TK_STR Type *ty; // Used if TK_NUM or TK_STR
File *file; // Source location File *file; // Source location
@ -518,7 +517,7 @@ int encode_utf8(char *, uint32_t);
uint32_t decode_utf8(char **, char *); uint32_t decode_utf8(char **, char *);
bool is_ident1(uint32_t); bool is_ident1(uint32_t);
bool is_ident2(uint32_t); bool is_ident2(uint32_t);
int str_width(char *, int); int display_width(char *, int);
// //
// hashmap.c // hashmap.c
@ -564,6 +563,20 @@ extern bool opt_sse4;
extern bool opt_verbose; extern bool opt_verbose;
extern char *base_file; extern char *base_file;
//
// alloc.c
//
extern long alloc_node_count;
extern long alloc_token_count;
extern long alloc_obj_count;
extern long alloc_type_count;
Node *alloc_node(void);
Token *alloc_token(void);
Obj *alloc_obj(void);
Type *alloc_type(void);
COSMOPOLITAN_C_END_ COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_THIRD_PARTY_CHIBICC_CHIBICC_H_ */ #endif /* COSMOPOLITAN_THIRD_PARTY_CHIBICC_CHIBICC_H_ */

View File

@ -2219,7 +2219,7 @@ static void emit_data(Obj *prog) {
int align = (var->ty->kind == TY_ARRAY && var->ty->size >= 16) int align = (var->ty->kind == TY_ARRAY && var->ty->size >= 16)
? MAX(16, var->align) ? MAX(16, var->align)
: var->align; : var->align;
if (opt_common && var->is_tentative && !var->is_tls) { if (opt_common && var->is_tentative) {
println("\t.comm\t%s,%d,%d", nameof(var), var->ty->size, align); println("\t.comm\t%s,%d,%d", nameof(var), var->ty->size, align);
} else { } else {
if (var->section) { if (var->section) {
@ -2410,10 +2410,10 @@ static void emit_text(Obj *prog) {
// Emit code // Emit code
gen_stmt(fn->body); gen_stmt(fn->body);
assert(!depth); assert(!depth);
// The C spec defines a special rule for the main function. // [https://www.sigbus.info/n1570#5.1.2.2.3p1] The C spec defines
// Reaching the end of the main function is equivalent to // a special rule for the main function. Reaching the end of the
// returning 0, even though the behavior is undefined for the // main function is equivalent to returning 0, even though the
// other functions. See C11 5.1.2.2.3. // behavior is undefined for the other functions.
if (strcmp(nameof(fn), "main") == 0) { if (strcmp(nameof(fn), "main") == 0) {
emitlin("\txor\t%eax,%eax"); emitlin("\txor\t%eax,%eax");
} }

View File

@ -2,7 +2,10 @@
#include "third_party/chibicc/chibicc.h" #include "third_party/chibicc/chibicc.h"
#define TOMBSTONE ((void *)-1) // Represents a deleted hash entry #define INIT_SIZE 16 // initial hash bucket size
#define LOW_WATERMARK 50 // keep usage below 50% after rehashing
#define HIGH_WATERMARK 70 // perform rehash when usage exceeds 70%
#define TOMBSTONE ((void *)-1) // represents deleted hash table entry
static uint64_t fnv_hash(char *s, int len) { static uint64_t fnv_hash(char *s, int len) {
uint64_t hash = 0xcbf29ce484222325; uint64_t hash = 0xcbf29ce484222325;
@ -24,7 +27,8 @@ static void rehash(HashMap *map) {
} }
} }
size_t cap = map->capacity; size_t cap = map->capacity;
while ((nkeys * 100) / cap >= 50) cap = cap * 2; while ((nkeys * 100) / cap >= LOW_WATERMARK) cap = cap * 2;
assert(cap > 0);
// Create a new hashmap and copy all key-values. // Create a new hashmap and copy all key-values.
HashMap map2 = {}; HashMap map2 = {};
map2.buckets = calloc(cap, sizeof(HashEntry)); map2.buckets = calloc(cap, sizeof(HashEntry));
@ -56,9 +60,11 @@ static HashEntry *get_entry(HashMap *map, char *key, int keylen) {
static HashEntry *get_or_insert_entry(HashMap *map, char *key, int keylen) { static HashEntry *get_or_insert_entry(HashMap *map, char *key, int keylen) {
if (!map->buckets) { if (!map->buckets) {
map->buckets = calloc((map->capacity = 16), sizeof(HashEntry)); map->buckets = calloc(INIT_SIZE, sizeof(HashEntry));
map->capacity = INIT_SIZE;
} else if ((map->used * 100) / map->capacity >= HIGH_WATERMARK) {
rehash(map);
} }
if ((map->used * 100) / map->capacity >= 70) rehash(map);
uint64_t hash = fnv_hash(key, keylen); uint64_t hash = fnv_hash(key, keylen);
for (int i = 0; i < map->capacity; i++) { for (int i = 0; i < map->capacity; i++) {
HashEntry *ent = &map->buckets[(hash + i) & (map->capacity - 1)]; HashEntry *ent = &map->buckets[(hash + i) & (map->capacity - 1)];

View File

@ -25,21 +25,13 @@ typedef struct Scope Scope;
// Scope for local variables, global variables, typedefs // Scope for local variables, global variables, typedefs
// or enum constants // or enum constants
typedef struct { typedef struct {
char *name;
int depth;
Obj *var; Obj *var;
Type *type_def; Type *type_def;
Type *enum_ty; Type *enum_ty;
int enum_val; int enum_val;
} VarScope; } VarScope;
// Scope for struct, union or enum tags // Represents a block scope.
typedef struct {
char *name;
int depth;
Type *ty;
} TagScope;
struct Scope { struct Scope {
Scope *next; Scope *next;
// C has two block scopes; one is for variables/typedefs and // C has two block scopes; one is for variables/typedefs and
@ -103,10 +95,6 @@ static Obj *globals;
static Scope *scope = &(Scope){}; static Scope *scope = &(Scope){};
// scope_depth is incremented by one at the beginning of a block
// scope and decremented by one at the end of a block scope.
static int scope_depth;
// Points to the function object the parser is currently parsing. // Points to the function object the parser is currently parsing.
static Obj *current_fn; static Obj *current_fn;
@ -173,12 +161,10 @@ static void enter_scope(void) {
Scope *sc = calloc(1, sizeof(Scope)); Scope *sc = calloc(1, sizeof(Scope));
sc->next = scope; sc->next = scope;
scope = sc; scope = sc;
scope_depth++;
} }
static void leave_scope(void) { static void leave_scope(void) {
scope = scope->next; scope = scope->next;
scope_depth--;
} }
// Find a variable by name. // Find a variable by name.
@ -190,16 +176,16 @@ static VarScope *find_var(Token *tok) {
return NULL; return NULL;
} }
static TagScope *find_tag(Token *tok) { static Type *find_tag(Token *tok) {
for (Scope *sc = scope; sc; sc = sc->next) { for (Scope *sc = scope; sc; sc = sc->next) {
TagScope *sc2 = hashmap_get2(&sc->tags, tok->loc, tok->len); Type *ty = hashmap_get2(&sc->tags, tok->loc, tok->len);
if (sc2) return sc2; if (ty) return ty;
} }
return NULL; return NULL;
} }
Node *new_node(NodeKind kind, Token *tok) { Node *new_node(NodeKind kind, Token *tok) {
Node *node = calloc(1, sizeof(Node)); Node *node = alloc_node();
node->kind = kind; node->kind = kind;
node->tok = tok; node->tok = tok;
return node; return node;
@ -252,7 +238,7 @@ static Node *new_vla_ptr(Obj *var, Token *tok) {
Node *new_cast(Node *expr, Type *ty) { Node *new_cast(Node *expr, Type *ty) {
add_type(expr); add_type(expr);
Node *node = calloc(1, sizeof(Node)); Node *node = alloc_node();
node->kind = ND_CAST; node->kind = ND_CAST;
node->tok = expr->tok; node->tok = expr->tok;
node->lhs = expr; node->lhs = expr;
@ -262,8 +248,6 @@ Node *new_cast(Node *expr, Type *ty) {
static VarScope *push_scope(char *name) { static VarScope *push_scope(char *name) {
VarScope *sc = calloc(1, sizeof(VarScope)); VarScope *sc = calloc(1, sizeof(VarScope));
sc->name = name;
sc->depth = scope_depth;
hashmap_put(&scope->vars, name, sc); hashmap_put(&scope->vars, name, sc);
return sc; return sc;
} }
@ -303,7 +287,7 @@ static Initializer *new_initializer(Type *ty, bool is_flexible) {
} }
static Obj *new_var(char *name, Type *ty) { static Obj *new_var(char *name, Type *ty) {
Obj *var = calloc(1, sizeof(Obj)); Obj *var = alloc_obj();
var->name = name; var->name = name;
var->ty = ty; var->ty = ty;
var->align = ty->align; var->align = ty->align;
@ -330,9 +314,7 @@ static Obj *new_gvar(char *name, Type *ty) {
static char *new_unique_name(void) { static char *new_unique_name(void) {
static int id = 0; static int id = 0;
char *buf = calloc(1, 20); return xasprintf(".L..%d", id++);
sprintf(buf, ".L..%d", id++);
return buf;
} }
static Obj *new_anon_gvar(Type *ty) { static Obj *new_anon_gvar(Type *ty) {
@ -360,11 +342,7 @@ static Type *find_typedef(Token *tok) {
} }
static void push_tag_scope(Token *tok, Type *ty) { static void push_tag_scope(Token *tok, Type *ty) {
TagScope *sc = calloc(1, sizeof(TagScope)); hashmap_put2(&scope->tags, tok->loc, tok->len, ty);
sc->name = strndup(tok->loc, tok->len);
sc->depth = scope_depth;
sc->ty = ty;
hashmap_put2(&scope->tags, tok->loc, tok->len, sc);
} }
// Consumes token if equal to STR or __STR__. // Consumes token if equal to STR or __STR__.
@ -599,9 +577,14 @@ static Token *thing_attributes(Token *tok, void *arg) {
error_tok(tok, "unknown function attribute"); error_tok(tok, "unknown function attribute");
} }
// typespec = typename typename* // declspec = ("void" | "_Bool" | "char" | "short" | "int" | "long"
// typename = "void" | "_Bool" | "char" | "short" | "int" | "long" // | "typedef" | "static" | "extern" | "inline"
// | struct-decl | union-decl | typedef-name // | "_Thread_local" | "__thread"
// | "signed" | "unsigned"
// | struct-decl | union-decl | typedef-name
// | enum-specifier | typeof-specifier
// | "const" | "volatile" | "auto" | "register" | "restrict"
// | "__restrict" | "__restrict__" | "_Noreturn")+
// //
// The order of typenames in a type-specifier doesn't matter. For // The order of typenames in a type-specifier doesn't matter. For
// example, `int long static` means the same as `static long int`. // example, `int long static` means the same as `static long int`.
@ -614,7 +597,7 @@ static Token *thing_attributes(Token *tok, void *arg) {
// while keeping the "current" type object that the typenames up // while keeping the "current" type object that the typenames up
// until that point represent. When we reach a non-typename token, // until that point represent. When we reach a non-typename token,
// we return the current type object. // we return the current type object.
static Type *typespec(Token **rest, Token *tok, VarAttr *attr) { static Type *declspec(Token **rest, Token *tok, VarAttr *attr) {
// We use a single integer as counters for all typenames. // We use a single integer as counters for all typenames.
// For example, bits 0 and 1 represent how many times we saw the // For example, bits 0 and 1 represent how many times we saw the
// keyword "void" so far. With this, we can use a switch statement // keyword "void" so far. With this, we can use a switch statement
@ -851,7 +834,7 @@ static Token *static_assertion(Token *tok) {
} }
// func-params = ("void" | param ("," param)* ("," "...")?)? ")" // func-params = ("void" | param ("," param)* ("," "...")?)? ")"
// param = typespec declarator // param = declspec declarator
static Type *func_params(Token **rest, Token *tok, Type *ty) { static Type *func_params(Token **rest, Token *tok, Type *ty) {
if (EQUAL(tok, "void") && EQUAL(tok->next, ")")) { if (EQUAL(tok, "void") && EQUAL(tok->next, ")")) {
*rest = tok->next->next; *rest = tok->next->next;
@ -868,7 +851,7 @@ static Type *func_params(Token **rest, Token *tok, Type *ty) {
skip(tok, ')'); skip(tok, ')');
break; break;
} }
Type *ty2 = typespec(&tok, tok, NULL); Type *ty2 = declspec(&tok, tok, NULL);
ty2 = declarator(&tok, tok, ty2); ty2 = declarator(&tok, tok, ty2);
Token *name = ty2->name; Token *name = ty2->name;
if (ty2->kind == TY_ARRAY) { if (ty2->kind == TY_ARRAY) {
@ -935,8 +918,8 @@ static Type *declarator(Token **rest, Token *tok, Type *ty) {
ty = pointers(&tok, tok, ty); ty = pointers(&tok, tok, ty);
if (EQUAL(tok, "(")) { if (EQUAL(tok, "(")) {
Token *start = tok; Token *start = tok;
Type ignore = {}; Type dummy = {};
declarator(&tok, tok->next, &ignore); declarator(&tok, start->next, &dummy);
tok = skip(tok, ')'); tok = skip(tok, ')');
ty = type_suffix(rest, tok, ty); ty = type_suffix(rest, tok, ty);
ty = declarator(&tok, start->next, ty); ty = declarator(&tok, start->next, ty);
@ -959,8 +942,8 @@ static Type *abstract_declarator(Token **rest, Token *tok, Type *ty) {
ty = pointers(&tok, tok, ty); ty = pointers(&tok, tok, ty);
if (EQUAL(tok, "(")) { if (EQUAL(tok, "(")) {
Token *start = tok; Token *start = tok;
Type ignore = {}; Type dummy = {};
abstract_declarator(&tok, tok->next, &ignore); abstract_declarator(&tok, start->next, &dummy);
tok = skip(tok, ')'); tok = skip(tok, ')');
ty = type_suffix(rest, tok, ty); ty = type_suffix(rest, tok, ty);
return abstract_declarator(&tok, start->next, ty); return abstract_declarator(&tok, start->next, ty);
@ -968,9 +951,9 @@ static Type *abstract_declarator(Token **rest, Token *tok, Type *ty) {
return type_suffix(rest, tok, ty); return type_suffix(rest, tok, ty);
} }
// type-name = typespec abstract-declarator // type-name = declspec abstract-declarator
static Type *typename(Token **rest, Token *tok) { static Type *typename(Token **rest, Token *tok) {
Type *ty = typespec(&tok, tok, NULL); Type *ty = declspec(&tok, tok, NULL);
return abstract_declarator(rest, tok, ty); return abstract_declarator(rest, tok, ty);
} }
@ -1003,11 +986,11 @@ static Type *enum_specifier(Token **rest, Token *tok) {
tok = tok->next; tok = tok->next;
} }
if (tag && !EQUAL(tok, "{")) { if (tag && !EQUAL(tok, "{")) {
TagScope *sc = find_tag(tag); Type *ty = find_tag(tag);
if (!sc) error_tok(tag, "unknown enum type"); if (!ty) error_tok(tag, "unknown enum type");
if (sc->ty->kind != TY_ENUM) error_tok(tag, "not an enum tag"); if (ty->kind != TY_ENUM) error_tok(tag, "not an enum tag");
*rest = tok; *rest = tok;
return sc->ty; return ty;
} }
tok = skip(tok, '{'); tok = skip(tok, '{');
// Read an enum-list. // Read an enum-list.
@ -1070,8 +1053,8 @@ static Node *new_alloca(Node *sz) {
return node; return node;
} }
// declaration = typespec (declarator ("=" expr)? ("," declarator ("=" // declaration = declspec (declarator ("=" expr)?
// expr)?)*)? ";" // ("," declarator ("=" expr)?)*)? ";"
static Node *declaration(Token **rest, Token *tok, Type *basety, static Node *declaration(Token **rest, Token *tok, Type *basety,
VarAttr *attr) { VarAttr *attr) {
Node head = {}; Node head = {};
@ -1363,9 +1346,11 @@ static void struct_initializer1(Token **rest, Token *tok, Initializer *init) {
// struct-initializer2 = initializer ("," initializer)* // struct-initializer2 = initializer ("," initializer)*
static void struct_initializer2(Token **rest, Token *tok, Initializer *init, static void struct_initializer2(Token **rest, Token *tok, Initializer *init,
Member *mem) { Member *mem) {
bool first = true;
for (; mem && !is_end(tok); mem = mem->next) { for (; mem && !is_end(tok); mem = mem->next) {
Token *start = tok; Token *start = tok;
if (mem != init->ty->members) tok = skip(tok, ','); if (!first) tok = skip(tok, ',');
first = false;
if (EQUAL(tok, "[") || EQUAL(tok, ".")) { if (EQUAL(tok, "[") || EQUAL(tok, ".")) {
*rest = start; *rest = start;
return; return;
@ -1389,6 +1374,7 @@ static void union_initializer(Token **rest, Token *tok, Initializer *init) {
init->mem = init->ty->members; init->mem = init->ty->members;
if (EQUAL(tok, "{")) { if (EQUAL(tok, "{")) {
initializer2(&tok, tok->next, init->children[0]); initializer2(&tok, tok->next, init->children[0]);
CONSUME(&tok, tok, ",");
*rest = skip(tok, '}'); *rest = skip(tok, '}');
} else { } else {
initializer2(rest, tok, init->children[0]); initializer2(rest, tok, init->children[0]);
@ -1769,7 +1755,7 @@ static Node *stmt(Token **rest, Token *tok) {
brk_label = node->brk_label = new_unique_name(); brk_label = node->brk_label = new_unique_name();
cont_label = node->cont_label = new_unique_name(); cont_label = node->cont_label = new_unique_name();
if (is_typename(tok)) { if (is_typename(tok)) {
Type *basety = typespec(&tok, tok, NULL); Type *basety = declspec(&tok, tok, NULL);
node->init = declaration(&tok, tok, basety, NULL); node->init = declaration(&tok, tok, basety, NULL);
} else { } else {
node->init = expr_stmt(&tok, tok); node->init = expr_stmt(&tok, tok);
@ -1872,7 +1858,7 @@ static Node *compound_stmt(Token **rest, Token *tok) {
while (!EQUAL(tok, "}")) { while (!EQUAL(tok, "}")) {
if (is_typename(tok) && !EQUAL(tok->next, ":")) { if (is_typename(tok) && !EQUAL(tok->next, ":")) {
VarAttr attr = {}; VarAttr attr = {};
Type *basety = typespec(&tok, tok, &attr); Type *basety = declspec(&tok, tok, &attr);
if (attr.is_typedef) { if (attr.is_typedef) {
tok = parse_typedef(tok, basety); tok = parse_typedef(tok, basety);
continue; continue;
@ -2565,30 +2551,14 @@ static Node *mul(Token **rest, Token *tok) {
} }
} }
// compound-literal = initializer "}" // cast = "(" type-name ")" cast | unary
static Node *compound_literal(Token **rest, Token *tok, Type *ty,
Token *start) {
if (scope_depth == 0) {
Obj *var = new_anon_gvar(ty);
gvar_initializer(rest, tok, var);
return new_var_node(var, start);
}
Obj *var = new_lvar(new_unique_name(), ty);
Node *lhs = lvar_initializer(rest, tok, var);
Node *rhs = new_var_node(var, tok);
return new_binary(ND_COMMA, lhs, rhs, tok);
}
// cast = "(" type-name ")" "{" compound-literal
// | "(" type-name ")" cast
// | unary
static Node *cast(Token **rest, Token *tok) { static Node *cast(Token **rest, Token *tok) {
if (EQUAL(tok, "(") && is_typename(tok->next)) { if (EQUAL(tok, "(") && is_typename(tok->next)) {
Token *start = tok; Token *start = tok;
Type *ty = typename(&tok, tok->next); Type *ty = typename(&tok, tok->next);
tok = skip(tok, ')'); tok = skip(tok, ')');
// compound literal // compound literal
if (EQUAL(tok, "{")) return compound_literal(rest, tok, ty, start); if (EQUAL(tok, "{")) return unary(rest, start);
// type cast // type cast
Node *node = new_cast(cast(rest, tok), ty); Node *node = new_cast(cast(rest, tok), ty);
node->tok = start; node->tok = start;
@ -2612,9 +2582,10 @@ static Node *unary(Token **rest, Token *tok) {
return new_unary(ND_ADDR, lhs, tok); return new_unary(ND_ADDR, lhs, tok);
} }
if (EQUAL(tok, "*")) { if (EQUAL(tok, "*")) {
// [C18 6.5.3.2p4] This is an oddity in the C spec, but dereferencing // [https://www.sigbus.info/n1570#6.5.3.2p4] This is an oddity
// a function shouldn't do anything. If foo is a function, `*foo`, // in the C spec, but dereferencing a function shouldn't do
// `**foo` or `*****foo` are all equivalent to just `foo`. // anything. If foo is a function, `*foo`, `**foo` or `*****foo`
// are all equivalent to just `foo`.
Node *node = cast(rest, tok->next); Node *node = cast(rest, tok->next);
add_type(node); add_type(node);
if (node->ty->kind == TY_FUNC) return node; if (node->ty->kind == TY_FUNC) return node;
@ -2640,14 +2611,14 @@ static Node *unary(Token **rest, Token *tok) {
return postfix(rest, tok); return postfix(rest, tok);
} }
// struct-members = (typespec declarator ("," declarator)* ";")* // struct-members = (declspec declarator ("," declarator)* ";")*
static void struct_members(Token **rest, Token *tok, Type *ty) { static void struct_members(Token **rest, Token *tok, Type *ty) {
Member head = {}; Member head = {};
Member *cur = &head; Member *cur = &head;
int idx = 0; int idx = 0;
while (!EQUAL(tok, "}")) { while (!EQUAL(tok, "}")) {
VarAttr attr = {}; VarAttr attr = {};
Type *basety = typespec(&tok, tok, &attr); Type *basety = declspec(&tok, tok, &attr);
bool first = true; bool first = true;
// Anonymous struct member // Anonymous struct member
if ((basety->kind == TY_STRUCT || basety->kind == TY_UNION) && if ((basety->kind == TY_STRUCT || basety->kind == TY_UNION) &&
@ -2708,8 +2679,8 @@ static Type *struct_union_decl(Token **rest, Token *tok) {
} }
if (tag && !EQUAL(tok, "{")) { if (tag && !EQUAL(tok, "{")) {
*rest = tok; *rest = tok;
TagScope *sc = find_tag(tag); Type *ty2 = find_tag(tag);
if (sc) return sc->ty; if (ty2) return ty2;
ty->size = -1; ty->size = -1;
push_tag_scope(tag, ty); push_tag_scope(tag, ty);
return ty; return ty;
@ -2721,10 +2692,10 @@ static Type *struct_union_decl(Token **rest, Token *tok) {
if (tag) { if (tag) {
// If this is a redefinition, overwrite a previous type. // If this is a redefinition, overwrite a previous type.
// Otherwise, register the struct type. // Otherwise, register the struct type.
TagScope *sc = find_tag(tag); Type *ty2 = hashmap_get2(&scope->tags, tag->loc, tag->len);
if (sc && sc->depth == scope_depth) { if (ty2) {
*sc->ty = *ty; *ty2 = *ty;
return sc->ty; return ty2;
} }
push_tag_scope(tag, ty); push_tag_scope(tag, ty);
} }
@ -2837,7 +2808,8 @@ static Node *new_inc_dec(Node *node, Token *tok, int addend) {
node->ty); node->ty);
} }
// postfix = ident "(" func-args ")" postfix-tail* // postfix = "(" type-name ")" "{" initializer-list "}"
// | ident "(" func-args ")" postfix-tail*
// | primary postfix-tail* // | primary postfix-tail*
// //
// postfix-tail = "[" expr "]" // postfix-tail = "[" expr "]"
@ -2847,6 +2819,21 @@ static Node *new_inc_dec(Node *node, Token *tok, int addend) {
// | "++" // | "++"
// | "--" // | "--"
static Node *postfix(Token **rest, Token *tok) { static Node *postfix(Token **rest, Token *tok) {
if (EQUAL(tok, "(") && is_typename(tok->next)) {
// Compound literal
Token *start = tok;
Type *ty = typename(&tok, tok->next);
tok = skip(tok, ')');
if (scope->next == NULL) {
Obj *var = new_anon_gvar(ty);
gvar_initializer(rest, tok, var);
return new_var_node(var, start);
}
Obj *var = new_lvar("", ty);
Node *lhs = lvar_initializer(rest, tok, var);
Node *rhs = new_var_node(var, tok);
return new_binary(ND_COMMA, lhs, rhs, start);
}
Node *node = primary(&tok, tok); Node *node = primary(&tok, tok);
for (;;) { for (;;) {
if (EQUAL(tok, "(")) { if (EQUAL(tok, "(")) {
@ -2961,7 +2948,7 @@ static Node *generic_selection(Token **rest, Token *tok) {
return ret; return ret;
} }
// primary = "(" "{" stmt stmt* "}" ")" // primary = "(" "{" stmt+ "}" ")"
// | "(" expr ")" // | "(" expr ")"
// | "sizeof" "(" type-name ")" // | "sizeof" "(" type-name ")"
// | "sizeof" unary // | "sizeof" unary
@ -3367,8 +3354,9 @@ static Token *function(Token *tok, Type *basety, VarAttr *attr) {
fn->va_area = new_lvar("__va_area__", array_of(ty_char, 136)); fn->va_area = new_lvar("__va_area__", array_of(ty_char, 136));
fn->alloca_bottom = new_lvar("__alloca_size__", pointer_to(ty_char)); fn->alloca_bottom = new_lvar("__alloca_size__", pointer_to(ty_char));
tok = skip(tok, '{'); tok = skip(tok, '{');
// [C18 6.4.2.2] "__func__" is automatically defined as a // [https://www.sigbus.info/n1570#6.4.2.2p1] "__func__" is
// local variable containing the current function name. // automatically defined as a local variable containing the
// current function name.
push_scope("__func__")->var = push_scope("__func__")->var =
new_string_literal(fn->name, array_of(ty_char, strlen(fn->name) + 1)); new_string_literal(fn->name, array_of(ty_char, strlen(fn->name) + 1));
// [GNU] __FUNCTION__ is yet another name of __func__. // [GNU] __FUNCTION__ is yet another name of __func__.
@ -3401,7 +3389,7 @@ static Token *global_variable(Token *tok, Type *basety, VarAttr *attr) {
if (attr->align) var->align = attr->align; if (attr->align) var->align = attr->align;
if (EQUAL(tok, "=")) { if (EQUAL(tok, "=")) {
gvar_initializer(&tok, tok->next, var); gvar_initializer(&tok, tok->next, var);
} else if (!attr->is_extern) { } else if (!attr->is_extern && !attr->is_tls) {
var->is_tentative = true; var->is_tentative = true;
} }
} }
@ -3537,7 +3525,7 @@ Obj *parse(Token *tok) {
} }
VarAttr attr = {}; VarAttr attr = {};
tok = attribute_list(tok, &attr, thing_attributes); tok = attribute_list(tok, &attr, thing_attributes);
Type *basety = typespec(&tok, tok, &attr); Type *basety = declspec(&tok, tok, &attr);
if (attr.is_typedef) { if (attr.is_typedef) {
tok = parse_typedef(tok, basety); tok = parse_typedef(tok, basety);
continue; continue;

View File

@ -96,7 +96,7 @@ static Token *skip_line(Token *tok) {
} }
static Token *copy_token(Token *tok) { static Token *copy_token(Token *tok) {
Token *t = calloc(1, sizeof(Token)); Token *t = alloc_token();
*t = *tok; *t = *tok;
t->next = NULL; t->next = NULL;
return t; return t;
@ -234,9 +234,8 @@ static Token *copy_line(Token **rest, Token *tok) {
} }
static Token *new_num_token(int val, Token *tmpl) { static Token *new_num_token(int val, Token *tmpl) {
char buf[30]; char *buf = xasprintf("%d\n", val);
sprintf(buf, "%d\n", val); return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, buf));
return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, strdup(buf)));
} }
static Token *read_const_expr(Token **rest, Token *tok) { static Token *read_const_expr(Token **rest, Token *tok) {
@ -270,10 +269,10 @@ static long eval_const_expr(Token **rest, Token *tok) {
Token *expr = read_const_expr(rest, tok->next); Token *expr = read_const_expr(rest, tok->next);
expr = preprocess2(expr); expr = preprocess2(expr);
if (expr->kind == TK_EOF) error_tok(start, "no expression"); if (expr->kind == TK_EOF) error_tok(start, "no expression");
// [C18 6.10.1.4] The standard requires we replace remaining // [https://www.sigbus.info/n1570#6.10.1p4] The standard requires
// non-macro identifiers with "0" before evaluating a constant // we replace remaining non-macro identifiers with "0" before
// expression. For example, `#if foo` is equivalent to `#if 0` // evaluating a constant expression. For example, `#if foo` is
// if foo is not defined. // equivalent to `#if 0` if foo is not defined.
for (Token *t = expr; t->kind != TK_EOF; t = t->next) { for (Token *t = expr; t->kind != TK_EOF; t = t->next) {
if (t->kind == TK_IDENT) { if (t->kind == TK_IDENT) {
Token *next = t->next; Token *next = t->next;
@ -453,8 +452,7 @@ static Token *stringize(Token *hash, Token *arg) {
// Concatenate two tokens to create a new token. // Concatenate two tokens to create a new token.
static Token *paste(Token *lhs, Token *rhs) { static Token *paste(Token *lhs, Token *rhs) {
// Paste the two tokens. // Paste the two tokens.
char *buf = calloc(1, lhs->len + rhs->len + 1); char *buf = xasprintf("%.*s%.*s", lhs->len, lhs->loc, rhs->len, rhs->loc);
sprintf(buf, "%.*s%.*s", lhs->len, lhs->loc, rhs->len, rhs->loc);
// Tokenize the resulting string. // Tokenize the resulting string.
Token *tok = tokenize(new_file(lhs->file->name, lhs->file->file_no, buf)); Token *tok = tokenize(new_file(lhs->file->name, lhs->file->file_no, buf));
if (tok->next->kind != TK_EOF) if (tok->next->kind != TK_EOF)
@ -706,7 +704,7 @@ static char *detect_include_guard(Token *tok) {
return NULL; return NULL;
} }
static Token *include_file(Token *tok, char *path) { static Token *include_file(Token *tok, char *path, Token *filename_tok) {
// Check for "#pragma once" // Check for "#pragma once"
if (hashmap_get(&pragma_once, path)) return tok; if (hashmap_get(&pragma_once, path)) return tok;
// If we read the same file before, and if the file was guarded // If we read the same file before, and if the file was guarded
@ -716,7 +714,8 @@ static Token *include_file(Token *tok, char *path) {
char *guard_name = hashmap_get(&include_guards, path); char *guard_name = hashmap_get(&include_guards, path);
if (guard_name && hashmap_get(&macros, guard_name)) return tok; if (guard_name && hashmap_get(&macros, guard_name)) return tok;
Token *tok2 = tokenize_file(path); Token *tok2 = tokenize_file(path);
if (!tok2) error_tok(tok, "%s: cannot open file: %s", path, strerror(errno)); if (!tok2)
error_tok(filename_tok, "%s: cannot open file: %s", path, strerror(errno));
guard_name = detect_include_guard(tok2); guard_name = detect_include_guard(tok2);
if (guard_name) hashmap_put(&include_guards, path, guard_name); if (guard_name) hashmap_put(&include_guards, path, guard_name);
return append(tok2, tok); return append(tok2, tok);
@ -760,19 +759,19 @@ static Token *preprocess2(Token *tok) {
char *path = char *path =
xasprintf("%s/%s", dirname(strdup(start->file->name)), filename); xasprintf("%s/%s", dirname(strdup(start->file->name)), filename);
if (fileexists(path)) { if (fileexists(path)) {
tok = include_file(tok, path); tok = include_file(tok, path, start->next->next);
continue; continue;
} }
} }
char *path = search_include_paths(filename); char *path = search_include_paths(filename);
tok = include_file(tok, path ? path : filename); tok = include_file(tok, path ? path : filename, start->next->next);
continue; continue;
} }
if (EQUAL(tok, "include_next")) { if (EQUAL(tok, "include_next")) {
bool ignore; bool ignore;
char *filename = read_include_filename(&tok, tok->next, &ignore); char *filename = read_include_filename(&tok, tok->next, &ignore);
char *path = search_include_next(filename); char *path = search_include_next(filename);
tok = include_file(tok, path ? path : filename); tok = include_file(tok, path ? path : filename, start->next->next);
continue; continue;
} }
if (EQUAL(tok, "define")) { if (EQUAL(tok, "define")) {
@ -914,17 +913,13 @@ static char *format_date(struct tm *tm) {
"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
}; };
char buf[30]; return xasprintf("\"%s %2d %d\"", mon[tm->tm_mon], tm->tm_mday,
sprintf(buf, "\"%s %2d %d\"", mon[tm->tm_mon], tm->tm_mday, tm->tm_year + 1900);
tm->tm_year + 1900);
return strdup(buf);
} }
// __TIME__ is expanded to the current time, e.g. "13:34:03". // __TIME__ is expanded to the current time, e.g. "13:34:03".
static char *format_time(struct tm *tm) { static char *format_time(struct tm *tm) {
char buf[30]; return xasprintf("\"%02d:%02d:%02d\"", tm->tm_hour, tm->tm_min, tm->tm_sec);
sprintf(buf, "\"%02d:%02d:%02d\"", tm->tm_hour, tm->tm_min, tm->tm_sec);
return strdup(buf);
} }
void init_macros(void) { void init_macros(void) {
@ -1302,11 +1297,11 @@ static void join_adjacent_string_literals(Token *tok) {
} }
// Second pass: concatenate adjacent string literals. // Second pass: concatenate adjacent string literals.
for (Token *tok1 = tok; tok1->kind != TK_EOF;) { for (Token *tok1 = tok; tok1->kind != TK_EOF;) {
Token *tok2 = tok1->next; if (tok1->kind != TK_STR || tok1->next->kind != TK_STR) {
if (tok1->kind != TK_STR || tok2->kind != TK_STR) {
tok1 = tok1->next; tok1 = tok1->next;
continue; continue;
} }
#if 0
assert(tok1->ty->base->size == tok2->ty->base->size); assert(tok1->ty->base->size == tok2->ty->base->size);
Token *t = copy_token(tok1); Token *t = copy_token(tok1);
t->ty = t->ty =
@ -1317,6 +1312,25 @@ static void join_adjacent_string_literals(Token *tok) {
tok2->str, tok2->ty->size); tok2->str, tok2->ty->size);
t->len = strlen(t->loc); t->len = strlen(t->loc);
*tok1 = *t; *tok1 = *t;
#else
Token *tok2 = tok1->next;
while (tok2->kind == TK_STR) tok2 = tok2->next;
int len = tok1->ty->array_len;
for (Token *t = tok1->next; t != tok2; t = t->next) {
len = len + t->ty->array_len - 1;
}
char *buf = calloc(tok1->ty->base->size, len);
int i = 0;
for (Token *t = tok1; t != tok2; t = t->next) {
memcpy(buf + i, t->str, t->ty->size);
i = i + t->ty->size - t->ty->base->size;
}
*tok1 = *copy_token(tok1);
tok1->ty = array_of(tok1->ty->base, len);
tok1->str = buf;
tok1->next = tok2;
tok1 = tok2;
#endif
} }
} }

View File

@ -1,4 +0,0 @@
main(void) {
void *p;
p = "hello";
}

View File

@ -445,6 +445,15 @@ int main() {
}; };
x.a; x.a;
})); }));
ASSERT(1, ({
union {
int a;
char b;
} x = {
1,
};
x.a;
}));
ASSERT(2, ({ ASSERT(2, ({
enum { enum {
x, x,

View File

@ -392,6 +392,24 @@ int main() {
} x = {1}, y = {2}; } x = {1}, y = {2};
(0 ? x : y).a; (0 ? x : y).a;
})); }));
ASSERT(2, ({
struct {
int a;
} x = {1}, y = {2};
(x = y).a;
}));
ASSERT(1, ({
struct {
int a;
} x = {1}, y = {2};
(1 ? x : y).a;
}));
ASSERT(2, ({
struct {
int a;
} x = {1}, y = {2};
(0 ? x : y).a;
}));
return 0; return 0;
} }

View File

@ -38,7 +38,7 @@ static void verror_at(char *filename, char *input, int line_no, char *loc,
int indent = fprintf(stderr, "%s:%d: ", filename, line_no); int indent = fprintf(stderr, "%s:%d: ", filename, line_no);
fprintf(stderr, "%.*s\n", (int)(end - line), line); fprintf(stderr, "%.*s\n", (int)(end - line), line);
// Show the error message. // Show the error message.
int pos = str_width(line, loc - line) + indent; int pos = display_width(line, loc - line) + indent;
fprintf(stderr, "%*s", pos, ""); // print pos spaces. fprintf(stderr, "%*s", pos, ""); // print pos spaces.
fprintf(stderr, "^ "); fprintf(stderr, "^ ");
vfprintf(stderr, fmt, ap); vfprintf(stderr, fmt, ap);
@ -53,6 +53,7 @@ void error_at(char *loc, char *fmt, ...) {
va_list ap; va_list ap;
va_start(ap, fmt); va_start(ap, fmt);
verror_at(current_file->name, current_file->contents, line_no, loc, fmt, ap); verror_at(current_file->name, current_file->contents, line_no, loc, fmt, ap);
va_end(ap);
exit(1); exit(1);
} }
@ -64,7 +65,7 @@ void error_tok(Token *tok, char *fmt, ...) {
verror_at(t->file->name, t->file->contents, t->line_no, t->loc, fmt, ap); verror_at(t->file->name, t->file->contents, t->line_no, t->loc, fmt, ap);
va_end(ap); va_end(ap);
} }
va_end(va); va_end(ap);
exit(1); exit(1);
} }
@ -73,6 +74,7 @@ void warn_tok(Token *tok, char *fmt, ...) {
va_start(ap, fmt); va_start(ap, fmt);
verror_at(tok->file->name, tok->file->contents, tok->line_no, tok->loc, fmt, verror_at(tok->file->name, tok->file->contents, tok->line_no, tok->loc, fmt,
ap); ap);
va_end(ap);
} }
static int is_space(int c) { static int is_space(int c) {
@ -103,9 +105,9 @@ Token *skip(Token *tok, char op) {
} }
} }
// Create a new token and add it as the next token of `cur`. // Create a new token.
static Token *new_token(TokenKind kind, char *start, char *end) { static Token *new_token(TokenKind kind, char *start, char *end) {
Token *tok = calloc(1, sizeof(Token)); Token *tok = alloc_token();
tok->kind = kind; tok->kind = kind;
tok->loc = start; tok->loc = start;
tok->len = end - start; tok->len = end - start;
@ -117,18 +119,17 @@ static Token *new_token(TokenKind kind, char *start, char *end) {
return tok; return tok;
} }
// Read an identifier and returns a pointer pointing to the end // Read an identifier and returns the length of it.
// of an identifier. // If p does not point to a valid identifier, 0 is returned.
// static int read_ident(char *start) {
// Returns null if p does not point to a valid identifier. char *p = start;
static char *read_ident(char *p) {
uint32_t c = decode_utf8(&p, p); uint32_t c = decode_utf8(&p, p);
if (!is_ident1(c)) return NULL; if (!is_ident1(c)) return 0;
for (;;) { for (;;) {
char *q; char *q;
c = decode_utf8(&q, p); c = decode_utf8(&q, p);
if (!('a' <= c && c <= 'f') && !is_ident2(c)) { if (!('a' <= c && c <= 'f') && !is_ident2(c)) {
return p; return p - start;
} }
p = q; p = q;
} }
@ -140,6 +141,19 @@ static int from_hex(char c) {
return c - 'A' + 10; return c - 'A' + 10;
} }
// Reads a punctuator starting at p and returns its length in bytes.
//
// The multi-character punctuators listed in kw are tried first, in
// table order. The three-character entries come before the
// two-character entries they extend (e.g. "<<=" before "<<"), so the
// first match is also the longest one. If no table entry matches, a
// single character for which ispunct() is true has length 1.
//
// Returns 0 if p does not start with a punctuator.
static int read_punct(char *p) {
  static char *kw[] = {"<<=", ">>=", "...", "==", "!=", "<=", ">=", "->",
                       "+=",  "-=",  "*=",  "/=", "++", "--", "%=", "&=",
                       "|=",  "^=",  "&&",  "||", "<<", ">>", "##"};
  for (size_t i = 0; i < sizeof(kw) / sizeof(*kw); i++) {
    if (startswith(p, kw[i])) {
      return strlen(kw[i]);
    }
  }
  // <ctype.h> classifiers require an argument representable as
  // unsigned char (or EOF). Plain char may be negative for bytes
  // >= 0x80, so convert before calling ispunct().
  return ispunct((unsigned char)*p) ? 1 : 0;
}
static bool is_keyword(Token *tok) { static bool is_keyword(Token *tok) {
static HashMap map; static HashMap map;
if (map.capacity == 0) { if (map.capacity == 0) {
@ -190,6 +204,17 @@ static int read_escaped_char(char **new_pos, char *p) {
return c; return c;
} }
*new_pos = p + 1; *new_pos = p + 1;
// Escape sequences are defined using themselves here. E.g.
// '\n' is implemented using '\n'. This tautological definition
// works because the compiler that compiles our compiler knows
// what '\n' actually is. In other words, we "inherit" the ASCII
// code of '\n' from the compiler that compiles our compiler,
// so we don't have to teach the actual code here.
//
// This fact has huge implications not only for the correctness
// of the compiler but also for the security of the generated code.
// For more info, read "Reflections on Trusting Trust" by Ken Thompson.
// https://github.com/rui314/chibicc/wiki/thompson1984.pdf
switch (*p) { switch (*p) {
case 'a': case 'a':
return '\a'; return '\a';
@ -217,7 +242,7 @@ static int read_escaped_char(char **new_pos, char *p) {
static char *string_literal_end(char *p) { static char *string_literal_end(char *p) {
char *start = p; char *start = p;
for (; *p != '"'; p++) { for (; *p != '"'; p++) {
if (*p == '\0') error_at(start, "unclosed string literal"); if (*p == '\n' || *p == '\0') error_at(start, "unclosed string literal");
if (*p == '\\') p++; if (*p == '\\') p++;
} }
return p; return p;
@ -225,7 +250,7 @@ static char *string_literal_end(char *p) {
static Token *read_string_literal(char *start, char *quote) { static Token *read_string_literal(char *start, char *quote) {
char *end = string_literal_end(quote + 1); char *end = string_literal_end(quote + 1);
char *buf = calloc(1, end - quote); char *buf = calloc(2, end - quote);
int len = 0; int len = 0;
for (char *p = quote + 1; p < end;) { for (char *p = quote + 1; p < end;) {
if (*p == '\\') if (*p == '\\')
@ -409,7 +434,7 @@ static void convert_pp_number(Token *tok) {
void convert_pp_tokens(Token *tok) { void convert_pp_tokens(Token *tok) {
for (Token *t = tok; t->kind != TK_EOF; t = t->next) { for (Token *t = tok; t->kind != TK_EOF; t = t->next) {
if (is_keyword(t)) if (is_keyword(t))
t->kind = TK_RESERVED; t->kind = TK_KEYWORD;
else if (t->kind == TK_PP_NUM) else if (t->kind == TK_PP_NUM)
convert_pp_number(t); convert_pp_number(t);
} }
@ -546,34 +571,17 @@ Token *tokenize(File *file) {
continue; continue;
} }
// Identifier or keyword // Identifier or keyword
char *q; int ident_len = read_ident(p);
if ((q = read_ident(p)) != NULL) { if (ident_len) {
cur = cur->next = new_token(TK_IDENT, p, q); cur = cur->next = new_token(TK_IDENT, p, p + ident_len);
p = q; p += cur->len;
continue; continue;
} }
// Three-letter punctuators // Punctuators
if (LOOKINGAT(p, "<<=") || LOOKINGAT(p, ">>=") || LOOKINGAT(p, "...")) { int punct_len = read_punct(p);
cur = cur->next = new_token(TK_RESERVED, p, p + 3); if (punct_len) {
p += 3; cur = cur->next = new_token(TK_PUNCT, p, p + punct_len);
continue; p += cur->len;
}
// Two-letter punctuators
if (LOOKINGAT(p, "==") || LOOKINGAT(p, "!=") || LOOKINGAT(p, "<=") ||
LOOKINGAT(p, ">=") || LOOKINGAT(p, "->") || LOOKINGAT(p, "+=") ||
LOOKINGAT(p, "-=") || LOOKINGAT(p, "*=") || LOOKINGAT(p, "/=") ||
LOOKINGAT(p, "++") || LOOKINGAT(p, "--") || LOOKINGAT(p, "%=") ||
LOOKINGAT(p, "&=") || LOOKINGAT(p, "|=") || LOOKINGAT(p, "^=") ||
LOOKINGAT(p, "&&") || LOOKINGAT(p, "||") || LOOKINGAT(p, "<<") ||
LOOKINGAT(p, ">>") || LOOKINGAT(p, "##")) {
cur = cur->next = new_token(TK_RESERVED, p, p + 2);
p += 2;
continue;
}
// Single-letter punctuators
if (ispunct(*p)) {
cur = cur->next = new_token(TK_RESERVED, p, p + 1);
p++;
continue; continue;
} }
error_at(p, "invalid token"); error_at(p, "invalid token");
@ -665,6 +673,7 @@ static void remove_backslash_newline(char *p) {
p[j++] = p[i++]; p[j++] = p[i++];
} }
} }
for (; n > 0; n--) p[j++] = '\n';
p[j] = '\0'; p[j] = '\0';
} }
@ -710,6 +719,11 @@ static void convert_universal_chars(char *p) {
Token *tokenize_file(char *path) { Token *tokenize_file(char *path) {
char *p = read_file(path); char *p = read_file(path);
if (!p) return NULL; if (!p) return NULL;
// UTF-8 texts may start with a 3-byte "BOM" marker sequence.
// If exists, just skip them because they are useless bytes.
// (It is actually not recommended to add BOM markers to UTF-8
// texts, but it's not uncommon particularly on Windows.)
if (!memcmp(p, "\xef\xbb\xbf", 3)) p += 3;
canonicalize_newline(p); canonicalize_newline(p);
remove_backslash_newline(p); remove_backslash_newline(p);
convert_universal_chars(p); convert_universal_chars(p);

View File

@ -18,7 +18,7 @@ Type ty_double[1] = {{TY_DOUBLE, 8, 8}};
Type ty_ldouble[1] = {{TY_LDOUBLE, 16, 16}}; Type ty_ldouble[1] = {{TY_LDOUBLE, 16, 16}};
static Type *new_type(TypeKind kind, int size, int align) { static Type *new_type(TypeKind kind, int size, int align) {
Type *ty = calloc(1, sizeof(Type)); Type *ty = alloc_type();
ty->kind = kind; ty->kind = kind;
ty->size = size; ty->size = size;
ty->align = align; ty->align = align;
@ -77,7 +77,7 @@ bool is_compatible(Type *t1, Type *t2) {
} }
Type *copy_type(Type *ty) { Type *copy_type(Type *ty) {
Type *ret = calloc(1, sizeof(Type)); Type *ret = alloc_type();
*ret = *ty; *ret = *ty;
ret->origin = ty; ret->origin = ty;
return ret; return ret;

View File

@ -66,9 +66,9 @@ static bool in_range(uint32_t *range, uint32_t c) {
return false; return false;
} }
// C11 allows not only ASCII but some multibyte characters in certain // [https://www.sigbus.info/n1570#D] C11 allows not only ASCII but
// Unicode ranges to be used in an identifier. See C11 Annex D for the // some multibyte characters in certain Unicode ranges to be used in an
// details. // identifier.
// //
// This function returns true if a given character is acceptable as // This function returns true if a given character is acceptable as
// the first character of an identifier. // the first character of an identifier.
@ -108,7 +108,7 @@ bool is_ident2(uint32_t c) {
// Returns the number of columns needed to display a given // Returns the number of columns needed to display a given
// string in a fixed-width font. // string in a fixed-width font.
int str_width(char *p, int len) { int display_width(char *p, int len) {
char *start = p; char *start = p;
int w = 0; int w = 0;
while (p - start < len) { while (p - start < len) {