Integrate more chibicc changes
parent
2ed7956be4
commit
15280753e2
|
@ -0,0 +1,44 @@
|
||||||
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||||
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||||
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||||
|
│ │
|
||||||
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||||
|
│ any purpose with or without fee is hereby granted, provided that the │
|
||||||
|
│ above copyright notice and this permission notice appear in all copies. │
|
||||||
|
│ │
|
||||||
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||||
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||||
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||||
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||||
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||||
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||||
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||||
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "third_party/chibicc/chibicc.h"
|
||||||
|
|
||||||
|
long alloc_node_count;  // incremented once per alloc_node() call
|
||||||
|
long alloc_token_count;  // incremented once per alloc_token() call
|
||||||
|
long alloc_obj_count;  // incremented once per alloc_obj() call
|
||||||
|
long alloc_type_count;  // incremented once per alloc_type() call
|
||||||
|
|
||||||
|
Node *alloc_node(void) {
|
||||||
|
++alloc_node_count;
|
||||||
|
return calloc(1, sizeof(Node));
|
||||||
|
}
|
||||||
|
|
||||||
|
Token *alloc_token(void) {
|
||||||
|
++alloc_token_count;
|
||||||
|
return calloc(1, sizeof(Token));
|
||||||
|
}
|
||||||
|
|
||||||
|
Obj *alloc_obj(void) {
|
||||||
|
++alloc_obj_count;
|
||||||
|
return calloc(1, sizeof(Obj));
|
||||||
|
}
|
||||||
|
|
||||||
|
Type *alloc_type(void) {
|
||||||
|
++alloc_type_count;
|
||||||
|
return calloc(1, sizeof(Type));
|
||||||
|
}
|
|
@ -141,7 +141,16 @@ static char *quote_makefile(char *s) {
|
||||||
static void PrintMemoryUsage(void) {
|
static void PrintMemoryUsage(void) {
|
||||||
struct mallinfo mi;
|
struct mallinfo mi;
|
||||||
mi = mallinfo();
|
mi = mallinfo();
|
||||||
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "allocated %,ld bytes of memory\n", mi.arena);
|
fprintf(stderr, "allocated %,ld bytes of memory\n", mi.arena);
|
||||||
|
fprintf(stderr, "allocated %,ld nodes (%,ld bytes)\n", alloc_node_count,
|
||||||
|
sizeof(Node) * alloc_node_count);
|
||||||
|
fprintf(stderr, "allocated %,ld tokens (%,ld bytes)\n", alloc_token_count,
|
||||||
|
sizeof(Token) * alloc_token_count);
|
||||||
|
fprintf(stderr, "allocated %,ld objs (%,ld bytes)\n", alloc_obj_count,
|
||||||
|
sizeof(Obj) * alloc_obj_count);
|
||||||
|
fprintf(stderr, "allocated %,ld types (%,ld bytes)\n", alloc_type_count,
|
||||||
|
sizeof(Type) * alloc_type_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void strarray_push_comma(StringArray *a, char *s) {
|
static void strarray_push_comma(StringArray *a, char *s) {
|
||||||
|
|
|
@ -62,14 +62,14 @@ void strarray_push(StringArray *, char *);
|
||||||
// tokenize.c
|
// tokenize.c
|
||||||
//
|
//
|
||||||
|
|
||||||
// Token
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
TK_RESERVED, // Keywords or punctuators
|
TK_IDENT, // Identifiers
|
||||||
TK_IDENT, // Identifiers
|
TK_PUNCT, // Punctuators
|
||||||
TK_STR, // String literals
|
TK_KEYWORD, // Keywords
|
||||||
TK_NUM, // Numeric literals
|
TK_STR, // String literals
|
||||||
TK_PP_NUM, // Preprocessing numbers
|
TK_NUM, // Numeric literals
|
||||||
TK_EOF, // End-of-file markers
|
TK_PP_NUM, // Preprocessing numbers
|
||||||
|
TK_EOF, // End-of-file markers
|
||||||
} TokenKind;
|
} TokenKind;
|
||||||
|
|
||||||
struct File {
|
struct File {
|
||||||
|
@ -81,15 +81,14 @@ struct File {
|
||||||
int line_delta;
|
int line_delta;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Token type
|
struct thatispacked Token {
|
||||||
struct Token {
|
Token *next; // Next token
|
||||||
TokenKind kind; // Token kind
|
|
||||||
int len; // Token length
|
int len; // Token length
|
||||||
int line_no; // Line number
|
int line_no; // Line number
|
||||||
int line_delta; // Line number
|
int line_delta; // Line number
|
||||||
|
TokenKind kind; // Token kind
|
||||||
bool at_bol; // True if this token is at beginning of line
|
bool at_bol; // True if this token is at beginning of line
|
||||||
bool has_space; // True if this token follows a space character
|
bool has_space; // True if this token follows a space character
|
||||||
Token *next; // Next token
|
|
||||||
char *loc; // Token location
|
char *loc; // Token location
|
||||||
Type *ty; // Used if TK_NUM or TK_STR
|
Type *ty; // Used if TK_NUM or TK_STR
|
||||||
File *file; // Source location
|
File *file; // Source location
|
||||||
|
@ -518,7 +517,7 @@ int encode_utf8(char *, uint32_t);
|
||||||
uint32_t decode_utf8(char **, char *);
|
uint32_t decode_utf8(char **, char *);
|
||||||
bool is_ident1(uint32_t);
|
bool is_ident1(uint32_t);
|
||||||
bool is_ident2(uint32_t);
|
bool is_ident2(uint32_t);
|
||||||
int str_width(char *, int);
|
int display_width(char *, int);
|
||||||
|
|
||||||
//
|
//
|
||||||
// hashmap.c
|
// hashmap.c
|
||||||
|
@ -564,6 +563,20 @@ extern bool opt_sse4;
|
||||||
extern bool opt_verbose;
|
extern bool opt_verbose;
|
||||||
extern char *base_file;
|
extern char *base_file;
|
||||||
|
|
||||||
|
//
|
||||||
|
// alloc.c
|
||||||
|
//
|
||||||
|
|
||||||
|
extern long alloc_node_count;
|
||||||
|
extern long alloc_token_count;
|
||||||
|
extern long alloc_obj_count;
|
||||||
|
extern long alloc_type_count;
|
||||||
|
|
||||||
|
Node *alloc_node(void);
|
||||||
|
Token *alloc_token(void);
|
||||||
|
Obj *alloc_obj(void);
|
||||||
|
Type *alloc_type(void);
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
COSMOPOLITAN_C_END_
|
||||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||||
#endif /* COSMOPOLITAN_THIRD_PARTY_CHIBICC_CHIBICC_H_ */
|
#endif /* COSMOPOLITAN_THIRD_PARTY_CHIBICC_CHIBICC_H_ */
|
||||||
|
|
|
@ -2219,7 +2219,7 @@ static void emit_data(Obj *prog) {
|
||||||
int align = (var->ty->kind == TY_ARRAY && var->ty->size >= 16)
|
int align = (var->ty->kind == TY_ARRAY && var->ty->size >= 16)
|
||||||
? MAX(16, var->align)
|
? MAX(16, var->align)
|
||||||
: var->align;
|
: var->align;
|
||||||
if (opt_common && var->is_tentative && !var->is_tls) {
|
if (opt_common && var->is_tentative) {
|
||||||
println("\t.comm\t%s,%d,%d", nameof(var), var->ty->size, align);
|
println("\t.comm\t%s,%d,%d", nameof(var), var->ty->size, align);
|
||||||
} else {
|
} else {
|
||||||
if (var->section) {
|
if (var->section) {
|
||||||
|
@ -2410,10 +2410,10 @@ static void emit_text(Obj *prog) {
|
||||||
// Emit code
|
// Emit code
|
||||||
gen_stmt(fn->body);
|
gen_stmt(fn->body);
|
||||||
assert(!depth);
|
assert(!depth);
|
||||||
// The C spec defines a special rule for the main function.
|
// [https://www.sigbus.info/n1570#5.1.2.2.3p1] The C spec defines
|
||||||
// Reaching the end of the main function is equivalent to
|
// a special rule for the main function. Reaching the end of the
|
||||||
// returning 0, even though the behavior is undefined for the
|
// main function is equivalent to returning 0, even though the
|
||||||
// other functions. See C11 5.1.2.2.3.
|
// behavior is undefined for the other functions.
|
||||||
if (strcmp(nameof(fn), "main") == 0) {
|
if (strcmp(nameof(fn), "main") == 0) {
|
||||||
emitlin("\txor\t%eax,%eax");
|
emitlin("\txor\t%eax,%eax");
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,10 @@
|
||||||
|
|
||||||
#include "third_party/chibicc/chibicc.h"
|
#include "third_party/chibicc/chibicc.h"
|
||||||
|
|
||||||
#define TOMBSTONE ((void *)-1) // Represents a deleted hash entry
|
#define INIT_SIZE 16 // initial hash bucket size
|
||||||
|
#define LOW_WATERMARK 50 // keep usage below 50% after rehashing
|
||||||
|
#define HIGH_WATERMARK 70 // perform rehash when usage exceeds 70%
|
||||||
|
#define TOMBSTONE ((void *)-1) // represents deleted hash table entry
|
||||||
|
|
||||||
static uint64_t fnv_hash(char *s, int len) {
|
static uint64_t fnv_hash(char *s, int len) {
|
||||||
uint64_t hash = 0xcbf29ce484222325;
|
uint64_t hash = 0xcbf29ce484222325;
|
||||||
|
@ -24,7 +27,8 @@ static void rehash(HashMap *map) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
size_t cap = map->capacity;
|
size_t cap = map->capacity;
|
||||||
while ((nkeys * 100) / cap >= 50) cap = cap * 2;
|
while ((nkeys * 100) / cap >= LOW_WATERMARK) cap = cap * 2;
|
||||||
|
assert(cap > 0);
|
||||||
// Create a new hashmap and copy all key-values.
|
// Create a new hashmap and copy all key-values.
|
||||||
HashMap map2 = {};
|
HashMap map2 = {};
|
||||||
map2.buckets = calloc(cap, sizeof(HashEntry));
|
map2.buckets = calloc(cap, sizeof(HashEntry));
|
||||||
|
@ -56,9 +60,11 @@ static HashEntry *get_entry(HashMap *map, char *key, int keylen) {
|
||||||
|
|
||||||
static HashEntry *get_or_insert_entry(HashMap *map, char *key, int keylen) {
|
static HashEntry *get_or_insert_entry(HashMap *map, char *key, int keylen) {
|
||||||
if (!map->buckets) {
|
if (!map->buckets) {
|
||||||
map->buckets = calloc((map->capacity = 16), sizeof(HashEntry));
|
map->buckets = calloc(INIT_SIZE, sizeof(HashEntry));
|
||||||
|
map->capacity = INIT_SIZE;
|
||||||
|
} else if ((map->used * 100) / map->capacity >= HIGH_WATERMARK) {
|
||||||
|
rehash(map);
|
||||||
}
|
}
|
||||||
if ((map->used * 100) / map->capacity >= 70) rehash(map);
|
|
||||||
uint64_t hash = fnv_hash(key, keylen);
|
uint64_t hash = fnv_hash(key, keylen);
|
||||||
for (int i = 0; i < map->capacity; i++) {
|
for (int i = 0; i < map->capacity; i++) {
|
||||||
HashEntry *ent = &map->buckets[(hash + i) & (map->capacity - 1)];
|
HashEntry *ent = &map->buckets[(hash + i) & (map->capacity - 1)];
|
||||||
|
|
|
@ -25,21 +25,13 @@ typedef struct Scope Scope;
|
||||||
// Scope for local variables, global variables, typedefs
|
// Scope for local variables, global variables, typedefs
|
||||||
// or enum constants
|
// or enum constants
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char *name;
|
|
||||||
int depth;
|
|
||||||
Obj *var;
|
Obj *var;
|
||||||
Type *type_def;
|
Type *type_def;
|
||||||
Type *enum_ty;
|
Type *enum_ty;
|
||||||
int enum_val;
|
int enum_val;
|
||||||
} VarScope;
|
} VarScope;
|
||||||
|
|
||||||
// Scope for struct, union or enum tags
|
// Represents a block scope.
|
||||||
typedef struct {
|
|
||||||
char *name;
|
|
||||||
int depth;
|
|
||||||
Type *ty;
|
|
||||||
} TagScope;
|
|
||||||
|
|
||||||
struct Scope {
|
struct Scope {
|
||||||
Scope *next;
|
Scope *next;
|
||||||
// C has two block scopes; one is for variables/typedefs and
|
// C has two block scopes; one is for variables/typedefs and
|
||||||
|
@ -103,10 +95,6 @@ static Obj *globals;
|
||||||
|
|
||||||
static Scope *scope = &(Scope){};
|
static Scope *scope = &(Scope){};
|
||||||
|
|
||||||
// scope_depth is incremented by one at the beginning of a block
|
|
||||||
// scope and decremented by one at the end of a block scope.
|
|
||||||
static int scope_depth;
|
|
||||||
|
|
||||||
// Points to the function object the parser is currently parsing.
|
// Points to the function object the parser is currently parsing.
|
||||||
static Obj *current_fn;
|
static Obj *current_fn;
|
||||||
|
|
||||||
|
@ -173,12 +161,10 @@ static void enter_scope(void) {
|
||||||
Scope *sc = calloc(1, sizeof(Scope));
|
Scope *sc = calloc(1, sizeof(Scope));
|
||||||
sc->next = scope;
|
sc->next = scope;
|
||||||
scope = sc;
|
scope = sc;
|
||||||
scope_depth++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void leave_scope(void) {
|
static void leave_scope(void) {
|
||||||
scope = scope->next;
|
scope = scope->next;
|
||||||
scope_depth--;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find a variable by name.
|
// Find a variable by name.
|
||||||
|
@ -190,16 +176,16 @@ static VarScope *find_var(Token *tok) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static TagScope *find_tag(Token *tok) {
|
static Type *find_tag(Token *tok) {
|
||||||
for (Scope *sc = scope; sc; sc = sc->next) {
|
for (Scope *sc = scope; sc; sc = sc->next) {
|
||||||
TagScope *sc2 = hashmap_get2(&sc->tags, tok->loc, tok->len);
|
Type *ty = hashmap_get2(&sc->tags, tok->loc, tok->len);
|
||||||
if (sc2) return sc2;
|
if (ty) return ty;
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
Node *new_node(NodeKind kind, Token *tok) {
|
Node *new_node(NodeKind kind, Token *tok) {
|
||||||
Node *node = calloc(1, sizeof(Node));
|
Node *node = alloc_node();
|
||||||
node->kind = kind;
|
node->kind = kind;
|
||||||
node->tok = tok;
|
node->tok = tok;
|
||||||
return node;
|
return node;
|
||||||
|
@ -252,7 +238,7 @@ static Node *new_vla_ptr(Obj *var, Token *tok) {
|
||||||
|
|
||||||
Node *new_cast(Node *expr, Type *ty) {
|
Node *new_cast(Node *expr, Type *ty) {
|
||||||
add_type(expr);
|
add_type(expr);
|
||||||
Node *node = calloc(1, sizeof(Node));
|
Node *node = alloc_node();
|
||||||
node->kind = ND_CAST;
|
node->kind = ND_CAST;
|
||||||
node->tok = expr->tok;
|
node->tok = expr->tok;
|
||||||
node->lhs = expr;
|
node->lhs = expr;
|
||||||
|
@ -262,8 +248,6 @@ Node *new_cast(Node *expr, Type *ty) {
|
||||||
|
|
||||||
static VarScope *push_scope(char *name) {
|
static VarScope *push_scope(char *name) {
|
||||||
VarScope *sc = calloc(1, sizeof(VarScope));
|
VarScope *sc = calloc(1, sizeof(VarScope));
|
||||||
sc->name = name;
|
|
||||||
sc->depth = scope_depth;
|
|
||||||
hashmap_put(&scope->vars, name, sc);
|
hashmap_put(&scope->vars, name, sc);
|
||||||
return sc;
|
return sc;
|
||||||
}
|
}
|
||||||
|
@ -303,7 +287,7 @@ static Initializer *new_initializer(Type *ty, bool is_flexible) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static Obj *new_var(char *name, Type *ty) {
|
static Obj *new_var(char *name, Type *ty) {
|
||||||
Obj *var = calloc(1, sizeof(Obj));
|
Obj *var = alloc_obj();
|
||||||
var->name = name;
|
var->name = name;
|
||||||
var->ty = ty;
|
var->ty = ty;
|
||||||
var->align = ty->align;
|
var->align = ty->align;
|
||||||
|
@ -330,9 +314,7 @@ static Obj *new_gvar(char *name, Type *ty) {
|
||||||
|
|
||||||
static char *new_unique_name(void) {
|
static char *new_unique_name(void) {
|
||||||
static int id = 0;
|
static int id = 0;
|
||||||
char *buf = calloc(1, 20);
|
return xasprintf(".L..%d", id++);
|
||||||
sprintf(buf, ".L..%d", id++);
|
|
||||||
return buf;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static Obj *new_anon_gvar(Type *ty) {
|
static Obj *new_anon_gvar(Type *ty) {
|
||||||
|
@ -360,11 +342,7 @@ static Type *find_typedef(Token *tok) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void push_tag_scope(Token *tok, Type *ty) {
|
static void push_tag_scope(Token *tok, Type *ty) {
|
||||||
TagScope *sc = calloc(1, sizeof(TagScope));
|
hashmap_put2(&scope->tags, tok->loc, tok->len, ty);
|
||||||
sc->name = strndup(tok->loc, tok->len);
|
|
||||||
sc->depth = scope_depth;
|
|
||||||
sc->ty = ty;
|
|
||||||
hashmap_put2(&scope->tags, tok->loc, tok->len, sc);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Consumes token if equal to STR or __STR__.
|
// Consumes token if equal to STR or __STR__.
|
||||||
|
@ -599,9 +577,14 @@ static Token *thing_attributes(Token *tok, void *arg) {
|
||||||
error_tok(tok, "unknown function attribute");
|
error_tok(tok, "unknown function attribute");
|
||||||
}
|
}
|
||||||
|
|
||||||
// typespec = typename typename*
|
// declspec = ("void" | "_Bool" | "char" | "short" | "int" | "long"
|
||||||
// typename = "void" | "_Bool" | "char" | "short" | "int" | "long"
|
// | "typedef" | "static" | "extern" | "inline"
|
||||||
// | struct-decl | union-decl | typedef-name
|
// | "_Thread_local" | "__thread"
|
||||||
|
// | "signed" | "unsigned"
|
||||||
|
// | struct-decl | union-decl | typedef-name
|
||||||
|
// | enum-specifier | typeof-specifier
|
||||||
|
// | "const" | "volatile" | "auto" | "register" | "restrict"
|
||||||
|
// | "__restrict" | "__restrict__" | "_Noreturn")+
|
||||||
//
|
//
|
||||||
// The order of typenames in a type-specifier doesn't matter. For
|
// The order of typenames in a type-specifier doesn't matter. For
|
||||||
// example, `int long static` means the same as `static long int`.
|
// example, `int long static` means the same as `static long int`.
|
||||||
|
@ -614,7 +597,7 @@ static Token *thing_attributes(Token *tok, void *arg) {
|
||||||
// while keeping the "current" type object that the typenames up
|
// while keeping the "current" type object that the typenames up
|
||||||
// until that point represent. When we reach a non-typename token,
|
// until that point represent. When we reach a non-typename token,
|
||||||
// we return the current type object.
|
// we return the current type object.
|
||||||
static Type *typespec(Token **rest, Token *tok, VarAttr *attr) {
|
static Type *declspec(Token **rest, Token *tok, VarAttr *attr) {
|
||||||
// We use a single integer as counters for all typenames.
|
// We use a single integer as counters for all typenames.
|
||||||
// For example, bits 0 and 1 represent how many times we saw the
|
// For example, bits 0 and 1 represent how many times we saw the
|
||||||
// keyword "void" so far. With this, we can use a switch statement
|
// keyword "void" so far. With this, we can use a switch statement
|
||||||
|
@ -851,7 +834,7 @@ static Token *static_assertion(Token *tok) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// func-params = ("void" | param ("," param)* ("," "...")?)? ")"
|
// func-params = ("void" | param ("," param)* ("," "...")?)? ")"
|
||||||
// param = typespec declarator
|
// param = declspec declarator
|
||||||
static Type *func_params(Token **rest, Token *tok, Type *ty) {
|
static Type *func_params(Token **rest, Token *tok, Type *ty) {
|
||||||
if (EQUAL(tok, "void") && EQUAL(tok->next, ")")) {
|
if (EQUAL(tok, "void") && EQUAL(tok->next, ")")) {
|
||||||
*rest = tok->next->next;
|
*rest = tok->next->next;
|
||||||
|
@ -868,7 +851,7 @@ static Type *func_params(Token **rest, Token *tok, Type *ty) {
|
||||||
skip(tok, ')');
|
skip(tok, ')');
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
Type *ty2 = typespec(&tok, tok, NULL);
|
Type *ty2 = declspec(&tok, tok, NULL);
|
||||||
ty2 = declarator(&tok, tok, ty2);
|
ty2 = declarator(&tok, tok, ty2);
|
||||||
Token *name = ty2->name;
|
Token *name = ty2->name;
|
||||||
if (ty2->kind == TY_ARRAY) {
|
if (ty2->kind == TY_ARRAY) {
|
||||||
|
@ -935,8 +918,8 @@ static Type *declarator(Token **rest, Token *tok, Type *ty) {
|
||||||
ty = pointers(&tok, tok, ty);
|
ty = pointers(&tok, tok, ty);
|
||||||
if (EQUAL(tok, "(")) {
|
if (EQUAL(tok, "(")) {
|
||||||
Token *start = tok;
|
Token *start = tok;
|
||||||
Type ignore = {};
|
Type dummy = {};
|
||||||
declarator(&tok, tok->next, &ignore);
|
declarator(&tok, start->next, &dummy);
|
||||||
tok = skip(tok, ')');
|
tok = skip(tok, ')');
|
||||||
ty = type_suffix(rest, tok, ty);
|
ty = type_suffix(rest, tok, ty);
|
||||||
ty = declarator(&tok, start->next, ty);
|
ty = declarator(&tok, start->next, ty);
|
||||||
|
@ -959,8 +942,8 @@ static Type *abstract_declarator(Token **rest, Token *tok, Type *ty) {
|
||||||
ty = pointers(&tok, tok, ty);
|
ty = pointers(&tok, tok, ty);
|
||||||
if (EQUAL(tok, "(")) {
|
if (EQUAL(tok, "(")) {
|
||||||
Token *start = tok;
|
Token *start = tok;
|
||||||
Type ignore = {};
|
Type dummy = {};
|
||||||
abstract_declarator(&tok, tok->next, &ignore);
|
abstract_declarator(&tok, start->next, &dummy);
|
||||||
tok = skip(tok, ')');
|
tok = skip(tok, ')');
|
||||||
ty = type_suffix(rest, tok, ty);
|
ty = type_suffix(rest, tok, ty);
|
||||||
return abstract_declarator(&tok, start->next, ty);
|
return abstract_declarator(&tok, start->next, ty);
|
||||||
|
@ -968,9 +951,9 @@ static Type *abstract_declarator(Token **rest, Token *tok, Type *ty) {
|
||||||
return type_suffix(rest, tok, ty);
|
return type_suffix(rest, tok, ty);
|
||||||
}
|
}
|
||||||
|
|
||||||
// type-name = typespec abstract-declarator
|
// type-name = declspec abstract-declarator
|
||||||
static Type *typename(Token **rest, Token *tok) {
|
static Type *typename(Token **rest, Token *tok) {
|
||||||
Type *ty = typespec(&tok, tok, NULL);
|
Type *ty = declspec(&tok, tok, NULL);
|
||||||
return abstract_declarator(rest, tok, ty);
|
return abstract_declarator(rest, tok, ty);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1003,11 +986,11 @@ static Type *enum_specifier(Token **rest, Token *tok) {
|
||||||
tok = tok->next;
|
tok = tok->next;
|
||||||
}
|
}
|
||||||
if (tag && !EQUAL(tok, "{")) {
|
if (tag && !EQUAL(tok, "{")) {
|
||||||
TagScope *sc = find_tag(tag);
|
Type *ty = find_tag(tag);
|
||||||
if (!sc) error_tok(tag, "unknown enum type");
|
if (!ty) error_tok(tag, "unknown enum type");
|
||||||
if (sc->ty->kind != TY_ENUM) error_tok(tag, "not an enum tag");
|
if (ty->kind != TY_ENUM) error_tok(tag, "not an enum tag");
|
||||||
*rest = tok;
|
*rest = tok;
|
||||||
return sc->ty;
|
return ty;
|
||||||
}
|
}
|
||||||
tok = skip(tok, '{');
|
tok = skip(tok, '{');
|
||||||
// Read an enum-list.
|
// Read an enum-list.
|
||||||
|
@ -1070,8 +1053,8 @@ static Node *new_alloca(Node *sz) {
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
// declaration = typespec (declarator ("=" expr)? ("," declarator ("="
|
// declaration = declspec (declarator ("=" expr)?
|
||||||
// expr)?)*)? ";"
|
// ("," declarator ("=" expr)?)*)? ";"
|
||||||
static Node *declaration(Token **rest, Token *tok, Type *basety,
|
static Node *declaration(Token **rest, Token *tok, Type *basety,
|
||||||
VarAttr *attr) {
|
VarAttr *attr) {
|
||||||
Node head = {};
|
Node head = {};
|
||||||
|
@ -1363,9 +1346,11 @@ static void struct_initializer1(Token **rest, Token *tok, Initializer *init) {
|
||||||
// struct-initializer2 = initializer ("," initializer)*
|
// struct-initializer2 = initializer ("," initializer)*
|
||||||
static void struct_initializer2(Token **rest, Token *tok, Initializer *init,
|
static void struct_initializer2(Token **rest, Token *tok, Initializer *init,
|
||||||
Member *mem) {
|
Member *mem) {
|
||||||
|
bool first = true;
|
||||||
for (; mem && !is_end(tok); mem = mem->next) {
|
for (; mem && !is_end(tok); mem = mem->next) {
|
||||||
Token *start = tok;
|
Token *start = tok;
|
||||||
if (mem != init->ty->members) tok = skip(tok, ',');
|
if (!first) tok = skip(tok, ',');
|
||||||
|
first = false;
|
||||||
if (EQUAL(tok, "[") || EQUAL(tok, ".")) {
|
if (EQUAL(tok, "[") || EQUAL(tok, ".")) {
|
||||||
*rest = start;
|
*rest = start;
|
||||||
return;
|
return;
|
||||||
|
@ -1389,6 +1374,7 @@ static void union_initializer(Token **rest, Token *tok, Initializer *init) {
|
||||||
init->mem = init->ty->members;
|
init->mem = init->ty->members;
|
||||||
if (EQUAL(tok, "{")) {
|
if (EQUAL(tok, "{")) {
|
||||||
initializer2(&tok, tok->next, init->children[0]);
|
initializer2(&tok, tok->next, init->children[0]);
|
||||||
|
CONSUME(&tok, tok, ",");
|
||||||
*rest = skip(tok, '}');
|
*rest = skip(tok, '}');
|
||||||
} else {
|
} else {
|
||||||
initializer2(rest, tok, init->children[0]);
|
initializer2(rest, tok, init->children[0]);
|
||||||
|
@ -1769,7 +1755,7 @@ static Node *stmt(Token **rest, Token *tok) {
|
||||||
brk_label = node->brk_label = new_unique_name();
|
brk_label = node->brk_label = new_unique_name();
|
||||||
cont_label = node->cont_label = new_unique_name();
|
cont_label = node->cont_label = new_unique_name();
|
||||||
if (is_typename(tok)) {
|
if (is_typename(tok)) {
|
||||||
Type *basety = typespec(&tok, tok, NULL);
|
Type *basety = declspec(&tok, tok, NULL);
|
||||||
node->init = declaration(&tok, tok, basety, NULL);
|
node->init = declaration(&tok, tok, basety, NULL);
|
||||||
} else {
|
} else {
|
||||||
node->init = expr_stmt(&tok, tok);
|
node->init = expr_stmt(&tok, tok);
|
||||||
|
@ -1872,7 +1858,7 @@ static Node *compound_stmt(Token **rest, Token *tok) {
|
||||||
while (!EQUAL(tok, "}")) {
|
while (!EQUAL(tok, "}")) {
|
||||||
if (is_typename(tok) && !EQUAL(tok->next, ":")) {
|
if (is_typename(tok) && !EQUAL(tok->next, ":")) {
|
||||||
VarAttr attr = {};
|
VarAttr attr = {};
|
||||||
Type *basety = typespec(&tok, tok, &attr);
|
Type *basety = declspec(&tok, tok, &attr);
|
||||||
if (attr.is_typedef) {
|
if (attr.is_typedef) {
|
||||||
tok = parse_typedef(tok, basety);
|
tok = parse_typedef(tok, basety);
|
||||||
continue;
|
continue;
|
||||||
|
@ -2565,30 +2551,14 @@ static Node *mul(Token **rest, Token *tok) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// compound-literal = initializer "}"
|
// cast = "(" type-name ")" cast | unary
|
||||||
static Node *compound_literal(Token **rest, Token *tok, Type *ty,
|
|
||||||
Token *start) {
|
|
||||||
if (scope_depth == 0) {
|
|
||||||
Obj *var = new_anon_gvar(ty);
|
|
||||||
gvar_initializer(rest, tok, var);
|
|
||||||
return new_var_node(var, start);
|
|
||||||
}
|
|
||||||
Obj *var = new_lvar(new_unique_name(), ty);
|
|
||||||
Node *lhs = lvar_initializer(rest, tok, var);
|
|
||||||
Node *rhs = new_var_node(var, tok);
|
|
||||||
return new_binary(ND_COMMA, lhs, rhs, tok);
|
|
||||||
}
|
|
||||||
|
|
||||||
// cast = "(" type-name ")" "{" compound-literal
|
|
||||||
// | "(" type-name ")" cast
|
|
||||||
// | unary
|
|
||||||
static Node *cast(Token **rest, Token *tok) {
|
static Node *cast(Token **rest, Token *tok) {
|
||||||
if (EQUAL(tok, "(") && is_typename(tok->next)) {
|
if (EQUAL(tok, "(") && is_typename(tok->next)) {
|
||||||
Token *start = tok;
|
Token *start = tok;
|
||||||
Type *ty = typename(&tok, tok->next);
|
Type *ty = typename(&tok, tok->next);
|
||||||
tok = skip(tok, ')');
|
tok = skip(tok, ')');
|
||||||
// compound literal
|
// compound literal
|
||||||
if (EQUAL(tok, "{")) return compound_literal(rest, tok, ty, start);
|
if (EQUAL(tok, "{")) return unary(rest, start);
|
||||||
// type cast
|
// type cast
|
||||||
Node *node = new_cast(cast(rest, tok), ty);
|
Node *node = new_cast(cast(rest, tok), ty);
|
||||||
node->tok = start;
|
node->tok = start;
|
||||||
|
@ -2612,9 +2582,10 @@ static Node *unary(Token **rest, Token *tok) {
|
||||||
return new_unary(ND_ADDR, lhs, tok);
|
return new_unary(ND_ADDR, lhs, tok);
|
||||||
}
|
}
|
||||||
if (EQUAL(tok, "*")) {
|
if (EQUAL(tok, "*")) {
|
||||||
// [C18 6.5.3.2p4] This is an oddity in the C spec, but dereferencing
|
// [https://www.sigbus.info/n1570#6.5.3.2p4] This is an oddity
|
||||||
// a function shouldn't do anything. If foo is a function, `*foo`,
|
// in the C spec, but dereferencing a function shouldn't do
|
||||||
// `**foo` or `*****foo` are all equivalent to just `foo`.
|
// anything. If foo is a function, `*foo`, `**foo` or `*****foo`
|
||||||
|
// are all equivalent to just `foo`.
|
||||||
Node *node = cast(rest, tok->next);
|
Node *node = cast(rest, tok->next);
|
||||||
add_type(node);
|
add_type(node);
|
||||||
if (node->ty->kind == TY_FUNC) return node;
|
if (node->ty->kind == TY_FUNC) return node;
|
||||||
|
@ -2640,14 +2611,14 @@ static Node *unary(Token **rest, Token *tok) {
|
||||||
return postfix(rest, tok);
|
return postfix(rest, tok);
|
||||||
}
|
}
|
||||||
|
|
||||||
// struct-members = (typespec declarator ("," declarator)* ";")*
|
// struct-members = (declspec declarator ("," declarator)* ";")*
|
||||||
static void struct_members(Token **rest, Token *tok, Type *ty) {
|
static void struct_members(Token **rest, Token *tok, Type *ty) {
|
||||||
Member head = {};
|
Member head = {};
|
||||||
Member *cur = &head;
|
Member *cur = &head;
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
while (!EQUAL(tok, "}")) {
|
while (!EQUAL(tok, "}")) {
|
||||||
VarAttr attr = {};
|
VarAttr attr = {};
|
||||||
Type *basety = typespec(&tok, tok, &attr);
|
Type *basety = declspec(&tok, tok, &attr);
|
||||||
bool first = true;
|
bool first = true;
|
||||||
// Anonymous struct member
|
// Anonymous struct member
|
||||||
if ((basety->kind == TY_STRUCT || basety->kind == TY_UNION) &&
|
if ((basety->kind == TY_STRUCT || basety->kind == TY_UNION) &&
|
||||||
|
@ -2708,8 +2679,8 @@ static Type *struct_union_decl(Token **rest, Token *tok) {
|
||||||
}
|
}
|
||||||
if (tag && !EQUAL(tok, "{")) {
|
if (tag && !EQUAL(tok, "{")) {
|
||||||
*rest = tok;
|
*rest = tok;
|
||||||
TagScope *sc = find_tag(tag);
|
Type *ty2 = find_tag(tag);
|
||||||
if (sc) return sc->ty;
|
if (ty2) return ty2;
|
||||||
ty->size = -1;
|
ty->size = -1;
|
||||||
push_tag_scope(tag, ty);
|
push_tag_scope(tag, ty);
|
||||||
return ty;
|
return ty;
|
||||||
|
@ -2721,10 +2692,10 @@ static Type *struct_union_decl(Token **rest, Token *tok) {
|
||||||
if (tag) {
|
if (tag) {
|
||||||
// If this is a redefinition, overwrite a previous type.
|
// If this is a redefinition, overwrite a previous type.
|
||||||
// Otherwise, register the struct type.
|
// Otherwise, register the struct type.
|
||||||
TagScope *sc = find_tag(tag);
|
Type *ty2 = hashmap_get2(&scope->tags, tag->loc, tag->len);
|
||||||
if (sc && sc->depth == scope_depth) {
|
if (ty2) {
|
||||||
*sc->ty = *ty;
|
*ty2 = *ty;
|
||||||
return sc->ty;
|
return ty2;
|
||||||
}
|
}
|
||||||
push_tag_scope(tag, ty);
|
push_tag_scope(tag, ty);
|
||||||
}
|
}
|
||||||
|
@ -2837,7 +2808,8 @@ static Node *new_inc_dec(Node *node, Token *tok, int addend) {
|
||||||
node->ty);
|
node->ty);
|
||||||
}
|
}
|
||||||
|
|
||||||
// postfix = ident "(" func-args ")" postfix-tail*
|
// postfix = "(" type-name ")" "{" initializer-list "}"
|
||||||
|
// | ident "(" func-args ")" postfix-tail*
|
||||||
// | primary postfix-tail*
|
// | primary postfix-tail*
|
||||||
//
|
//
|
||||||
// postfix-tail = "[" expr "]"
|
// postfix-tail = "[" expr "]"
|
||||||
|
@ -2847,6 +2819,21 @@ static Node *new_inc_dec(Node *node, Token *tok, int addend) {
|
||||||
// | "++"
|
// | "++"
|
||||||
// | "--"
|
// | "--"
|
||||||
static Node *postfix(Token **rest, Token *tok) {
|
static Node *postfix(Token **rest, Token *tok) {
|
||||||
|
if (EQUAL(tok, "(") && is_typename(tok->next)) {
|
||||||
|
// Compound literal
|
||||||
|
Token *start = tok;
|
||||||
|
Type *ty = typename(&tok, tok->next);
|
||||||
|
tok = skip(tok, ')');
|
||||||
|
if (scope->next == NULL) {
|
||||||
|
Obj *var = new_anon_gvar(ty);
|
||||||
|
gvar_initializer(rest, tok, var);
|
||||||
|
return new_var_node(var, start);
|
||||||
|
}
|
||||||
|
Obj *var = new_lvar("", ty);
|
||||||
|
Node *lhs = lvar_initializer(rest, tok, var);
|
||||||
|
Node *rhs = new_var_node(var, tok);
|
||||||
|
return new_binary(ND_COMMA, lhs, rhs, start);
|
||||||
|
}
|
||||||
Node *node = primary(&tok, tok);
|
Node *node = primary(&tok, tok);
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (EQUAL(tok, "(")) {
|
if (EQUAL(tok, "(")) {
|
||||||
|
@ -2961,7 +2948,7 @@ static Node *generic_selection(Token **rest, Token *tok) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// primary = "(" "{" stmt stmt* "}" ")"
|
// primary = "(" "{" stmt+ "}" ")"
|
||||||
// | "(" expr ")"
|
// | "(" expr ")"
|
||||||
// | "sizeof" "(" type-name ")"
|
// | "sizeof" "(" type-name ")"
|
||||||
// | "sizeof" unary
|
// | "sizeof" unary
|
||||||
|
@ -3367,8 +3354,9 @@ static Token *function(Token *tok, Type *basety, VarAttr *attr) {
|
||||||
fn->va_area = new_lvar("__va_area__", array_of(ty_char, 136));
|
fn->va_area = new_lvar("__va_area__", array_of(ty_char, 136));
|
||||||
fn->alloca_bottom = new_lvar("__alloca_size__", pointer_to(ty_char));
|
fn->alloca_bottom = new_lvar("__alloca_size__", pointer_to(ty_char));
|
||||||
tok = skip(tok, '{');
|
tok = skip(tok, '{');
|
||||||
// [C18 6.4.2.2] "__func__" is automatically defined as a
|
// [https://www.sigbus.info/n1570#6.4.2.2p1] "__func__" is
|
||||||
// local variable containing the current function name.
|
// automatically defined as a local variable containing the
|
||||||
|
// current function name.
|
||||||
push_scope("__func__")->var =
|
push_scope("__func__")->var =
|
||||||
new_string_literal(fn->name, array_of(ty_char, strlen(fn->name) + 1));
|
new_string_literal(fn->name, array_of(ty_char, strlen(fn->name) + 1));
|
||||||
// [GNU] __FUNCTION__ is yet another name of __func__.
|
// [GNU] __FUNCTION__ is yet another name of __func__.
|
||||||
|
@ -3401,7 +3389,7 @@ static Token *global_variable(Token *tok, Type *basety, VarAttr *attr) {
|
||||||
if (attr->align) var->align = attr->align;
|
if (attr->align) var->align = attr->align;
|
||||||
if (EQUAL(tok, "=")) {
|
if (EQUAL(tok, "=")) {
|
||||||
gvar_initializer(&tok, tok->next, var);
|
gvar_initializer(&tok, tok->next, var);
|
||||||
} else if (!attr->is_extern) {
|
} else if (!attr->is_extern && !attr->is_tls) {
|
||||||
var->is_tentative = true;
|
var->is_tentative = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3537,7 +3525,7 @@ Obj *parse(Token *tok) {
|
||||||
}
|
}
|
||||||
VarAttr attr = {};
|
VarAttr attr = {};
|
||||||
tok = attribute_list(tok, &attr, thing_attributes);
|
tok = attribute_list(tok, &attr, thing_attributes);
|
||||||
Type *basety = typespec(&tok, tok, &attr);
|
Type *basety = declspec(&tok, tok, &attr);
|
||||||
if (attr.is_typedef) {
|
if (attr.is_typedef) {
|
||||||
tok = parse_typedef(tok, basety);
|
tok = parse_typedef(tok, basety);
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -96,7 +96,7 @@ static Token *skip_line(Token *tok) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static Token *copy_token(Token *tok) {
|
static Token *copy_token(Token *tok) {
|
||||||
Token *t = calloc(1, sizeof(Token));
|
Token *t = alloc_token();
|
||||||
*t = *tok;
|
*t = *tok;
|
||||||
t->next = NULL;
|
t->next = NULL;
|
||||||
return t;
|
return t;
|
||||||
|
@ -234,9 +234,8 @@ static Token *copy_line(Token **rest, Token *tok) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static Token *new_num_token(int val, Token *tmpl) {
|
static Token *new_num_token(int val, Token *tmpl) {
|
||||||
char buf[30];
|
char *buf = xasprintf("%d\n", val);
|
||||||
sprintf(buf, "%d\n", val);
|
return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, buf));
|
||||||
return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, strdup(buf)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static Token *read_const_expr(Token **rest, Token *tok) {
|
static Token *read_const_expr(Token **rest, Token *tok) {
|
||||||
|
@ -270,10 +269,10 @@ static long eval_const_expr(Token **rest, Token *tok) {
|
||||||
Token *expr = read_const_expr(rest, tok->next);
|
Token *expr = read_const_expr(rest, tok->next);
|
||||||
expr = preprocess2(expr);
|
expr = preprocess2(expr);
|
||||||
if (expr->kind == TK_EOF) error_tok(start, "no expression");
|
if (expr->kind == TK_EOF) error_tok(start, "no expression");
|
||||||
// [C18 6.10.1.4] The standard requires we replace remaining
|
// [https://www.sigbus.info/n1570#6.10.1p4] The standard requires
|
||||||
// non-macro identifiers with "0" before evaluating a constant
|
// we replace remaining non-macro identifiers with "0" before
|
||||||
// expression. For example, `#if foo` is equivalent to `#if 0`
|
// evaluating a constant expression. For example, `#if foo` is
|
||||||
// if foo is not defined.
|
// equivalent to `#if 0` if foo is not defined.
|
||||||
for (Token *t = expr; t->kind != TK_EOF; t = t->next) {
|
for (Token *t = expr; t->kind != TK_EOF; t = t->next) {
|
||||||
if (t->kind == TK_IDENT) {
|
if (t->kind == TK_IDENT) {
|
||||||
Token *next = t->next;
|
Token *next = t->next;
|
||||||
|
@ -453,8 +452,7 @@ static Token *stringize(Token *hash, Token *arg) {
|
||||||
// Concatenate two tokens to create a new token.
|
// Concatenate two tokens to create a new token.
|
||||||
static Token *paste(Token *lhs, Token *rhs) {
|
static Token *paste(Token *lhs, Token *rhs) {
|
||||||
// Paste the two tokens.
|
// Paste the two tokens.
|
||||||
char *buf = calloc(1, lhs->len + rhs->len + 1);
|
char *buf = xasprintf("%.*s%.*s", lhs->len, lhs->loc, rhs->len, rhs->loc);
|
||||||
sprintf(buf, "%.*s%.*s", lhs->len, lhs->loc, rhs->len, rhs->loc);
|
|
||||||
// Tokenize the resulting string.
|
// Tokenize the resulting string.
|
||||||
Token *tok = tokenize(new_file(lhs->file->name, lhs->file->file_no, buf));
|
Token *tok = tokenize(new_file(lhs->file->name, lhs->file->file_no, buf));
|
||||||
if (tok->next->kind != TK_EOF)
|
if (tok->next->kind != TK_EOF)
|
||||||
|
@ -706,7 +704,7 @@ static char *detect_include_guard(Token *tok) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static Token *include_file(Token *tok, char *path) {
|
static Token *include_file(Token *tok, char *path, Token *filename_tok) {
|
||||||
// Check for "#pragma once"
|
// Check for "#pragma once"
|
||||||
if (hashmap_get(&pragma_once, path)) return tok;
|
if (hashmap_get(&pragma_once, path)) return tok;
|
||||||
// If we read the same file before, and if the file was guarded
|
// If we read the same file before, and if the file was guarded
|
||||||
|
@ -716,7 +714,8 @@ static Token *include_file(Token *tok, char *path) {
|
||||||
char *guard_name = hashmap_get(&include_guards, path);
|
char *guard_name = hashmap_get(&include_guards, path);
|
||||||
if (guard_name && hashmap_get(¯os, guard_name)) return tok;
|
if (guard_name && hashmap_get(¯os, guard_name)) return tok;
|
||||||
Token *tok2 = tokenize_file(path);
|
Token *tok2 = tokenize_file(path);
|
||||||
if (!tok2) error_tok(tok, "%s: cannot open file: %s", path, strerror(errno));
|
if (!tok2)
|
||||||
|
error_tok(filename_tok, "%s: cannot open file: %s", path, strerror(errno));
|
||||||
guard_name = detect_include_guard(tok2);
|
guard_name = detect_include_guard(tok2);
|
||||||
if (guard_name) hashmap_put(&include_guards, path, guard_name);
|
if (guard_name) hashmap_put(&include_guards, path, guard_name);
|
||||||
return append(tok2, tok);
|
return append(tok2, tok);
|
||||||
|
@ -760,19 +759,19 @@ static Token *preprocess2(Token *tok) {
|
||||||
char *path =
|
char *path =
|
||||||
xasprintf("%s/%s", dirname(strdup(start->file->name)), filename);
|
xasprintf("%s/%s", dirname(strdup(start->file->name)), filename);
|
||||||
if (fileexists(path)) {
|
if (fileexists(path)) {
|
||||||
tok = include_file(tok, path);
|
tok = include_file(tok, path, start->next->next);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
char *path = search_include_paths(filename);
|
char *path = search_include_paths(filename);
|
||||||
tok = include_file(tok, path ? path : filename);
|
tok = include_file(tok, path ? path : filename, start->next->next);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (EQUAL(tok, "include_next")) {
|
if (EQUAL(tok, "include_next")) {
|
||||||
bool ignore;
|
bool ignore;
|
||||||
char *filename = read_include_filename(&tok, tok->next, &ignore);
|
char *filename = read_include_filename(&tok, tok->next, &ignore);
|
||||||
char *path = search_include_next(filename);
|
char *path = search_include_next(filename);
|
||||||
tok = include_file(tok, path ? path : filename);
|
tok = include_file(tok, path ? path : filename, start->next->next);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (EQUAL(tok, "define")) {
|
if (EQUAL(tok, "define")) {
|
||||||
|
@ -914,17 +913,13 @@ static char *format_date(struct tm *tm) {
|
||||||
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
|
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
|
||||||
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
|
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
|
||||||
};
|
};
|
||||||
char buf[30];
|
return xasprintf("\"%s %2d %d\"", mon[tm->tm_mon], tm->tm_mday,
|
||||||
sprintf(buf, "\"%s %2d %d\"", mon[tm->tm_mon], tm->tm_mday,
|
tm->tm_year + 1900);
|
||||||
tm->tm_year + 1900);
|
|
||||||
return strdup(buf);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// __TIME__ is expanded to the current time, e.g. "13:34:03".
|
// __TIME__ is expanded to the current time, e.g. "13:34:03".
|
||||||
static char *format_time(struct tm *tm) {
|
static char *format_time(struct tm *tm) {
|
||||||
char buf[30];
|
return xasprintf("\"%02d:%02d:%02d\"", tm->tm_hour, tm->tm_min, tm->tm_sec);
|
||||||
sprintf(buf, "\"%02d:%02d:%02d\"", tm->tm_hour, tm->tm_min, tm->tm_sec);
|
|
||||||
return strdup(buf);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void init_macros(void) {
|
void init_macros(void) {
|
||||||
|
@ -1302,11 +1297,11 @@ static void join_adjacent_string_literals(Token *tok) {
|
||||||
}
|
}
|
||||||
// Second pass: concatenate adjacent string literals.
|
// Second pass: concatenate adjacent string literals.
|
||||||
for (Token *tok1 = tok; tok1->kind != TK_EOF;) {
|
for (Token *tok1 = tok; tok1->kind != TK_EOF;) {
|
||||||
Token *tok2 = tok1->next;
|
if (tok1->kind != TK_STR || tok1->next->kind != TK_STR) {
|
||||||
if (tok1->kind != TK_STR || tok2->kind != TK_STR) {
|
|
||||||
tok1 = tok1->next;
|
tok1 = tok1->next;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
#if 0
|
||||||
assert(tok1->ty->base->size == tok2->ty->base->size);
|
assert(tok1->ty->base->size == tok2->ty->base->size);
|
||||||
Token *t = copy_token(tok1);
|
Token *t = copy_token(tok1);
|
||||||
t->ty =
|
t->ty =
|
||||||
|
@ -1317,6 +1312,25 @@ static void join_adjacent_string_literals(Token *tok) {
|
||||||
tok2->str, tok2->ty->size);
|
tok2->str, tok2->ty->size);
|
||||||
t->len = strlen(t->loc);
|
t->len = strlen(t->loc);
|
||||||
*tok1 = *t;
|
*tok1 = *t;
|
||||||
|
#else
|
||||||
|
Token *tok2 = tok1->next;
|
||||||
|
while (tok2->kind == TK_STR) tok2 = tok2->next;
|
||||||
|
int len = tok1->ty->array_len;
|
||||||
|
for (Token *t = tok1->next; t != tok2; t = t->next) {
|
||||||
|
len = len + t->ty->array_len - 1;
|
||||||
|
}
|
||||||
|
char *buf = calloc(tok1->ty->base->size, len);
|
||||||
|
int i = 0;
|
||||||
|
for (Token *t = tok1; t != tok2; t = t->next) {
|
||||||
|
memcpy(buf + i, t->str, t->ty->size);
|
||||||
|
i = i + t->ty->size - t->ty->base->size;
|
||||||
|
}
|
||||||
|
*tok1 = *copy_token(tok1);
|
||||||
|
tok1->ty = array_of(tok1->ty->base, len);
|
||||||
|
tok1->str = buf;
|
||||||
|
tok1->next = tok2;
|
||||||
|
tok1 = tok2;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,4 +0,0 @@
|
||||||
main(void) {
|
|
||||||
void *p;
|
|
||||||
p = "hello";
|
|
||||||
}
|
|
|
@ -445,6 +445,15 @@ int main() {
|
||||||
};
|
};
|
||||||
x.a;
|
x.a;
|
||||||
}));
|
}));
|
||||||
|
ASSERT(1, ({
|
||||||
|
union {
|
||||||
|
int a;
|
||||||
|
char b;
|
||||||
|
} x = {
|
||||||
|
1,
|
||||||
|
};
|
||||||
|
x.a;
|
||||||
|
}));
|
||||||
ASSERT(2, ({
|
ASSERT(2, ({
|
||||||
enum {
|
enum {
|
||||||
x,
|
x,
|
||||||
|
|
|
@ -392,6 +392,24 @@ int main() {
|
||||||
} x = {1}, y = {2};
|
} x = {1}, y = {2};
|
||||||
(0 ? x : y).a;
|
(0 ? x : y).a;
|
||||||
}));
|
}));
|
||||||
|
ASSERT(2, ({
|
||||||
|
struct {
|
||||||
|
int a;
|
||||||
|
} x = {1}, y = {2};
|
||||||
|
(x = y).a;
|
||||||
|
}));
|
||||||
|
ASSERT(1, ({
|
||||||
|
struct {
|
||||||
|
int a;
|
||||||
|
} x = {1}, y = {2};
|
||||||
|
(1 ? x : y).a;
|
||||||
|
}));
|
||||||
|
ASSERT(2, ({
|
||||||
|
struct {
|
||||||
|
int a;
|
||||||
|
} x = {1}, y = {2};
|
||||||
|
(0 ? x : y).a;
|
||||||
|
}));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,7 +38,7 @@ static void verror_at(char *filename, char *input, int line_no, char *loc,
|
||||||
int indent = fprintf(stderr, "%s:%d: ", filename, line_no);
|
int indent = fprintf(stderr, "%s:%d: ", filename, line_no);
|
||||||
fprintf(stderr, "%.*s\n", (int)(end - line), line);
|
fprintf(stderr, "%.*s\n", (int)(end - line), line);
|
||||||
// Show the error message.
|
// Show the error message.
|
||||||
int pos = str_width(line, loc - line) + indent;
|
int pos = display_width(line, loc - line) + indent;
|
||||||
fprintf(stderr, "%*s", pos, ""); // print pos spaces.
|
fprintf(stderr, "%*s", pos, ""); // print pos spaces.
|
||||||
fprintf(stderr, "^ ");
|
fprintf(stderr, "^ ");
|
||||||
vfprintf(stderr, fmt, ap);
|
vfprintf(stderr, fmt, ap);
|
||||||
|
@ -53,6 +53,7 @@ void error_at(char *loc, char *fmt, ...) {
|
||||||
va_list ap;
|
va_list ap;
|
||||||
va_start(ap, fmt);
|
va_start(ap, fmt);
|
||||||
verror_at(current_file->name, current_file->contents, line_no, loc, fmt, ap);
|
verror_at(current_file->name, current_file->contents, line_no, loc, fmt, ap);
|
||||||
|
va_end(ap);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -64,7 +65,7 @@ void error_tok(Token *tok, char *fmt, ...) {
|
||||||
verror_at(t->file->name, t->file->contents, t->line_no, t->loc, fmt, ap);
|
verror_at(t->file->name, t->file->contents, t->line_no, t->loc, fmt, ap);
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
}
|
}
|
||||||
va_end(va);
|
va_end(ap);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,6 +74,7 @@ void warn_tok(Token *tok, char *fmt, ...) {
|
||||||
va_start(ap, fmt);
|
va_start(ap, fmt);
|
||||||
verror_at(tok->file->name, tok->file->contents, tok->line_no, tok->loc, fmt,
|
verror_at(tok->file->name, tok->file->contents, tok->line_no, tok->loc, fmt,
|
||||||
ap);
|
ap);
|
||||||
|
va_end(ap);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int is_space(int c) {
|
static int is_space(int c) {
|
||||||
|
@ -103,9 +105,9 @@ Token *skip(Token *tok, char op) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a new token and add it as the next token of `cur`.
|
// Create a new token.
|
||||||
static Token *new_token(TokenKind kind, char *start, char *end) {
|
static Token *new_token(TokenKind kind, char *start, char *end) {
|
||||||
Token *tok = calloc(1, sizeof(Token));
|
Token *tok = alloc_token();
|
||||||
tok->kind = kind;
|
tok->kind = kind;
|
||||||
tok->loc = start;
|
tok->loc = start;
|
||||||
tok->len = end - start;
|
tok->len = end - start;
|
||||||
|
@ -117,18 +119,17 @@ static Token *new_token(TokenKind kind, char *start, char *end) {
|
||||||
return tok;
|
return tok;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reads an identifier and returns a pointer pointing to the end
|
// Reads an identifier and returns its length.
|
||||||
// of an identifier.
|
// If start does not point to a valid identifier, 0 is returned.
|
||||||
//
|
static int read_ident(char *start) {
|
||||||
// Returns null if p does not point to a valid identifier.
|
char *p = start;
|
||||||
static char *read_ident(char *p) {
|
|
||||||
uint32_t c = decode_utf8(&p, p);
|
uint32_t c = decode_utf8(&p, p);
|
||||||
if (!is_ident1(c)) return NULL;
|
if (!is_ident1(c)) return 0;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
char *q;
|
char *q;
|
||||||
c = decode_utf8(&q, p);
|
c = decode_utf8(&q, p);
|
||||||
if (!('a' <= c && c <= 'f') && !is_ident2(c)) {
|
if (!('a' <= c && c <= 'f') && !is_ident2(c)) {
|
||||||
return p;
|
return p - start;
|
||||||
}
|
}
|
||||||
p = q;
|
p = q;
|
||||||
}
|
}
|
||||||
|
@ -140,6 +141,19 @@ static int from_hex(char c) {
|
||||||
return c - 'A' + 10;
|
return c - 'A' + 10;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reads a punctuator token from p and returns its length (0 if none).
|
||||||
|
static int read_punct(char *p) {
|
||||||
|
static char *kw[] = {"<<=", ">>=", "...", "==", "!=", "<=", ">=", "->",
|
||||||
|
"+=", "-=", "*=", "/=", "++", "--", "%=", "&=",
|
||||||
|
"|=", "^=", "&&", "||", "<<", ">>", "##"};
|
||||||
|
for (int i = 0; i < sizeof(kw) / sizeof(*kw); i++) {
|
||||||
|
if (startswith(p, kw[i])) {
|
||||||
|
return strlen(kw[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ispunct(*p) ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
static bool is_keyword(Token *tok) {
|
static bool is_keyword(Token *tok) {
|
||||||
static HashMap map;
|
static HashMap map;
|
||||||
if (map.capacity == 0) {
|
if (map.capacity == 0) {
|
||||||
|
@ -190,6 +204,17 @@ static int read_escaped_char(char **new_pos, char *p) {
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
*new_pos = p + 1;
|
*new_pos = p + 1;
|
||||||
|
// Escape sequences are defined using themselves here. E.g.
|
||||||
|
// '\n' is implemented using '\n'. This tautological definition
|
||||||
|
// works because the compiler that compiles our compiler knows
|
||||||
|
// what '\n' actually is. In other words, we "inherit" the ASCII
|
||||||
|
// code of '\n' from the compiler that compiles our compiler,
|
||||||
|
// so we don't have to teach the actual code here.
|
||||||
|
//
|
||||||
|
// This fact has huge implications not only for the correctness
|
||||||
|
// of the compiler but also for the security of the generated code.
|
||||||
|
// For more info, read "Reflections on Trusting Trust" by Ken Thompson.
|
||||||
|
// https://github.com/rui314/chibicc/wiki/thompson1984.pdf
|
||||||
switch (*p) {
|
switch (*p) {
|
||||||
case 'a':
|
case 'a':
|
||||||
return '\a';
|
return '\a';
|
||||||
|
@ -217,7 +242,7 @@ static int read_escaped_char(char **new_pos, char *p) {
|
||||||
static char *string_literal_end(char *p) {
|
static char *string_literal_end(char *p) {
|
||||||
char *start = p;
|
char *start = p;
|
||||||
for (; *p != '"'; p++) {
|
for (; *p != '"'; p++) {
|
||||||
if (*p == '\0') error_at(start, "unclosed string literal");
|
if (*p == '\n' || *p == '\0') error_at(start, "unclosed string literal");
|
||||||
if (*p == '\\') p++;
|
if (*p == '\\') p++;
|
||||||
}
|
}
|
||||||
return p;
|
return p;
|
||||||
|
@ -225,7 +250,7 @@ static char *string_literal_end(char *p) {
|
||||||
|
|
||||||
static Token *read_string_literal(char *start, char *quote) {
|
static Token *read_string_literal(char *start, char *quote) {
|
||||||
char *end = string_literal_end(quote + 1);
|
char *end = string_literal_end(quote + 1);
|
||||||
char *buf = calloc(1, end - quote);
|
char *buf = calloc(2, end - quote);
|
||||||
int len = 0;
|
int len = 0;
|
||||||
for (char *p = quote + 1; p < end;) {
|
for (char *p = quote + 1; p < end;) {
|
||||||
if (*p == '\\')
|
if (*p == '\\')
|
||||||
|
@ -409,7 +434,7 @@ static void convert_pp_number(Token *tok) {
|
||||||
void convert_pp_tokens(Token *tok) {
|
void convert_pp_tokens(Token *tok) {
|
||||||
for (Token *t = tok; t->kind != TK_EOF; t = t->next) {
|
for (Token *t = tok; t->kind != TK_EOF; t = t->next) {
|
||||||
if (is_keyword(t))
|
if (is_keyword(t))
|
||||||
t->kind = TK_RESERVED;
|
t->kind = TK_KEYWORD;
|
||||||
else if (t->kind == TK_PP_NUM)
|
else if (t->kind == TK_PP_NUM)
|
||||||
convert_pp_number(t);
|
convert_pp_number(t);
|
||||||
}
|
}
|
||||||
|
@ -546,34 +571,17 @@ Token *tokenize(File *file) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Identifier or keyword
|
// Identifier or keyword
|
||||||
char *q;
|
int ident_len = read_ident(p);
|
||||||
if ((q = read_ident(p)) != NULL) {
|
if (ident_len) {
|
||||||
cur = cur->next = new_token(TK_IDENT, p, q);
|
cur = cur->next = new_token(TK_IDENT, p, p + ident_len);
|
||||||
p = q;
|
p += cur->len;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Three-letter punctuators
|
// Punctuators
|
||||||
if (LOOKINGAT(p, "<<=") || LOOKINGAT(p, ">>=") || LOOKINGAT(p, "...")) {
|
int punct_len = read_punct(p);
|
||||||
cur = cur->next = new_token(TK_RESERVED, p, p + 3);
|
if (punct_len) {
|
||||||
p += 3;
|
cur = cur->next = new_token(TK_PUNCT, p, p + punct_len);
|
||||||
continue;
|
p += cur->len;
|
||||||
}
|
|
||||||
// Two-letter punctuators
|
|
||||||
if (LOOKINGAT(p, "==") || LOOKINGAT(p, "!=") || LOOKINGAT(p, "<=") ||
|
|
||||||
LOOKINGAT(p, ">=") || LOOKINGAT(p, "->") || LOOKINGAT(p, "+=") ||
|
|
||||||
LOOKINGAT(p, "-=") || LOOKINGAT(p, "*=") || LOOKINGAT(p, "/=") ||
|
|
||||||
LOOKINGAT(p, "++") || LOOKINGAT(p, "--") || LOOKINGAT(p, "%=") ||
|
|
||||||
LOOKINGAT(p, "&=") || LOOKINGAT(p, "|=") || LOOKINGAT(p, "^=") ||
|
|
||||||
LOOKINGAT(p, "&&") || LOOKINGAT(p, "||") || LOOKINGAT(p, "<<") ||
|
|
||||||
LOOKINGAT(p, ">>") || LOOKINGAT(p, "##")) {
|
|
||||||
cur = cur->next = new_token(TK_RESERVED, p, p + 2);
|
|
||||||
p += 2;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Single-letter punctuators
|
|
||||||
if (ispunct(*p)) {
|
|
||||||
cur = cur->next = new_token(TK_RESERVED, p, p + 1);
|
|
||||||
p++;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
error_at(p, "invalid token");
|
error_at(p, "invalid token");
|
||||||
|
@ -665,6 +673,7 @@ static void remove_backslash_newline(char *p) {
|
||||||
p[j++] = p[i++];
|
p[j++] = p[i++];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (; n > 0; n--) p[j++] = '\n';
|
||||||
p[j] = '\0';
|
p[j] = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -710,6 +719,11 @@ static void convert_universal_chars(char *p) {
|
||||||
Token *tokenize_file(char *path) {
|
Token *tokenize_file(char *path) {
|
||||||
char *p = read_file(path);
|
char *p = read_file(path);
|
||||||
if (!p) return NULL;
|
if (!p) return NULL;
|
||||||
|
// UTF-8 texts may start with a 3-byte "BOM" marker sequence.
|
||||||
|
// If exists, just skip them because they are useless bytes.
|
||||||
|
// (It is actually not recommended to add BOM markers to UTF-8
|
||||||
|
// texts, but it's not uncommon particularly on Windows.)
|
||||||
|
if (!memcmp(p, "\xef\xbb\xbf", 3)) p += 3;
|
||||||
canonicalize_newline(p);
|
canonicalize_newline(p);
|
||||||
remove_backslash_newline(p);
|
remove_backslash_newline(p);
|
||||||
convert_universal_chars(p);
|
convert_universal_chars(p);
|
||||||
|
|
|
@ -18,7 +18,7 @@ Type ty_double[1] = {{TY_DOUBLE, 8, 8}};
|
||||||
Type ty_ldouble[1] = {{TY_LDOUBLE, 16, 16}};
|
Type ty_ldouble[1] = {{TY_LDOUBLE, 16, 16}};
|
||||||
|
|
||||||
static Type *new_type(TypeKind kind, int size, int align) {
|
static Type *new_type(TypeKind kind, int size, int align) {
|
||||||
Type *ty = calloc(1, sizeof(Type));
|
Type *ty = alloc_type();
|
||||||
ty->kind = kind;
|
ty->kind = kind;
|
||||||
ty->size = size;
|
ty->size = size;
|
||||||
ty->align = align;
|
ty->align = align;
|
||||||
|
@ -77,7 +77,7 @@ bool is_compatible(Type *t1, Type *t2) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Type *copy_type(Type *ty) {
|
Type *copy_type(Type *ty) {
|
||||||
Type *ret = calloc(1, sizeof(Type));
|
Type *ret = alloc_type();
|
||||||
*ret = *ty;
|
*ret = *ty;
|
||||||
ret->origin = ty;
|
ret->origin = ty;
|
||||||
return ret;
|
return ret;
|
||||||
|
|
|
@ -66,9 +66,9 @@ static bool in_range(uint32_t *range, uint32_t c) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// C11 allows not only ASCII but some multibyte characters in certain
|
// [https://www.sigbus.info/n1570#D] C11 allows not only ASCII but
|
||||||
// Unicode ranges to be used in an identifier. See C11 Annex D for the
|
// some multibyte characters in certain Unicode ranges to be used in an
|
||||||
// details.
|
// identifier.
|
||||||
//
|
//
|
||||||
// This function returns true if a given character is acceptable as
|
// This function returns true if a given character is acceptable as
|
||||||
// the first character of an identifier.
|
// the first character of an identifier.
|
||||||
|
@ -108,7 +108,7 @@ bool is_ident2(uint32_t c) {
|
||||||
|
|
||||||
// Returns the number of columns needed to display a given
|
// Returns the number of columns needed to display a given
|
||||||
// string in a fixed-width font.
|
// string in a fixed-width font.
|
||||||
int str_width(char *p, int len) {
|
int display_width(char *p, int len) {
|
||||||
char *start = p;
|
char *start = p;
|
||||||
int w = 0;
|
int w = 0;
|
||||||
while (p - start < len) {
|
while (p - start < len) {
|
||||||
|
|
Loading…
Reference in New Issue