cosmopolitan/third_party/chibicc/file.c

107 lines
2.5 KiB
C

#include "third_party/chibicc/chibicc.h"
// Slurps contents of file.
char *read_file(const char *path) {
char *p;
FILE *fp;
int buflen, nread, end, n;
if (!strcmp(path, "-")) {
fp = stdin;
} else {
fp = fopen(path, "r");
if (!fp) return NULL;
}
buflen = 4096;
nread = 0;
p = calloc(1, buflen);
for (;;) {
end = buflen - 2;
n = fread(p + nread, 1, end - nread, fp);
if (n == 0) break;
nread += n;
if (nread == end) {
buflen *= 2;
p = realloc(p, buflen);
}
}
if (fp != stdin) fclose(fp);
if (nread > 0 && p[nread - 1] == '\\') {
p[nread - 1] = '\n';
} else if (nread == 0 || p[nread - 1] != '\n') {
p[nread++] = '\n';
}
p[nread] = '\0';
return p;
}
char *skip_bom(char *p) {
// UTF-8 texts may start with a 3-byte "BOM" marker sequence.
// If exists, just skip them because they are useless bytes.
// (It is actually not recommended to add BOM markers to UTF-8
// texts, but it's not uncommon particularly on Windows.)
if (!memcmp(p, "\357\273\277", 3)) p += 3;
return p;
}
// Replaces \r or \r\n with \n.
void canonicalize_newline(char *p) {
int i = 0, j = 0;
while (p[i]) {
if (p[i] == '\r' && p[i + 1] == '\n') {
i += 2;
p[j++] = '\n';
} else if (p[i] == '\r') {
i++;
p[j++] = '\n';
} else {
p[j++] = p[i++];
}
}
p[j] = '\0';
}
// Removes backslashes followed by a newline.
void remove_backslash_newline(char *p) {
int i = 0, j = 0;
// We want to keep the number of newline characters so that
// the logical line number matches the physical one.
// This counter maintain the number of newlines we have removed.
int n = 0;
bool instring = false;
while (p[i]) {
if (instring) {
if (p[i] == '"' && p[i - 1] != '\\') {
instring = false;
}
} else {
if (p[i] == '"') {
instring = true;
} else if (p[i] == '/' && p[i + 1] == '*') {
p[j++] = p[i++];
p[j++] = p[i++];
while (p[i]) {
if (p[i] == '*' && p[i + 1] == '/') {
p[j++] = p[i++];
p[j++] = p[i++];
break;
} else {
p[j++] = p[i++];
}
}
continue;
}
}
if (p[i] == '\\' && p[i + 1] == '\n') {
i += 2;
n++;
} else if (p[i] == '\n') {
p[j++] = p[i++];
for (; n > 0; n--) p[j++] = '\n';
} else {
p[j++] = p[i++];
}
}
for (; n > 0; n--) p[j++] = '\n';
p[j] = '\0';
}