diff --git a/.clang-format b/.clang-format index 68a1584f..4c76b9c8 100644 --- a/.clang-format +++ b/.clang-format @@ -8,5 +8,8 @@ AlwaysBreakBeforeMultilineStrings: false AllowShortFunctionsOnASingleLine: false KeepEmptyLinesAtTheStartOfBlocks: true --- +Language: Cpp +AllowShortFunctionsOnASingleLine: true +--- Language: Proto ... diff --git a/Makefile b/Makefile index 6a3c3f5f..c5b9f6f1 100644 --- a/Makefile +++ b/Makefile @@ -109,6 +109,7 @@ include libc/runtime/runtime.mk # ├──systems include libc/unicode/unicode.mk # │ include third_party/dlmalloc/dlmalloc.mk # │ include libc/mem/mem.mk # │ +include libc/ohmyplus/ohmyplus.mk # │ include libc/zipos/zipos.mk # │ include third_party/dtoa/dtoa.mk # │ include libc/time/time.mk # │ diff --git a/build/definitions.mk b/build/definitions.mk index 7d01c175..60680845 100644 --- a/build/definitions.mk +++ b/build/definitions.mk @@ -166,7 +166,9 @@ DEFAULT_CXXFLAGS = \ -fno-rtti \ -fno-exceptions \ -fuse-cxa-atexit \ - -fno-threadsafe-statics + -fno-threadsafe-statics \ + -Wno-int-in-bool-context \ + -Wno-narrowing DEFAULT_ASFLAGS = \ -W \ @@ -278,6 +280,7 @@ PREPROCESS.flags = -E $(copt.flags) $(cc.flags) $(cpp.flags) PREPROCESS.lds.flags = -D__LINKER__ $(filter-out -g%,$(PREPROCESS.flags)) -P -xc COMPILE.c = $(CC) -S $(COMPILE.c.flags) +COMPILE.cxx = $(CXX) -S $(COMPILE.cxx.flags) COMPILE.i = $(CC) -S $(COMPILE.i.flags) COMPILE.f = $(FC) -S $(COMPILE.f.flags) COMPILE.F = $(FC) -S $(COMPILE.F.flags) diff --git a/build/rules.mk b/build/rules.mk index f0aec677..570df894 100644 --- a/build/rules.mk +++ b/build/rules.mk @@ -32,6 +32,7 @@ o/%.lds: %.lds; @ACTION=PREPROCESS build/compile $(PREPROCESS.lds) $(OUTPUT_OPTI o/%.inc: %.h; @ACTION=PREPROCESS build/compile $(PREPROCESS) $(OUTPUT_OPTION) -D__ASSEMBLER__ -P $< o/%.pkg:; @build/package $(OUTPUT_OPTION) $(addprefix -d,$(filter %.pkg,$^)) $(filter %.o,$^) o/%.h.ok: %.h; @ACTION=CHECK.h build/compile $(COMPILE.c) -x c -g0 -o $@ $< +o/%.h.okk: %.h; @ACTION=CHECK.h build/compile $(COMPILE.cxx) -x c++ -g0 -o $@ $< o/%.greg.o: %.greg.c; @ACTION=OBJECTIFY.greg build/compile $(OBJECTIFY.greg.c) $(OUTPUT_OPTION) $< o/%.zip.o: o/%; @build/zipobj $(OUTPUT_OPTION) $< @@ -60,6 +61,7 @@ o/$(MODE)/%.o: %.cc; @ACTION=OBJECTIFY.cxx build/compile $(OBJECTIFY.cxx) $(OUTP o/$(MODE)/%.o: o/$(MODE)/%.cc; @ACTION=OBJECTIFY.cxx build/compile $(OBJECTIFY.cxx) $(OUTPUT_OPTION) $< o/$(MODE)/%.lds: %.lds; @ACTION=PREPROCESS build/compile $(PREPROCESS.lds) $(OUTPUT_OPTION) $< o/$(MODE)/%.h.ok: %.h; @ACTION=CHECK.h build/compile $(COMPILE.c) -x c -g0 -o $@ $< +o/$(MODE)/%.h.okk: %.h; @ACTION=CHECK.h build/compile $(COMPILE.cxx) -x c++ -g0 -o $@ $< o/$(MODE)/%.o: %.greg.c; @ACTION=OBJECTIFY.greg build/compile $(OBJECTIFY.greg.c) $(OUTPUT_OPTION) $< o/$(MODE)/%.greg.o: %.greg.c; @ACTION=OBJECTIFY.greg build/compile $(OBJECTIFY.greg.c) $(OUTPUT_OPTION) $< o/$(MODE)/%.ansi.o: %.ansi.c; @ACTION=OBJECTIFY.ansi build/compile $(OBJECTIFY.ansi.c) $(OUTPUT_OPTION) $< diff --git a/dsp/scale/cdecimate2xuint8x8.c b/dsp/scale/cdecimate2xuint8x8.c index e487d69f..d4993cac 100644 --- a/dsp/scale/cdecimate2xuint8x8.c +++ b/dsp/scale/cdecimate2xuint8x8.c @@ -17,7 +17,6 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "dsp/scale/scale.h" #include "libc/assert.h" #include "libc/intrin/packuswb.h" #include "libc/intrin/paddw.h" diff --git a/dsp/scale/cdecimate2xuint8x8.h b/dsp/scale/cdecimate2xuint8x8.h new file mode 100644 index 00000000..04f30668 --- /dev/null +++ b/dsp/scale/cdecimate2xuint8x8.h @@ -0,0 +1,11 @@ +#ifndef COSMOPOLITAN_DSP_SCALE_CDECIMATE2XUINT8X8_H_ +#define COSMOPOLITAN_DSP_SCALE_CDECIMATE2XUINT8X8_H_ +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + +void *cDecimate2xUint8x8(unsigned long n, unsigned char[n], + const signed char[8]); + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_DSP_SCALE_CDECIMATE2XUINT8X8_H_ */ diff --git a/dsp/scale/gyarados.c b/dsp/scale/gyarados.c index e46ea249..dbc4f6d0 100644 --- a/dsp/scale/gyarados.c +++ b/dsp/scale/gyarados.c @@ -22,7 +22,6 @@ #include "dsp/core/ituround.h" #include "dsp/core/q.h" #include "dsp/core/twixt8.h" -#include "dsp/scale/scale.h" #include "libc/limits.h" #include "libc/log/check.h" #include "libc/log/log.h" diff --git a/dsp/scale/magikarp.c b/dsp/scale/magikarp.c index df9a7914..6989ccdc 100644 --- a/dsp/scale/magikarp.c +++ b/dsp/scale/magikarp.c @@ -20,7 +20,7 @@ #include "dsp/core/half.h" #include "dsp/core/ks8.h" #include "dsp/core/kss8.h" -#include "dsp/scale/scale.h" +#include "dsp/scale/cdecimate2xuint8x8.h" #include "libc/macros.h" #include "libc/nexgen32e/x86feature.h" #include "libc/str/str.h" diff --git a/dsp/scale/scale.c b/dsp/scale/scale.c index a5d38fc0..ad69986e 100644 --- a/dsp/scale/scale.c +++ b/dsp/scale/scale.c @@ -18,7 +18,6 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "dsp/core/half.h" -#include "dsp/scale/scale.h" void *Scale2xX(long ys, long xs, unsigned char p[ys][xs], long yn, long xn) { long y, x, w; diff --git a/dsp/scale/scale.h b/dsp/scale/scale.h index fa4a78de..be58dde9 100644 --- a/dsp/scale/scale.h +++ b/dsp/scale/scale.h @@ -15,34 +15,22 @@ void FreeSamplingSolution(struct SamplingSolution *); struct SamplingSolution *ComputeSamplingSolution(long, long, double, double, double); -void *Scale2xX(long ys, long xs, unsigned char[ys][xs], long, long); -void *Scale2xY(long ys, long xs, unsigned char[ys][xs], long, long); -void *Magikarp2xX(long ys, long xs, unsigned char[ys][xs], long, long); -void *Magikarp2xY(long ys, long xs, unsigned char[ys][xs], long, long); -void *Magkern2xX(long ys, long xs, unsigned char[ys][xs], long, long); -void *Magkern2xY(long ys, long xs, unsigned char[ys][xs], long, long); -void *MagikarpY(long dys, long dxs, unsigned char d[restrict dys][dxs], - long sys, long sxs, const unsigned char s[sys][sxs], long yn, - long xn, const signed char K[8]); +void *Scale2xX(long, long, void *, long, long); +void *Scale2xY(long, long, void *, long, long); +void *Magikarp2xX(long, long, void *, long, long); +void *Magikarp2xY(long, long, void *, long, long); +void *Magkern2xX(long, long, void *, long, long); +void *Magkern2xY(long, long, void *, long, long); +void *MagikarpY(long, long, void *, long, long, const void *, long, long, + const signed char[8]); -void *GyaradosUint8(long dyw, long dxw, unsigned char dst[dyw][dxw], long syw, - long sxw, const unsigned char src[syw][sxw], long dyn, - long dxn, long syn, long sxn, long lo, long hi, - struct SamplingSolution *cy, struct SamplingSolution *cx, - bool sharpen); -void *EzGyarados(long dcw, long dyw, long dxw, unsigned char dst[dcw][dyw][dxw], - long scw, long syw, long sxw, - const unsigned char src[scw][syw][sxw], long c0, long cn, - long dyn, long dxn, long syn, long sxn, double ry, double rx, - double oy, double ox); - -void Decimate2xUint8x8(unsigned long n, unsigned char[n * 2], - const signed char[static 8]); -void *cDecimate2xUint8x8(unsigned long n, unsigned char[n * 2], - const signed char[8]); - -void *transpose(long yn, long xn, const unsigned char[yn][xn]); -extern void (*const transpose88b)(unsigned char[8][8]); +void *GyaradosUint8(long, long, void *, long, long, const void *, long, long, + long, long, long, long, struct SamplingSolution *, + struct SamplingSolution *, bool); +void *GyaradosInt16(long, void *, long, const void *, long, long, + struct SamplingSolution *, bool); +void *EzGyarados(long, long, long, void *, long, long, long, const void *, long, + long, long, long, long, long, double, double, double, double); COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/dsp/tty/quant.h b/dsp/tty/quant.h index 93e06ca2..9d08bf0e 100644 --- a/dsp/tty/quant.h +++ b/dsp/tty/quant.h @@ -23,20 +23,26 @@ typedef struct TtyRgb (*rgb2ttyf_f)(__m128); typedef struct TtyRgb (*tty2rgb_f)(struct TtyRgb); typedef struct TtyRgb ttypalette_t[2][8]; +enum TtyQuantizationAlgorithm { + kTtyQuantAnsi, + kTtyQuantTrue, + kTtyQuantXterm256, +}; + +enum TtyBlocksSelection { + kTtyBlocksUnicode, + kTtyBlocksCp437, +}; + +enum TtyQuantizationChannels { + kTtyQuantGrayscale = 1, + kTtyQuantRgb = 3, +}; + struct TtyQuant { - enum TtyQuantizationAlgorithm { - kTtyQuantAnsi, - kTtyQuantTrue, - kTtyQuantXterm256, - } alg; - enum TtyBlocksSelection { - kTtyBlocksUnicode, - kTtyBlocksCp437, - } blocks; - enum TtyQuantizationChannels { - kTtyQuantGrayscale = 1, - kTtyQuantRgb = 3, - } chans; + enum TtyQuantizationAlgorithm alg; + enum TtyBlocksSelection blocks; + enum TtyQuantizationChannels chans; unsigned min; unsigned max; setbg_f setbg; diff --git a/examples/examples.mk b/examples/examples.mk index b89c81b8..92d2868f 100644 --- a/examples/examples.mk +++ b/examples/examples.mk @@ -47,12 +47,12 @@ EXAMPLES_DIRECTDEPS = \ LIBC_CONV \ LIBC_FMT \ LIBC_LOG \ - LIBC_MATH \ LIBC_MEM \ LIBC_NEXGEN32E \ LIBC_NT_KERNELBASE \ LIBC_NT_NTDLL \ LIBC_NT_USER32 \ + LIBC_OHMYPLUS \ LIBC_RAND \ LIBC_RUNTIME \ LIBC_SOCK \ @@ -69,7 +69,6 @@ EXAMPLES_DIRECTDEPS = \ THIRD_PARTY_COMPILER_RT \ THIRD_PARTY_DLMALLOC \ THIRD_PARTY_DTOA \ - THIRD_PARTY_DUKTAPE \ THIRD_PARTY_GETOPT \ THIRD_PARTY_MUSL \ THIRD_PARTY_STB \ @@ -82,6 +81,7 @@ EXAMPLES_DEPS := \ o/$(MODE)/examples/examples.pkg: \ $(EXAMPLES_OBJS) \ + $(THIRD_PARTY_DUKTAPE_A).pkg \ $(foreach x,$(EXAMPLES_DIRECTDEPS),$($(x)_A).pkg) o/$(MODE)/examples/unbourne.o: \ @@ -98,6 +98,7 @@ o/$(MODE)/examples/%.com.dbg: \ o/$(MODE)/examples/%.elf: \ $(EXAMPLES_DEPS) \ + $(THIRD_PARTY_DUKTAPE) \ o/$(MODE)/examples/%.o \ $(CRT) \ $(ELF) @@ -107,6 +108,7 @@ $(EXAMPLES_OBJS): examples/examples.mk o/$(MODE)/examples/hellojs.com.dbg: \ $(EXAMPLES_DEPS) \ + $(THIRD_PARTY_DUKTAPE) \ o/$(MODE)/examples/hellojs.o \ o/$(MODE)/examples/hello.js.zip.o \ o/$(MODE)/examples/examples.pkg \ diff --git a/examples/nesemu1.cc b/examples/nesemu1.cc new file mode 100644 index 00000000..c92cb33b --- /dev/null +++ b/examples/nesemu1.cc @@ -0,0 +1,1555 @@ +/* NESEMU1 :: EMULATOR FOR THE NINTENDO ENTERTAINMENT SYSTEM (R) ARCHITECTURE */ +/* WRITTEN BY AND COPYRIGHT 2011 JOEL YLILUOMA ── SEE: http://iki.fi/bisqwit/ */ +/* PORTED TO TELETYPEWRITERS IN YEAR 2020 BY JUSTINE ALEXANDRA ROBERTS TUNNEY */ +/* TRADEMARKS ARE OWNED BY THEIR RESPECTIVE OWNERS LAWYERCATS LUV TAUTOLOGIES */ +/* https://bisqwit.iki.fi/jutut/kuvat/programming_examples/nesemu1/nesemu1.cc */ +#include "dsp/scale/scale.h" +#include "dsp/tty/itoa8.h" +#include "dsp/tty/quant.h" +#include "dsp/tty/tty.h" +#include "libc/assert.h" +#include "libc/bits/bits.h" +#include "libc/bits/safemacros.h" +#include "libc/calls/calls.h" +#include "libc/calls/hefty/spawn.h" +#include "libc/calls/struct/itimerval.h" +#include "libc/calls/struct/winsize.h" +#include "libc/errno.h" +#include "libc/fmt/fmt.h" +#include "libc/inttypes.h" +#include "libc/log/check.h" +#include "libc/log/log.h" +#include "libc/macros.h" +#include "libc/math.h" +#include "libc/mem/mem.h" +#include "libc/ohmyplus/vector.h" +#include "libc/runtime/gc.h" +#include "libc/runtime/runtime.h" +#include "libc/sock/sock.h" +#include "libc/stdio/stdio.h" +#include "libc/str/str.h" +#include "libc/sysv/consts/fileno.h" +#include "libc/sysv/consts/itimer.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/poll.h" +#include "libc/sysv/consts/sig.h" +#include "libc/time/time.h" +#include "libc/x/x.h" + +#define DYN 240 +#define DXN 256 +#define FPS 60.0988 +#define HZ 1789773 +#define KEYHZ 20 + +#define CTRL(C) ((C) ^ 0100) +#define ALT(C) ((033 << 010) | (C)) + +static const char* inputfn; + +typedef uint32_t u32; +typedef uint16_t u16; +typedef uint8_t u8; +typedef int8_t s8; + +static const struct itimerval kNesFps = {{0, 1. / FPS * 1e6}, + {0, 1. / FPS * 1e6}}; + +struct Frame { + char *p, *w, *mem; +}; + +struct Action { + int code; + int wait; +}; + +struct Audio { + size_t i; + int16_t p[FRAMESIZE]; +}; + +static int frame_; +static int playfd_; +static bool piped_; +static int devnull_; +static int playpid_; +static bool exited_; +static bool timeout_; +static bool resized_; +static size_t vtsize_; +static long tyn_, txn_; +static const char* ffplay_; +static struct Audio audio_; +static struct TtyRgb* ttyrgb_; +static struct Frame frames_[2]; +static unsigned char *R, *G, *B; +static struct Action arrow_, button_; +static struct SamplingSolution* asx_; +static struct SamplingSolution* ssy_; +static struct SamplingSolution* ssx_; +static unsigned char pixels_[3][DYN][DXN]; +static unsigned char palette_[3][64][512][3]; +static int joy_current_[2] = {0, 0}; +static int joy_next_[2] = {0, 0}; +static int joypos_[2] = {0, 0}; + +static int Clamp(int v) { return v > 255 ? 255 : v; } +static float FixGamma(float f) { return f > 0 ? powf(f, 2.2f / 1.8f) : 0; } + +static void WriteStringNow(const char* s) { + ttywrite(STDOUT_FILENO, s, strlen(s)); +} + +void CleanupTerminal(void) { + ttyraw((enum TtyRawFlags)(-1u)); + ttyshowcursor(STDOUT_FILENO); +} + +void OnTimer(void) { timeout_ = true; } +void OnResize(void) { resized_ = true; } +void OnCtrlC(void) { exited_ = true; } +void OnSigChld(void) { piped_ = true, playpid_ = 0; } + +void InitFrame(struct Frame* f) { + f->p = f->w = f->mem = (char*)realloc(f->mem, vtsize_); +} + +void GetTermSize(void) { + struct winsize wsize_; + wsize_.ws_row = 25; + wsize_.ws_col = 80; + getttysize(STDOUT_FILENO, &wsize_); + tyn_ = wsize_.ws_row * 2; + txn_ = wsize_.ws_col * 2; + FreeSamplingSolution(ssy_); + FreeSamplingSolution(ssx_); + ssy_ = ComputeSamplingSolution(tyn_, DYN, 0, 0, 2); + ssx_ = ComputeSamplingSolution(txn_, DXN, 0, 0, 0); + R = (unsigned char*)realloc(R, tyn_ * txn_); + G = (unsigned char*)realloc(G, tyn_ * txn_); + B = (unsigned char*)realloc(B, tyn_ * txn_); + ttyrgb_ = (struct TtyRgb*)realloc(ttyrgb_, tyn_ * txn_ * 4); + vtsize_ = ((tyn_ * txn_ * strlen("\e[48;2;255;48;2;255m▄")) + + (tyn_ * strlen("\e[0m\r\n")) + 128); + frame_ = 0; + InitFrame(&frames_[0]); + InitFrame(&frames_[1]); + WriteStringNow("\e[0m\e[H\e[J"); +} + +bool TrySpeaker(const char* prog, char* const* args) { + int rc; + int fds[3]; + fds[0] = -1; + fds[1] = devnull_; + fds[2] = devnull_; + if ((rc = spawnve(0, fds, prog, args, environ)) != -1) { + playpid_ = rc; + playfd_ = fds[0]; + return true; + } else { + return false; + } +} + +void IoInit(void) { + GetTermSize(); + xsigaction(SIGINT, (void*)OnCtrlC, 0, 0, NULL); + xsigaction(SIGWINCH, (void*)OnResize, 0, 0, NULL); + xsigaction(SIGALRM, (void*)OnTimer, 0, 0, NULL); + setitimer(ITIMER_REAL, &kNesFps, NULL); + ttyhidecursor(STDOUT_FILENO); + ttyraw(kTtySigs); + ttyquantinit(kTtyQuantTrue, kTtyQuantRgb, kTtyBlocksUnicode); + atexit(CleanupTerminal); +} + +void SystemFailure(void) { + fputs("error: ", stderr); + fputs(strerror(errno), stderr); + fputc('\n', stderr); + exit(7); +} + +void ReadKeyboard(void) { + int ch; + char b[20]; + ssize_t i, rc; + memset(b, -1, sizeof(b)); + if ((rc = read(STDIN_FILENO, b, 16)) != -1) { + for (i = 0; i < rc; ++i) { + ch = b[i]; + if (b[i] == '\e') { + ++i; + if (b[i] == '[') { + ++i; + switch (b[i]) { + case 'A': + ch = CTRL('P'); // up arrow + break; + case 'B': + ch = CTRL('N'); // down arrow + break; + case 'C': + ch = CTRL('F'); // right arrow + break; + case 'D': + ch = CTRL('B'); // left arrow + break; + default: + break; + } + } + } + switch (ch) { + case ' ': + button_.code = 0b00100000; // A + button_.wait = KEYHZ; + break; + case 'b': + button_.code = 0b00010000; // B + button_.wait = KEYHZ; + break; + case '\r': // enter + button_.code = 0b10000000; // START + button_.wait = KEYHZ; + break; + case '\t': // tab + button_.code = 0b01000000; // SELECT + button_.wait = KEYHZ; + break; + case 'k': // vim + case 'w': // wasd qwerty + case ',': // wasd dvorak + case CTRL('P'): // emacs + arrow_.code = 0b00000100; // UP + arrow_.wait = KEYHZ; + break; + case 'j': // vim + case 's': // wasd qwerty + case 'o': // wasd dvorak + case CTRL('N'): // emacs + arrow_.code = 0b00001000; // DOWN + arrow_.wait = KEYHZ; + break; + case 'h': // vim + case 'a': // wasd qwerty & dvorak + case CTRL('B'): // emacs + arrow_.code = 0b00000010; // LEFT + arrow_.wait = KEYHZ; + break; + case 'l': // vim + case 'd': // wasd qwerty + case 'e': // wasd dvorak + case CTRL('F'): // emacs + arrow_.code = 0b00000001; // RIGHT + arrow_.wait = KEYHZ; + break; + case 'x': // xterm256 color mode + ttyquantinit(kTtyQuantXterm256, kTtyQuantRgb, kTtyBlocksUnicode); + break; + case 't': // ansi 24bit color mode + ttyquantinit(kTtyQuantTrue, kTtyQuantRgb, kTtyBlocksUnicode); + break; + default: + break; + } + } + } else { + SystemFailure(); + } +} + +struct Frame* FlipFrameBuffer(void) { + frame_ = !frame_; + return &frames_[frame_]; +} + +void TransmitVideo(void) { + ssize_t rc; + struct Frame* f; + f = &frames_[frame_]; + if (f->w >= f->p) f = FlipFrameBuffer(); + if (f->w < f->p) { + if ((rc = write(STDOUT_FILENO, f->w, f->p - f->w)) != -1) { + f->w += rc; + } else { + SystemFailure(); + } + } +} + +void TransmitAudio(void) { + ssize_t rc; + if (!audio_.i) return; + if ((rc = write(playfd_, audio_.p, audio_.i * sizeof(short))) != -1) { + rc /= sizeof(short); + memmove(audio_.p, audio_.p + rc, (audio_.i - rc) * sizeof(short)); + audio_.i -= rc; + } else { + SystemFailure(); + } +} + +void ScaleVideoFrameToTeletypewriter(void) { + long y, x; + GyaradosUint8(tyn_, txn_, R, DYN, DXN, pixels_[0], tyn_, txn_, DYN, DXN, 0, + 255, ssy_, ssx_, true); + GyaradosUint8(tyn_, txn_, G, DYN, DXN, pixels_[1], tyn_, txn_, DYN, DXN, 0, + 255, ssy_, ssx_, true); + GyaradosUint8(tyn_, txn_, B, DYN, DXN, pixels_[2], tyn_, txn_, DYN, DXN, 0, + 255, ssy_, ssx_, true); + for (y = 0; y < tyn_; ++y) { + for (x = 0; x < txn_; ++x) { + ttyrgb_[y * txn_ + x] = + rgb2tty(R[y * txn_ + x], G[y * txn_ + x], B[y * txn_ + x]); + } + } +} + +void KeyCountdown(struct Action* a) { + if (a->wait <= 1) { + a->code = 0; + } else { + a->wait--; + } +} + +void PollAndSynchronize(void) { + struct pollfd fds[3]; + fds[0].fd = STDIN_FILENO; + fds[0].events = POLLIN; + fds[1].fd = STDOUT_FILENO; + fds[1].events = POLLOUT; + fds[2].fd = playpid_ ? playfd_ : -1; + fds[2].events = POLLOUT; + do { + if (poll(fds, ARRAYLEN(fds), 1. / FPS * 1e3) != -1) { + if (fds[0].revents & (POLLIN | POLLERR)) ReadKeyboard(); + if (fds[1].revents & (POLLOUT | POLLERR)) TransmitVideo(); + if (fds[2].revents & (POLLOUT | POLLERR)) TransmitAudio(); + } else if (errno != EINTR) { + SystemFailure(); + } + if (exited_) { + WriteStringNow("\r\n\e[0m\e[J"); + exit(0); + } + if (resized_) { + resized_ = false; + GetTermSize(); + break; + } + } while (!timeout_); + timeout_ = false; + KeyCountdown(&arrow_); + KeyCountdown(&button_); + joy_next_[0] = arrow_.code | button_.code; + joy_next_[1] = arrow_.code | button_.code; +} + +void Raster(void) { + struct Frame* f; + struct TtyRgb bg = {0x12, 0x34, 0x56, 0}; + struct TtyRgb fg = {0x12, 0x34, 0x56, 0}; + ScaleVideoFrameToTeletypewriter(); + f = &frames_[!frame_]; + f->p = f->w = f->mem; + f->p = stpcpy(f->p, "\e[0m\e[H"); + f->p = ttyraster(f->p, ttyrgb_, tyn_, txn_, bg, fg); + CHECK_LT(f->p - f->mem, vtsize_); + PollAndSynchronize(); +} + +void FlushScanline(unsigned py) { + if (py == DYN - 1) { + if (!timeout_) { + Raster(); + } + timeout_ = false; + } +} + +void InitPalette(void) { + // The input value is a NES color index (with de-emphasis bits). + // We need RGB values. To produce a RGB value, we emulate the NTSC circuitry. + // For most part, this process is described at: + // http://wiki.nesdev.com/w/index.php/NTSC_video + // Incidentally, this code is shorter than a table of 64*8 RGB values. + signed char sa[] = "\372\273\32\305\35\311I\330D\357\175\13D!}N"; + int o, u, r, c, b, p, y, i, l, q, e, p0, p1, pixel; + for (o = 0; o < 3; ++o) { + for (u = 0; u < 3; ++u) { + for (p0 = 0; p0 < 512; ++p0) { + for (p1 = 0; p1 < 64; ++p1) { + // Calculate the luma and chroma by emulating the relevant circuits: + y = 0; + i = 0; + q = 0; + // 12 samples of NTSC signal constitute a color. + for (p = 0; p < 12; ++p) { + // Sample either the previous or the current pixel. + r = (p + o * 4) % 12; + // Use pixel=p0 to disable artifacts. + // Decode the color index. + pixel = r < 8 - u * 2 ? p0 : p1; + c = pixel % 16; + l = c < 0xE ? pixel / 4 & 12 : 4; + e = p0 / 64; + // NES NTSC modulator + // square wave between up to four voltage levels + b = 40 + sa[(c > 12 * ((c + 8 + p) % 12 < 6)) + + 2 * !(0451326 >> p / 2 * 3 & e) + l]; + // Ideal TV NTSC demodulator? + y += b; + i += b * round(cos(M_PI * p / 6) * 5909); + q += b * round(sin(M_PI * p / 6) * 5909); + } + // Converts YIQ to RGB + // Store color at subpixel precision + float A[3] = {-1.109, -.275, .947}, B[3] = {1.709, -.636, .624}; + palette_[o][p1][p0][u] = Clamp( + 255 * FixGamma(y / 1980.f + i * A[u] / 9e6f + q * B[u] / 9e6f)); + } + } + } + } +} + +static void PutPixel(unsigned px, unsigned py, unsigned pixel, int offset) { + static bool once; + static unsigned prev; + unsigned rgb; + if (!once) { + InitPalette(); + once = true; + } + pixels_[0][py][px] = palette_[offset][prev % 64][pixel][2]; + pixels_[1][py][px] = palette_[offset][prev % 64][pixel][1]; + pixels_[2][py][px] = palette_[offset][prev % 64][pixel][0]; + prev = pixel; +} + +static void JoyStrobe(unsigned v) { + if (v) { + joy_current_[0] = joy_next_[0]; + joypos_[0] = 0; + } + if (v) { + joy_current_[1] = joy_next_[1]; + joypos_[1] = 0; + } +} + +static u8 JoyRead(unsigned idx) { + // http://tasvideos.org/EmulatorResources/Famtasia/FMV.html + static const u8 masks[8] = { + 0b00100000, // A + 0b00010000, // B + 0b01000000, // SELECT + 0b10000000, // START + 0b00000100, // UP + 0b00001000, // DOWN + 0b00000010, // LEFT + 0b00000001, // RIGHT + }; + return (joy_current_[idx] & masks[joypos_[idx]++ & 7]) ? 1 : 0; +} + +template +struct RegBit { + T data; + enum { mask = (1u << nbits) - 1u }; + template + RegBit& operator=(T2 v) { + data = (data & ~(mask << bitno)) | ((nbits > 1 ? v & mask : !!v) << bitno); + return *this; + } + operator unsigned() const { return (data >> bitno) & mask; } + RegBit& operator++() { return *this = *this + 1; } + unsigned operator++(int) { + unsigned r = *this; + ++*this; + return r; + } +}; + +namespace GamePak { + +const unsigned VRomGranularity = 0x0400; +const unsigned VRomPages = 0x2000 / VRomGranularity; +const unsigned RomGranularity = 0x2000; +const unsigned RomPages = 0x10000 / RomGranularity; + +std::vector ROM; +std::vector VRAM(0x2000); +unsigned mappernum; +unsigned char NRAM[0x1000]; +unsigned char PRAM[0x2000]; +unsigned char* banks[RomPages] = {}; +unsigned char* Vbanks[VRomPages] = {}; +unsigned char* Nta[4] = {NRAM + 0x0000, NRAM + 0x0400, NRAM + 0x0000, + NRAM + 0x0400}; + +template & r, + unsigned granu> +static void SetPages(unsigned size, unsigned baseaddr, unsigned index) { + for (unsigned v = r.size() + index * size, p = baseaddr / granu; + p < (baseaddr + size) / granu && p < npages; ++p, v += granu) { + b[p] = &r[v % r.size()]; + } +} + +auto& SetROM = SetPages; +auto& SetVROM = SetPages; + +u8 Access(unsigned addr, u8 value, bool write) { + if (write && addr >= 0x8000 && mappernum == 7) { // e.g. Rare games + SetROM(0x8000, 0x8000, (value & 7)); + Nta[0] = Nta[1] = Nta[2] = Nta[3] = &NRAM[0x400 * ((value >> 4) & 1)]; + } + if (write && addr >= 0x8000 && mappernum == 2) { // e.g. Rockman, Castlevania + SetROM(0x4000, 0x8000, value); + } + if (write && addr >= 0x8000 && mappernum == 3) { // e.g. Kage, Solomon's Key + value &= Access(addr, 0, false); // Simulate bus conflict + SetVROM(0x2000, 0x0000, (value & 3)); + } + if (write && addr >= 0x8000 && + mappernum == 1) { // e.g. Rockman 2, Simon's Quest + static u8 regs[4] = {0x0C, 0, 0, 0}, counter = 0, cache = 0; + if (value & 0x80) { + regs[0] = 0x0C; + goto configure; + } + cache |= (value & 1) << counter; + if (++counter == 5) { + regs[(addr >> 13) & 3] = value = cache; + configure: + cache = counter = 0; + static const u8 sel[4][4] = { + {0, 0, 0, 0}, {1, 1, 1, 1}, {0, 1, 0, 1}, {0, 0, 1, 1}}; + for (unsigned m = 0; m < 4; ++m) + Nta[m] = &NRAM[0x400 * sel[regs[0] & 3][m]]; + SetVROM(0x1000, 0x0000, + ((regs[0] & 16) ? regs[1] : ((regs[1] & ~1) + 0))); + SetVROM(0x1000, 0x1000, + ((regs[0] & 16) ? regs[2] : ((regs[1] & ~1) + 1))); + switch ((regs[0] >> 2) & 3) { + case 0: + case 1: + SetROM(0x8000, 0x8000, (regs[3] & 0xE) / 2); + break; + case 2: + SetROM(0x4000, 0x8000, 0); + SetROM(0x4000, 0xC000, (regs[3] & 0xF)); + break; + case 3: + SetROM(0x4000, 0x8000, (regs[3] & 0xF)); + SetROM(0x4000, 0xC000, ~0); + break; + } + } + } + if ((addr >> 13) == 3) return PRAM[addr & 0x1FFF]; + return banks[(addr / RomGranularity) % RomPages][addr % RomGranularity]; +} + +void Init() { + unsigned v; + SetVROM(0x2000, 0x0000, 0); + for (v = 0; v < 4; ++v) { + SetROM(0x4000, v * 0x4000, v == 3 ? -1 : 0); + } +} + +} // namespace GamePak + +/* CPU: Ricoh RP2A03 (based on MOS6502, almost the same as in Commodore 64) */ +namespace CPU { + +u8 RAM[0x800]; +bool reset = true; +bool nmi = false; +bool nmi_edge_detected = false; +bool intr = false; + +template +u8 MemAccess(u16 addr, u8 v = 0); +u8 RB(u16 addr) { return MemAccess<0>(addr); } +u8 WB(u16 addr, u8 v) { return MemAccess<1>(addr, v); } +void Tick(); + +} // namespace CPU + +namespace PPU { /* Picture Processing Unit */ + +union regtype { // PPU register file + u32 value; + /* clang-format off */ + // Reg0 (write) // Reg1 (write) // Reg2 (read) + RegBit<0,8,u32> sysctrl; RegBit< 8,8,u32> dispctrl; RegBit<16,8,u32> status; + RegBit<0,2,u32> BaseNTA; RegBit< 8,1,u32> Grayscale; RegBit<21,1,u32> SPoverflow; + RegBit<2,1,u32> Inc; RegBit< 9,1,u32> ShowBG8; RegBit<22,1,u32> SP0hit; + RegBit<3,1,u32> SPaddr; RegBit<10,1,u32> ShowSP8; RegBit<23,1,u32> InVBlank; + RegBit<4,1,u32> BGaddr; RegBit<11,1,u32> ShowBG; // Reg3 (write) + RegBit<5,1,u32> SPsize; RegBit<12,1,u32> ShowSP; RegBit<24,8,u32> OAMaddr; + RegBit<6,1,u32> SlaveFlag; RegBit<11,2,u32> ShowBGSP; RegBit<24,2,u32> OAMdata; + RegBit<7,1,u32> NMIenabled; RegBit<13,3,u32> EmpRGB; RegBit<26,6,u32> OAMindex; + /* clang-format on */ +} reg; + +// Raw memory data as read&written by the game +u8 palette[32]; +u8 OAM[256]; + +// Decoded sprite information, used & changed during each scanline +struct { + u8 sprindex, y, index, attr, x_; + u16 pattern; +} OAM2[8], OAM3[8]; + +union scrolltype { + RegBit<3, 16, u32> raw; // raw VRAM address (16-bit) + RegBit<0, 8, u32> xscroll; // low 8 bits of first write to 2005 + RegBit<0, 3, u32> xfine; // low 3 bits of first write to 2005 + RegBit<3, 5, u32> xcoarse; // high 5 bits of first write to 2005 + RegBit<8, 5, u32> ycoarse; // high 5 bits of second write to 2005 + RegBit<13, 2, u32> basenta; // nametable index (copied from 2000) + RegBit<13, 1, u32> basenta_h; // horizontal nametable index + RegBit<14, 1, u32> basenta_v; // vertical nametable index + RegBit<15, 3, u32> yfine; // low 3 bits of second write to 2005 + RegBit<11, 8, u32> vaddrhi; // first write to 2006 w/ high 2 bits set to 0 + RegBit<3, 8, u32> vaddrlo; // second write to 2006 +} scroll, vaddr; + +unsigned pat_addr, sprinpos, sproutpos, sprrenpos, sprtmp; +u16 tileattr, tilepat, ioaddr; +u32 bg_shift_pat, bg_shift_attr; +int x_ = 0; +int scanline = 241; +int scanline_end = 341; +int VBlankState = 0; +int cycle_counter = 0; +int read_buffer = 0; +int open_bus = 0; +int open_bus_decay_timer = 0; +bool even_odd_toggle = false; +bool offset_toggle = false; + +/* Memory mapping: Convert PPU memory address into reference to relevant data */ +u8& NesMmap(int i) { + i &= 0x3FFF; + if (i >= 0x3F00) { + if (i % 4 == 0) i &= 0x0F; + return palette[i & 0x1F]; + } + if (i < 0x2000) { + return GamePak::Vbanks[(i / GamePak::VRomGranularity) % GamePak::VRomPages] + [i % GamePak::VRomGranularity]; + } + return GamePak::Nta[(i >> 10) & 3][i & 0x3FF]; +} + +// External I/O: read or write +u8 PpuAccess(u16 index, u8 v, bool write) { + auto RefreshOpenBus = [&](u8 v) { + return open_bus_decay_timer = 77777, open_bus = v; + }; + u8 res = open_bus; + if (write) RefreshOpenBus(v); + switch (index) { // Which port from $200x? + case 0: + if (write) { + reg.sysctrl = v; + scroll.basenta = reg.BaseNTA; + } + break; + case 1: + if (write) { + reg.dispctrl = v; + } + break; + case 2: + if (write) break; + res = reg.status | (open_bus & 0x1F); + reg.InVBlank = false; // Reading $2002 clears the vblank flag. + offset_toggle = false; // Also resets the toggle for address updates. + if (VBlankState != -5) { + VBlankState = 0; // This also may cancel the setting of InVBlank. + } + break; + case 3: + if (write) reg.OAMaddr = v; + break; // Index into Object Attribute Memory + case 4: + if (write) { + OAM[reg.OAMaddr++] = v; // Write or read the OAM (sprites). + } else { + res = + RefreshOpenBus(OAM[reg.OAMaddr] & (reg.OAMdata == 2 ? 0xE3 : 0xFF)); + } + break; + case 5: + if (!write) break; // Set background scrolling offset + if (offset_toggle) { + scroll.yfine = v & 7; + scroll.ycoarse = v >> 3; + } else { + scroll.xscroll = v; + } + offset_toggle = !offset_toggle; + break; + case 6: + if (!write) break; // Set video memory position for reads/writes + if (offset_toggle) { + scroll.vaddrlo = v; + vaddr.raw = (unsigned)scroll.raw; + } else { + scroll.vaddrhi = v & 0x3F; + } + offset_toggle = !offset_toggle; + break; + case 7: + res = read_buffer; + u8& t = NesMmap(vaddr.raw); // Access the video memory. + if (write) { + res = t = v; + } else { + if ((vaddr.raw & 0x3F00) == 0x3F00) { // palette? + res = read_buffer = (open_bus & 0xC0) | (t & 0x3F); + } + read_buffer = t; + } + RefreshOpenBus(res); + vaddr.raw = vaddr.raw + + (reg.Inc ? 32 : 1); // The address is automatically updated. + break; + } + return res; +} + +void RenderingTick() { + int y1, y2; + bool tile_decode_mode = + 0x10FFFF & (1u << (x_ / 16)); // When x_ is 0..255, 320..335 + // Each action happens in two steps: 1) select memory address; 2) receive data + // and react on it. + switch (x_ % 8) { + case 2: // Point to attribute table + ioaddr = 0x23C0 + 0x400 * vaddr.basenta + 8 * (vaddr.ycoarse / 4) + + (vaddr.xcoarse / 4); + if (tile_decode_mode) break; // Or nametable, with sprites. + case 0: // Point to nametable + ioaddr = 0x2000 + (vaddr.raw & 0xFFF); + // Reset sprite data + if (x_ == 0) { + sprinpos = sproutpos = 0; + if (reg.ShowSP) reg.OAMaddr = 0; + } + if (!reg.ShowBG) break; + // Reset scrolling (vertical once, horizontal each scanline) + if (x_ == 304 && scanline == -1) vaddr.raw = (unsigned)scroll.raw; + if (x_ == 256) { + vaddr.xcoarse = (unsigned)scroll.xcoarse; + vaddr.basenta_h = (unsigned)scroll.basenta_h; + sprrenpos = 0; + } + break; + case 1: + if (x_ == 337 && scanline == -1 && even_odd_toggle && reg.ShowBG) { + scanline_end = 340; + } + // Name table access + pat_addr = 0x1000 * reg.BGaddr + 16 * NesMmap(ioaddr) + vaddr.yfine; + if (!tile_decode_mode) break; + // Push the current tile into shift registers. + // The bitmap pattern is 16 bits, while the attribute is 2 bits, repeated + // 8 times. + bg_shift_pat = (bg_shift_pat >> 16) + 0x00010000 * tilepat; + bg_shift_attr = (bg_shift_attr >> 16) + 0x55550000 * tileattr; + break; + case 3: + // Attribute table access + if (tile_decode_mode) { + tileattr = (NesMmap(ioaddr) >> + ((vaddr.xcoarse & 2) + 2 * (vaddr.ycoarse & 2))) & + 3; + // Go to the next tile horizontally (and switch nametable if it wraps) + if (!++vaddr.xcoarse) { + vaddr.basenta_h = 1 - vaddr.basenta_h; + } + // At the edge of the screen, do the same but vertically + if (x_ == 251 && !++vaddr.yfine && ++vaddr.ycoarse == 30) { + vaddr.ycoarse = 0; + vaddr.basenta_v = 1 - vaddr.basenta_v; + } + } else if (sprrenpos < sproutpos) { + // Select sprite pattern instead of background pattern + auto& o = OAM3[sprrenpos]; // Sprite to render on next scanline + memcpy(&o, &OAM2[sprrenpos], sizeof(o)); + unsigned y = (scanline)-o.y; + if (o.attr & 0x80) y ^= (reg.SPsize ? 15 : 7); + pat_addr = 0x1000 * (reg.SPsize ? (o.index & 0x01) : reg.SPaddr); + pat_addr += 0x10 * (reg.SPsize ? (o.index & 0xFE) : (o.index & 0xFF)); + pat_addr += (y & 7) + (y & 8) * 2; + } + break; + // Pattern table bytes + case 5: + tilepat = NesMmap(pat_addr | 0); + break; + case 7: // Interleave the bits of the two pattern bytes + unsigned p = tilepat | (NesMmap(pat_addr | 8) << 8); + p = (p & 0xF00F) | ((p & 0x0F00) >> 4) | ((p & 0x00F0) << 4); + p = (p & 0xC3C3) | ((p & 0x3030) >> 2) | ((p & 0x0C0C) << 2); + p = (p & 0x9999) | ((p & 0x4444) >> 1) | ((p & 0x2222) << 1); + tilepat = p; + // When decoding sprites, save the sprite graphics and move to next sprite + if (!tile_decode_mode && sprrenpos < sproutpos) { + OAM3[sprrenpos++].pattern = tilepat; + } + break; + } + // Find which sprites are visible on next scanline (TODO: implement crazy + // 9-sprite malfunction) + switch (x_ >= 64 && x_ < 256 && x_ % 2 ? (reg.OAMaddr++ & 3) : 4) { + default: + // Access OAM (object attribute memory) + sprtmp = OAM[reg.OAMaddr]; + break; + case 0: + if (sprinpos >= 64) { + reg.OAMaddr = 0; + break; + } + ++sprinpos; // next sprite + if (sproutpos < 8) OAM2[sproutpos].y = sprtmp; + if (sproutpos < 8) OAM2[sproutpos].sprindex = reg.OAMindex; + y1 = sprtmp; + y2 = sprtmp + (reg.SPsize ? 16 : 8); + if (!(scanline >= y1 && scanline < y2)) { + reg.OAMaddr = sprinpos != 2 ? reg.OAMaddr + 3 : 8; + } + break; + case 1: + if (sproutpos < 8) OAM2[sproutpos].index = sprtmp; + break; + case 2: + if (sproutpos < 8) OAM2[sproutpos].attr = sprtmp; + break; + case 3: + if (sproutpos < 8) OAM2[sproutpos].x_ = sprtmp; + if (sproutpos < 8) { + ++sproutpos; + } else { + reg.SPoverflow = true; + } + if (sprinpos == 2) reg.OAMaddr = 8; + break; + } +} + +void RenderPixel() { + bool edge = u8(x_ + 8) < 16; // 0..7, 248..255 + bool showbg = reg.ShowBG && (!edge || reg.ShowBG8); + bool showsp = reg.ShowSP && (!edge || reg.ShowSP8); + + // Render the background + unsigned fx = scroll.xfine, + xpos = 15 - (((x_ & 7) + fx + 8 * !!(x_ & 7)) & 15); + + unsigned pixel = 0, attr = 0; + if (showbg) { // Pick a pixel from the shift registers + pixel = (bg_shift_pat >> (xpos * 2)) & 3; + attr = (bg_shift_attr >> (xpos * 2)) & (pixel ? 3 : 0); + } else if ((vaddr.raw & 0x3F00) == 0x3F00 && !reg.ShowBGSP) { + pixel = vaddr.raw; + } + + // Overlay the sprites + if (showsp) { + for (unsigned sno = 0; sno < sprrenpos; ++sno) { + auto& s = OAM3[sno]; + // Check if this sprite is horizontally in range + unsigned xdiff = x_ - s.x_; + if (xdiff >= 8) continue; // Also matches negative values + // Determine which pixel to display; skip transparent pixels + if (!(s.attr & 0x40)) xdiff = 7 - xdiff; + u8 spritepixel = (s.pattern >> (xdiff * 2)) & 3; + if (!spritepixel) continue; + // Register sprite-0 hit if applicable + if (x_ < 255 && pixel && s.sprindex == 0) reg.SP0hit = true; + // Render the pixel unless behind-background placement wanted + if (!(s.attr & 0x20) || !pixel) { + attr = (s.attr & 3) + 4; + pixel = spritepixel; + } + // Only process the first non-transparent sprite pixel. + break; + } + } + + pixel = palette[(attr * 4 + pixel) & 0x1F] & (reg.Grayscale ? 0x30 : 0x3F); + PutPixel(x_, scanline, pixel | (reg.EmpRGB << 6), cycle_counter); +} + +void ReadToolAssistedSpeedrunRobotKeys() { + static FILE* fp; + if (!fp && !isempty(inputfn)) { + fp = fopen(inputfn, "rb"); + } + if (fp) { + static unsigned ctrlmask = 0; + if (!ftell(fp)) { + fseek(fp, 0x05, SEEK_SET); + ctrlmask = fgetc(fp); + fseek(fp, 0x90, SEEK_SET); // Famtasia Movie format. + } + if (ctrlmask & 0x80) { + joy_next_[0] = fgetc(fp); + if (feof(fp)) joy_next_[0] = 0; + } + if (ctrlmask & 0x40) { + joy_next_[1] = fgetc(fp); + if (feof(fp)) joy_next_[1] = 0; + } + } +} + +// PPU::Tick() -- This function is called 3 times per each CPU cycle. +// Each call iterates through one pixel of the screen. +// The screen is divided into 262 scanlines, each having 341 columns, as such: +// +// x_=0 x_=256 x_=340 +// ___|____________________|__________| +// y=-1 | pre-render scanline| prepare | > +// ___|____________________| sprites _| > Graphics +// y=0 | visible area | for the | > processing +// | - this is rendered | next | > scanlines +// y=239 | on the screen. | scanline | > +// ___|____________________|______ +// y=240 | idle +// ___|_______________________________ +// y=241 | vertical blanking (idle) +// | 20 scanlines long +// y=260___|____________________|__________| +// +// On actual PPU, the scanline begins actually before x_=0, with +// sync/colorburst/black/background color being rendered, and +// ends after x_=256 with background/black being rendered first, +// but in this emulator we only care about the visible area. +// +// When background rendering is enabled, scanline -1 is +// 340 or 341 pixels long, alternating each frame. +// In all other situations the scanline is 341 pixels long. +// Thus, it takes 89341 or 89342 PPU::Tick() calls to render 1 frame. +void Tick() { + // Set/clear vblank where needed + switch (VBlankState) { + case -5: + reg.status = 0; + break; + case 2: + reg.InVBlank = true; + break; + case 0: + CPU::nmi = reg.InVBlank && reg.NMIenabled; + break; + } + if (VBlankState != 0) VBlankState += (VBlankState < 0 ? 1 : -1); + if (open_bus_decay_timer && !--open_bus_decay_timer) open_bus = 0; + + // Graphics processing scanline? + if (scanline < DYN) { + /* Process graphics for this cycle */ + if (reg.ShowBGSP) RenderingTick(); + if (scanline >= 0 && x_ < 256) RenderPixel(); + } + + // Done with the cycle. Check for end of scanline. + if (++cycle_counter == 3) cycle_counter = 0; // For NTSC pixel shifting + if (++x_ >= scanline_end) { + // Begin new scanline + FlushScanline(scanline); + scanline_end = 341; + x_ = 0; + // Does something special happen on the new scanline? + switch (scanline += 1) { + case 261: // Begin of rendering + scanline = -1; // pre-render line + even_odd_toggle = !even_odd_toggle; + // Clear vblank flag + VBlankState = -5; + break; + case 241: // Begin of vertical blanking + ReadToolAssistedSpeedrunRobotKeys(); + // Set vblank flag + VBlankState = 2; + } + } +} + +} // namespace PPU + +namespace APU { /* Audio Processing Unit */ + +static const u8 LengthCounters[32] = { + 10, 254, 20, 2, 40, 4, 80, 6, 160, 8, 60, 10, 14, 12, 26, 14, + 12, 16, 24, 18, 48, 20, 96, 22, 192, 24, 72, 26, 16, 28, 32, 30, +}; + +static const u16 NoisePeriods[16] = { + 2, 4, 8, 16, 32, 48, 64, 80, 101, 127, 190, 254, 381, 508, 1017, 2034, +}; + +static const u16 DMCperiods[16] = { + 428, 380, 340, 320, 286, 254, 226, 214, 190, 160, 142, 128, 106, 84, 72, 54, +}; + +bool IRQdisable = true; +bool FiveCycleDivider; +bool ChannelsEnabled[5]; +bool PeriodicIRQ; +bool DMC_IRQ; + +bool count(int& v, int reset) { return --v < 0 ? (v = reset), true : false; } + +struct channel { + int length_counter, linear_counter, address, envelope; + int sweep_delay, env_delay, wave_counter, hold, phase, level; + union { // Per-channel register file + // 4000, 4004, 400C, 4012: + RegBit<0, 8, u32> reg0; + RegBit<6, 2, u32> DutyCycle; + RegBit<4, 1, u32> EnvDecayDisable; + RegBit<0, 4, u32> EnvDecayRate; + RegBit<5, 1, u32> EnvDecayLoopEnable; + RegBit<0, 4, u32> FixedVolume; + RegBit<5, 1, u32> LengthCounterDisable; + RegBit<0, 7, u32> LinearCounterInit; + RegBit<7, 1, u32> LinearCounterDisable; + // 4001, 4005, 4013: + RegBit<8, 8, u32> reg1; + RegBit<8, 3, u32> SweepShift; + RegBit<11, 1, u32> SweepDecrease; + RegBit<12, 3, u32> SweepRate; + RegBit<15, 1, u32> SweepEnable; + RegBit<8, 8, u32> PCMlength; + // 4002, 4006, 400A, 400E: + RegBit<16, 8, u32> reg2; + RegBit<16, 4, u32> NoiseFreq; + RegBit<23, 1, u32> NoiseType; + RegBit<16, 11, u32> WaveLength; + // 4003, 4007, 400B, 400F, 4010: + RegBit<24, 8, u32> reg3; + RegBit<27, 5, u32> LengthCounterInit; + RegBit<30, 1, u32> LoopEnabled; + RegBit<31, 1, u32> IRQenable; + } reg; + + // Function for updating the wave generators and taking the sample for each + // channel. + template + int Tick() { + channel& ch = *this; + if (!ChannelsEnabled[c]) return c == 4 ? 64 : 8; + int wl = (ch.reg.WaveLength + 1) * (c >= 2 ? 1 : 2); + if (c == 3) wl = NoisePeriods[ch.reg.NoiseFreq]; + int volume = ch.length_counter + ? ch.reg.EnvDecayDisable ? ch.reg.FixedVolume : ch.envelope + : 0; + // Sample may change at wavelen intervals. + auto& S = ch.level; + if (!count(ch.wave_counter, wl)) return S; + switch (c) { + default: // Square wave. With four different 8-step binary waveforms (32 + // bits of data total). + if (wl < 8) return S = 8; + return S = (0xF33C0C04u & + (1u << (++ch.phase % 8 + ch.reg.DutyCycle * 8))) + ? volume + : 0; + + case 2: // Triangle wave + if (ch.length_counter && ch.linear_counter && wl >= 3) ++ch.phase; + return S = (ch.phase & 15) ^ ((ch.phase & 16) ? 15 : 0); + + case 3: // Noise: Linear feedback shift register + if (!ch.hold) ch.hold = 1; + ch.hold = + (ch.hold >> 1) | + (((ch.hold ^ (ch.hold >> (ch.reg.NoiseType ? 6 : 1))) & 1) << 14); + return S = (ch.hold & 1) ? 0 : volume; + + case 4: // Delta modulation channel (DMC) + // hold = 8 bit value, phase = number of bits buffered + if (ch.phase == 0) { // Nothing in sample buffer? + if (!ch.length_counter && ch.reg.LoopEnabled) { // Loop? + ch.length_counter = ch.reg.PCMlength * 16 + 1; + ch.address = (ch.reg.reg0 | 0x300) << 6; + } + if (ch.length_counter > 0) { // Load next 8 bits if available + // Note: Re-entrant! But not recursive, because even + // the shortest wave length is greater than the read time. + // TODO: proper clock + if (ch.reg.WaveLength > 20) + for (unsigned t = 0; t < 3; ++t) + CPU::RB(u16(ch.address) | 0x8000); // timing + ch.hold = CPU::RB(u16(ch.address++) | 0x8000); // Fetch byte + ch.phase = 8; + --ch.length_counter; + } else { // Otherwise, disable channel or issue IRQ + ChannelsEnabled[4] = + ch.reg.IRQenable && (CPU::intr = DMC_IRQ = true); + } + } + if (ch.phase != 0) { // Update the signal if sample buffer nonempty + int v = ch.linear_counter; + if (ch.hold & (0x80 >> --ch.phase)) { + v += 2; + } else { + v -= 2; + } + if (v >= 0 && v <= 0x7F) ch.linear_counter = v; + } + return S = ch.linear_counter; + } + } +} channels[5] = {}; + +struct { + short lo, hi; +} hz240counter = {0, 0}; + +void Write(u8 index, u8 value) { + unsigned c; + channel& ch = channels[(index / 4) % 5]; + switch (index < 0x10 ? index % 4 : index) { + case 0: + if (ch.reg.LinearCounterDisable) { + ch.linear_counter = value & 0x7F; + } + ch.reg.reg0 = value; + break; + case 1: + ch.reg.reg1 = value; + ch.sweep_delay = ch.reg.SweepRate; + break; + case 2: + ch.reg.reg2 = value; + break; + case 3: + ch.reg.reg3 = value; + if (ChannelsEnabled[index / 4]) { + ch.length_counter = LengthCounters[ch.reg.LengthCounterInit]; + } + ch.linear_counter = ch.reg.LinearCounterInit; + ch.env_delay = ch.reg.EnvDecayRate; + ch.envelope = 15; + if (index < 8) ch.phase = 0; + break; + case 0x10: + ch.reg.reg3 = value; + ch.reg.WaveLength = DMCperiods[value & 0x0F]; + break; + case 0x12: + ch.reg.reg0 = value; + ch.address = (ch.reg.reg0 | 0x300) << 6; + break; + case 0x13: + ch.reg.reg1 = value; + ch.length_counter = ch.reg.PCMlength * 16 + 1; + break; // sample length + case 0x11: + ch.linear_counter = value & 0x7F; + break; // dac value + case 0x15: + for (c = 0; c < 5; ++c) { + ChannelsEnabled[c] = value & (1 << c); + } + for (c = 0; c < 5; ++c) { + if (!ChannelsEnabled[c]) { + channels[c].length_counter = 0; + } else if (c == 4 && channels[c].length_counter == 0) { + channels[c].length_counter = ch.reg.PCMlength * 16 + 1; + } + } + break; + case 0x17: + IRQdisable = value & 0x40; + FiveCycleDivider = value & 0x80; + hz240counter = {0, 0}; + if (IRQdisable) { + PeriodicIRQ = DMC_IRQ = false; + } + break; + } +} + +u8 Read() { + unsigned c; + u8 res = 0; + for (c = 0; c < 5; ++c) { + res |= channels[c].length_counter ? 1 << c : 0; + } + if (PeriodicIRQ) res |= 0x40; + PeriodicIRQ = false; + if (DMC_IRQ) res |= 0x80; + DMC_IRQ = false; + CPU::intr = false; + return res; +} + +void Tick() { // Invoked at CPU's rate. + // Divide CPU clock by 7457.5 to get a 240 Hz, which controls certain events. + if ((hz240counter.lo += 2) >= 14915) { + hz240counter.lo -= 14915; + if (++hz240counter.hi >= 4 + FiveCycleDivider) hz240counter.hi = 0; + + // 60 Hz interval: IRQ. IRQ is not invoked in five-cycle mode (48 Hz). + if (!IRQdisable && !FiveCycleDivider && hz240counter.hi == 0) { + CPU::intr = PeriodicIRQ = true; + } + + // Some events are invoked at 96 Hz or 120 Hz rate. Others, 192 Hz or 240 + // Hz. + bool HalfTick = (hz240counter.hi & 5) == 1; + bool FullTick = hz240counter.hi < 4; + for (unsigned c = 0; c < 4; ++c) { + channel& ch = channels[c]; + int wl = ch.reg.WaveLength; + + // Length tick (all channels except DMC, but different disable bit for + // triangle wave) + if (HalfTick && ch.length_counter && + !(c == 2 ? ch.reg.LinearCounterDisable : ch.reg.LengthCounterDisable)) + ch.length_counter -= 1; + + // Sweep tick (square waves only) + if (HalfTick && c < 2 && count(ch.sweep_delay, ch.reg.SweepRate)) + if (wl >= 8 && ch.reg.SweepEnable && ch.reg.SweepShift) { + int s = wl >> ch.reg.SweepShift, d[4] = {s, s, ~s, -s}; + wl += d[ch.reg.SweepDecrease * 2 + c]; + if (wl < 0x800) ch.reg.WaveLength = wl; + } + + // Linear tick (triangle wave only) + if (FullTick && c == 2) { + ch.linear_counter = + ch.reg.LinearCounterDisable + ? ch.reg.LinearCounterInit + : (ch.linear_counter > 0 ? ch.linear_counter - 1 : 0); + } + + // Envelope tick (square and noise channels) + if (FullTick && c != 2 && count(ch.env_delay, ch.reg.EnvDecayRate)) { + if (ch.envelope > 0 || ch.reg.EnvDecayLoopEnable) { + ch.envelope = (ch.envelope - 1) & 15; + } + } + } + } + +// Mix the audio: Get the momentary sample from each channel and mix them. +#define s(c) channels[c].Tick() + auto v = [](float m, float n, float d) { return n != 0.f ? m / n : d; }; + short sample = + 30000 * + (v(95.88f, (100.f + v(8128.f, s(0) + s(1), -100.f)), 0.f) + + v(159.79f, + (100.f + + v(1.0, s(2) / 8227.f + s(3) / 12241.f + s(4) / 22638.f, -100.f)), + 0.f) - + 0.5f); +#undef s + + audio_.p[audio_.i = (audio_.i + 1) & (ARRAYLEN(audio_.p) - 1)] = sample; +} + +} // namespace APU + +namespace CPU { + +void Tick() { + // PPU clock: 3 times the CPU rate + for (unsigned n = 0; n < 3; ++n) PPU::Tick(); + // APU clock: 1 times the CPU rate + for (unsigned n = 0; n < 1; ++n) APU::Tick(); +} + +template +u8 MemAccess(u16 addr, u8 v) { + // Memory writes are turned into reads while reset is being signalled + if (reset && write) return MemAccess<0>(addr); + Tick(); + // Map the memory from CPU's viewpoint. + /**/ if (addr < 0x2000) { + u8& r = RAM[addr & 0x7FF]; + if (!write) return r; + r = v; + } else if (addr < 0x4000) { + return PPU::PpuAccess(addr & 7, v, write); + } else if (addr < 0x4018) { + switch (addr & 0x1F) { + case 0x14: // OAM DMA: Copy 256 bytes from RAM into PPU's sprite memory + if (write) + for (unsigned b = 0; b < 256; ++b) + WB(0x2004, RB((v & 7) * 0x0100 + b)); + return 0; + case 0x15: + if (!write) return APU::Read(); + APU::Write(0x15, v); + break; + case 0x16: + if (!write) return JoyRead(0); + JoyStrobe(v); + break; + case 0x17: + if (!write) return JoyRead(1); // write:passthru + default: + if (!write) break; + APU::Write(addr & 0x1F, v); + } + } else { + return GamePak::Access(addr, v, write); + } + return 0; +} + +// CPU registers: +u16 PC = 0xC000; +u8 A = 0, X = 0, Y = 0, S = 0; +union { /* Status flags: */ + u8 raw; + RegBit<0> C; // carry + RegBit<1> Z; // zero + RegBit<2> I; // interrupt enable/disable + RegBit<3> D; // decimal mode (unsupported on NES, but flag exists) + // 4,5 (0x10,0x20) don't exist + RegBit<6> V; // overflow + RegBit<7> N; // negative +} P; + +u16 wrap(u16 oldaddr, u16 newaddr) { return (oldaddr & 0xFF00) + u8(newaddr); } +void Misfire(u16 old, u16 addr) { + u16 q = wrap(old, addr); + if (q != addr) RB(q); +} +u8 Pop() { return RB(0x100 | u8(++S)); } +void Push(u8 v) { WB(0x100 | u8(S--), v); } + +template // Execute a single CPU instruction, defined by opcode "op". +void Ins() { // With template magic, the compiler will literally synthesize + // >256 different functions. + // Note: op 0x100 means "NMI", 0x101 means "Reset", 0x102 means "IRQ". They + // are implemented in terms of "BRK". User is responsible for ensuring that + // WB() will not store into memory while Reset is being processed. + unsigned addr = 0, d = 0, t = 0xFF, c = 0, sb = 0, + pbits = op < 0x100 ? 0x30 : 0x20; + + // Define the opcode decoding matrix, which decides which micro-operations + // constitute any particular opcode. (Note: The PLA of 6502 works on a + // slightly different principle.) + enum { o8 = op / 8, o8m = 1 << (op % 8) }; + +// Fetch op'th item from a bitstring encoded in a data-specific variant of +// base64, where each character transmits 8 bits of information rather than 6. +// This peculiar encoding was chosen to reduce the source code size. +// Enum temporaries are used in order to ensure compile-time evaluation. +#define t(s, code) \ + { \ + enum { \ + i = o8m & \ + (s[o8] > 90 ? (130 + " (),-089<>?BCFGHJLSVWZ[^hlmnxy|}"[s[o8] - 94]) \ + : (s[o8] - " (("[s[o8] / 39])) \ + }; \ + if (i) { \ + code; \ + } \ + } + + /* wow */ + /* clang-format off */ + /* Decode address operand */ + t(" !", addr = 0xFFFA) // NMI vector location + t(" *", addr = 0xFFFC) // Reset vector location + t("! ,", addr = 0xFFFE) // Interrupt vector location + t("zy}z{y}zzy}zzy}zzy}zzy}zzy}zzy}z ", addr = RB(PC++)) + t("2 yy2 yy2 yy2 yy2 XX2 XX2 yy2 yy ", d = X) // register index + t(" 62 62 62 62 om om 62 62 ", d = Y) + t("2 y 2 y 2 y 2 y 2 y 2 y 2 y 2 y ", addr=u8(addr+d); d=0; Tick()) // add zeropage-index + t(" y z!y z y z y z y z y z y z y z ", addr=u8(addr); addr+=256*RB(PC++)) // absolute address + t("3 6 2 6 2 6 286 2 6 2 6 2 6 2 6 /", addr=RB(c=addr); addr+=256*RB(wrap(c,c+1)))// indirect w/ page wrap + t(" *Z *Z *Z *Z 6z *Z *Z ", Misfire(addr, addr+d)) // abs. load: extra misread when cross-page + t(" 4k 4k 4k 4k 6z 4k 4k ", RB(wrap(addr, addr+d)))// abs. store: always issue a misread + /* Load source operand */ + t("aa__ff__ab__,4 ____ - ____ ", t &= A) // Many operations take A or X as operand. Some try in + t(" knnn 4 99 ", t &= X) // error to take both; the outcome is an AND operation. + t(" 9989 99 ", t &= Y) // sty,dey,iny,tya,cpy + t(" 4 ", t &= S) // tsx, las + t("!!!! !! !! !! ! !! !! !!/", t &= P.raw|pbits; c = t)// php, flag test/set/clear, interrupts + t("_^__dc___^__ ed__98 ", c = t; t = 0xFF) // save as second operand + t("vuwvzywvvuwvvuwv zy|zzywvzywv ", t &= RB(addr+d)) // memory operand + t(",2 ,2 ,2 ,2 -2 -2 -2 -2 ", t &= RB(PC++)) // immediate operand + /* Operations that mogrify memory operands directly */ + t(" 88 ", P.V = t & 0x40; P.N = t & 0x80) // bit + t(" nink nnnk ", sb = P.C) // rol,rla, ror,rra,arr + t("nnnknnnk 0 ", P.C = t & 0x80) // rol,rla, asl,slo,[arr,anc] + t(" nnnknink ", P.C = t & 0x01) // lsr,sre, ror,rra,asr + t("ninknink ", t = (t << 1) | (sb * 0x01)) + t(" nnnknnnk ", t = (t >> 1) | (sb * 0x80)) + t(" ! kink ", t = u8(t - 1)) // dec,dex,dey,dcp + t(" ! khnk ", t = u8(t + 1)) // inc,inx,iny,isb + /* Store modified value (memory) */ + t("kgnkkgnkkgnkkgnkzy|J kgnkkgnk ", WB(addr+d, t)) + t(" q ", WB(wrap(addr, addr+d), t &= ((addr+d) >> 8))) // [shx,shy,shs,sha?] + /* Some operations used up one clock cycle that we did not account for yet */ + t("rpstljstqjstrjst - - - -kjstkjst/", Tick()) // nop,flag ops,inc,dec,shifts,stack,transregister,interrupts + /* Stack operations and unconditional jumps */ + t(" ! ! ! ", Tick(); t = Pop()) // pla,plp,rti + t(" ! ! ", RB(PC++); PC = Pop(); PC |= (Pop() << 8)) // rti,rts + t(" ! ", RB(PC++)) // rts + t("! ! /", d=PC+(op?-1:1); Push(d>>8); Push(d)) // jsr, interrupts + t("! ! 8 8 /", PC = addr) // jmp, jsr, interrupts + t("!! ! /", Push(t)) // pha, php, interrupts + /* Bitmasks */ + t("! !! !! !! !! ! !! !! !!/", t = 1) + t(" ! ! !! !! ", t <<= 1) + t("! ! ! !! !! ! ! !/", t <<= 2) + t(" ! ! ! ! ! ", t <<= 4) + t(" ! ! ! !____ ", t = u8(~t)) // sbc, isb, clear flag + t("`^__ ! ! !/", t = c | t) // ora, slo, set flag + t(" !!dc`_ !! ! ! !! !! ! ", t = c & t) // and, bit, rla, clear/test flag + t(" _^__ ", t = c ^ t) // eor, sre + /* Conditional branches */ + t(" ! ! ! ! ", if(t) { Tick(); Misfire(PC, addr = s8(addr) + PC); PC=addr; }) + t(" ! ! ! ! ", if(!t) { Tick(); Misfire(PC, addr = s8(addr) + PC); PC=addr; }) + /* Addition and subtraction */ + t(" _^__ ____ ", c = t; t += A + P.C; P.V = (c^t) & (A^t) & 0x80; P.C = t & 0x100) + t(" ed__98 ", t = c - t; P.C = ~t & 0x100) // cmp,cpx,cpy, dcp, sbx + /* Store modified value (register) */ + t("aa__aa__aa__ab__ 4 !____ ____ ", A = t) + t(" nnnn 4 ! ", X = t) // ldx, dex, tax, inx, tsx,lax,las,sbx + t(" ! 9988 ! ", Y = t) // ldy, dey, tay, iny + t(" 4 0 ", S = t) // txs, las, shs + t("! ! ! !! ! ! ! ! !/", P.raw = t & ~0x30) // plp, rti, flag set/clear + /* Generic status flag updates */ + t("wwwvwwwvwwwvwxwv 5 !}}||{}wv{{wv ", P.N = t & 0x80) + t("wwwv||wvwwwvwxwv 5 !}}||{}wv{{wv ", P.Z = u8(t) == 0) + t(" 0 ", P.V = (((t >> 5)+1)&2)) // [arr] + /* clang-format on */ + /* All implemented opcodes are cycle-accurate and memory-access-accurate. + * [] means that this particular separate rule exists only to provide the + * indicated unofficial opcode(s). */ +} + +void Op() { + /* Check the state of NMI flag */ + bool nmi_now = nmi; + + unsigned op = RB(PC++); + + if (reset) { + op = 0x101; + } else if (nmi_now && !nmi_edge_detected) { + op = 0x100; + nmi_edge_detected = true; + } else if (intr && !P.I) { + op = 0x102; + } + if (!nmi_now) nmi_edge_detected = false; + + // Define function pointers for each opcode (00..FF) and each interrupt + // (100,101,102) +#define c(n) Ins<0x##n>, Ins<0x##n + 1>, +#define o(n) c(n) c(n + 2) c(n + 4) c(n + 6) + static void (*const i[0x108])() = { + o(00) o(08) o(10) o(18) o(20) o(28) o(30) o(38) o(40) o(48) o(50) o(58) + o(60) o(68) o(70) o(78) o(80) o(88) o(90) o(98) o(A0) o(A8) o(B0) + o(B8) o(C0) o(C8) o(D0) o(D8) o(E0) o(E8) o(F0) o(F8) o(100)}; +#undef o +#undef c + i[op](); + + reset = false; +} + +} // namespace CPU + +int main(int argc, char** argv) { + FILE* fp; + + if (argc <= 1 || (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "-?") == 0 || + strcmp(argv[1], "--help") == 0)) { + fprintf(stderr, "%s%s%s\n", "Usage: ", argv[0], " ROM [FMV]"); + exit(1); + } + + // Open the ROM file specified on commandline + fp = fopen(argv[1], "rb"); /* your .nes file */ + inputfn = argc >= 3 ? argv[2] : NULL; /* some tas thing */ + + if (!fp) { + fprintf(stderr, "%s%s\n", "not a nes rom file: ", argv[1]); + exit(2); + } + + if (!(fgetc(fp) == 'N' && fgetc(fp) == 'E' && fgetc(fp) == 'S' && + fgetc(fp) == CTRL('Z'))) { + fprintf(stderr, "%s%s\n", "not a nes rom file: ", argv[1]); + exit(3); + } + + // open speaker + // todo: this needs plenty of work + devnull_ = open("/dev/null", O_WRONLY); + ffplay_ = commandvenv("FFPLAY", "ffplay"); + if (devnull_ != -1 && ffplay_) { + const char* args[] = { + "ffplay", "-nodisp", "-loglevel", "quiet", "-fflags", "nobuffer", "-ac", + "1", "-ar", "1789773", "-f", "s16le", "pipe:", NULL, + }; + TrySpeaker(ffplay_, (char* const*)args); + } + + // Read the ROM file header + u8 rom16count = fgetc(fp); + u8 vrom8count = fgetc(fp); + u8 ctrlbyte = fgetc(fp); + u8 mappernum = fgetc(fp) | (ctrlbyte >> 4); + + fgetc(fp); + fgetc(fp); + fgetc(fp); + fgetc(fp); + fgetc(fp); + fgetc(fp); + fgetc(fp); + fgetc(fp); + + if (mappernum >= 0x40) mappernum &= 15; + GamePak::mappernum = mappernum; + + // Read the ROM data + if (rom16count) GamePak::ROM.resize(rom16count * 0x4000); + if (vrom8count) GamePak::VRAM.resize(vrom8count * 0x2000); + fread(&GamePak::ROM[0], rom16count, 0x4000, fp); + fread(&GamePak::VRAM[0], vrom8count, 0x2000, fp); + + fclose(fp); + printf("%u * 16kB ROM, %u * 8kB VROM, mapper %u, ctrlbyte %02X\n", rom16count, + vrom8count, mappernum, ctrlbyte); + + // Start emulation + GamePak::Init(); + IoInit(); + PPU::reg.value = 0; + + // Pre-initialize RAM the same way as FCEUX does, to improve TAS sync. + for (unsigned a = 0; a < 0x800; ++a) CPU::RAM[a] = (a & 4) ? 0xFF : 0x00; + + // Run the CPU until the program is killed. + for (;;) CPU::Op(); +} diff --git a/libc/integral/c.inc b/libc/integral/c.inc index d45562f8..5e3ea3e4 100644 --- a/libc/integral/c.inc +++ b/libc/integral/c.inc @@ -1024,9 +1024,9 @@ STATIC_YOINK_SOURCE(__BASE_FILE__); #endif #ifndef __cplusplus -#define COSMOPOLITAN_CPP_START_ -#define COSMOPOLITAN_CPP_END_ -#define COSMOPOLITAN_CPP_USING_ +#define COSMOPOLITAN_CXX_START_ +#define COSMOPOLITAN_CXX_END_ +#define COSMOPOLITAN_CXX_USING_ #define COSMOPOLITAN_C_START_ #define COSMOPOLITAN_C_END_ #endif diff --git a/libc/integral/cxx.inc b/libc/integral/cxx.inc index 35891bcc..5c96555d 100644 --- a/libc/integral/cxx.inc +++ b/libc/integral/cxx.inc @@ -1,6 +1,6 @@ -#define COSMOPOLITAN_CPP_START_ namespace cosmo { -#define COSMOPOLITAN_CPP_END_ } -#define COSMOPOLITAN_CPP_USING_ using namespace cosmo; +#define COSMOPOLITAN_CXX_START_ namespace cosmo { +#define COSMOPOLITAN_CXX_END_ } +#define COSMOPOLITAN_CXX_USING_ using namespace cosmo; #define COSMOPOLITAN_C_START_ extern "C" { #define COSMOPOLITAN_C_END_ } diff --git a/libc/libc.mk b/libc/libc.mk index 0fa0369a..1c1faeae 100644 --- a/libc/libc.mk +++ b/libc/libc.mk @@ -26,6 +26,7 @@ o/$(MODE)/libc: o/$(MODE)/libc/alg \ o/$(MODE)/libc/math \ o/$(MODE)/libc/mem \ o/$(MODE)/libc/nexgen32e \ + o/$(MODE)/libc/ohmyplus \ o/$(MODE)/libc/rand \ o/$(MODE)/libc/runtime \ o/$(MODE)/libc/sock \ diff --git a/libc/ohmyplus/ohmyplus.mk b/libc/ohmyplus/ohmyplus.mk new file mode 100644 index 00000000..a5714608 --- /dev/null +++ b/libc/ohmyplus/ohmyplus.mk @@ -0,0 +1,55 @@ +#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐ +#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘ + +PKGS += LIBC_OHMYPLUS + +LIBC_OHMYPLUS_ARTIFACTS += LIBC_OHMYPLUS_A +LIBC_OHMYPLUS = $(LIBC_OHMYPLUS_A_DEPS) $(LIBC_OHMYPLUS_A) +LIBC_OHMYPLUS_A = o/$(MODE)/libc/ohmyplus/ohmyplus.a +LIBC_OHMYPLUS_A_FILES := $(wildcard libc/ohmyplus/*) +LIBC_OHMYPLUS_A_HDRS = $(filter %.h,$(LIBC_OHMYPLUS_A_FILES)) +LIBC_OHMYPLUS_A_SRCS_S = $(filter %.S,$(LIBC_OHMYPLUS_A_FILES)) +LIBC_OHMYPLUS_A_SRCS_C = $(filter %.c,$(LIBC_OHMYPLUS_A_FILES)) +LIBC_OHMYPLUS_A_SRCS_CXX = $(filter %.cc,$(LIBC_OHMYPLUS_A_FILES)) + +LIBC_OHMYPLUS_A_SRCS = \ + $(LIBC_OHMYPLUS_A_SRCS_S) \ + $(LIBC_OHMYPLUS_A_SRCS_C) \ + $(LIBC_OHMYPLUS_A_SRCS_CXX) + +LIBC_OHMYPLUS_A_OBJS = \ + $(LIBC_OHMYPLUS_A_SRCS:%=o/$(MODE)/%.zip.o) \ + $(LIBC_OHMYPLUS_A_SRCS_S:%.S=o/$(MODE)/%.o) \ + $(LIBC_OHMYPLUS_A_SRCS_C:%.c=o/$(MODE)/%.o) \ + $(LIBC_OHMYPLUS_A_SRCS_CXX:%.cc=o/$(MODE)/%.o) + +LIBC_OHMYPLUS_A_CHECKS = \ + $(LIBC_OHMYPLUS_A).pkg \ + $(LIBC_OHMYPLUS_A_HDRS:%=o/$(MODE)/%.okk) + +LIBC_OHMYPLUS_A_DIRECTDEPS = \ + LIBC_BITS \ + LIBC_MEM \ + LIBC_STUBS + +LIBC_OHMYPLUS_A_DEPS := \ + $(call uniq,$(foreach x,$(LIBC_OHMYPLUS_A_DIRECTDEPS),$($(x)))) + +$(LIBC_OHMYPLUS_A): \ + libc/ohmyplus/ \ + $(LIBC_OHMYPLUS_A).pkg \ + $(LIBC_OHMYPLUS_A_OBJS) + +$(LIBC_OHMYPLUS_A).pkg: \ + $(LIBC_OHMYPLUS_A_OBJS) \ + $(foreach x,$(LIBC_OHMYPLUS_A_DIRECTDEPS),$($(x)_A).pkg) + +LIBC_OHMYPLUS_LIBS = $(foreach x,$(LIBC_OHMYPLUS_ARTIFACTS),$($(x))) +LIBC_OHMYPLUS_SRCS = $(foreach x,$(LIBC_OHMYPLUS_ARTIFACTS),$($(x)_SRCS)) +LIBC_OHMYPLUS_HDRS = $(foreach x,$(LIBC_OHMYPLUS_ARTIFACTS),$($(x)_HDRS)) +LIBC_OHMYPLUS_CHECKS = $(foreach x,$(LIBC_OHMYPLUS_ARTIFACTS),$($(x)_CHECKS)) +LIBC_OHMYPLUS_OBJS = $(foreach x,$(LIBC_OHMYPLUS_ARTIFACTS),$($(x)_OBJS)) +$(LIBC_OHMYPLUS_OBJS): $(BUILD_FILES) libc/ohmyplus/ohmyplus.mk + +.PHONY: o/$(MODE)/libc/ohmyplus +o/$(MODE)/libc/ohmyplus: $(LIBC_OHMYPLUS_CHECKS) diff --git a/libc/ohmyplus/vector.c b/libc/ohmyplus/vector.c new file mode 100644 index 00000000..88227efc --- /dev/null +++ b/libc/ohmyplus/vector.c @@ -0,0 +1,28 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. │ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/bits/bits.h" +#include "libc/mem/mem.h" + +void __vector_reserve(size_t n, size_t m, intptr_t **data, size_t *toto) { + if ((n = roundup2pow(n)) > *toto) { + *toto = n; + *data = realloc(*data, n * m); + } +} diff --git a/libc/ohmyplus/vector.h b/libc/ohmyplus/vector.h new file mode 100644 index 00000000..dd791a83 --- /dev/null +++ b/libc/ohmyplus/vector.h @@ -0,0 +1,52 @@ +/*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│ +│vi: set net ft=c++ ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. │ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#ifndef COSMOPOLITAN_LIBC_OHMYPLUS_VECTOR_H_ +#define COSMOPOLITAN_LIBC_OHMYPLUS_VECTOR_H_ +extern "C" { +void __vector_reserve(size_t, size_t, intptr_t **, size_t *); +} /* extern c */ +namespace std { + +template +class vector { + public: + vector() : data_(NULL), size_(0), toto_(0) {} + vector(size_t n) : data_(NULL), size_(n), toto_(0) { VectorReserve(n); } + size_t size() const { return size_; } + size_t capacity() const { return toto_; } + T &front() { return data_[0]; } + T &back() { return data_[size_ - 1]; } + void clear() { size_ = 0; } + void reserve(size_t n) { VectorReserve(n); } + void resize(size_t n) { reserve((size_ = n)); } + bool empty() const { return !size_; } + T &operator[](size_t i) { return data_[i]; } + + private: + T *data_; + size_t size_; + size_t toto_; + void VectorReserve(size_t n) { + __vector_reserve(n, sizeof(T), (intptr_t **)&data_, &toto_); + } +}; + +}; /* namespace std */ +#endif /* COSMOPOLITAN_LIBC_OHMYPLUS_VECTOR_H_ */ diff --git a/libc/sysv/consts/poll.h b/libc/sysv/consts/poll.h index 605841d1..7181227a 100644 --- a/libc/sysv/consts/poll.h +++ b/libc/sysv/consts/poll.h @@ -2,14 +2,14 @@ #define COSMOPOLITAN_LIBC_SYSV_CONSTS_POLL_H_ #include "libc/runtime/symbolic.h" -#define POLLERR SYMBOLIC(POLLERR) -#define POLLHUP SYMBOLIC(POLLHUP) -#define POLLIN SYMBOLIC(POLLIN) -#define POLLNVAL SYMBOLIC(POLLNVAL) -#define POLLOUT SYMBOLIC(POLLOUT) -#define POLLPRI SYMBOLIC(POLLPRI) +#define POLLERR SYMBOLIC(POLLERR) +#define POLLHUP SYMBOLIC(POLLHUP) +#define POLLIN SYMBOLIC(POLLIN) +#define POLLNVAL SYMBOLIC(POLLNVAL) +#define POLLOUT SYMBOLIC(POLLOUT) +#define POLLPRI SYMBOLIC(POLLPRI) #define POLLRDBAND SYMBOLIC(POLLRDBAND) -#define POLLRDHUP SYMBOLIC(POLLRDHUP) +#define POLLRDHUP SYMBOLIC(POLLRDHUP) #define POLLRDNORM SYMBOLIC(POLLRDNORM) #define POLLWRBAND SYMBOLIC(POLLWRBAND) #define POLLWRNORM SYMBOLIC(POLLWRNORM) @@ -17,17 +17,17 @@ #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ -hidden extern const long POLLERR; -hidden extern const long POLLHUP; -hidden extern const long POLLIN; -hidden extern const long POLLNVAL; -hidden extern const long POLLOUT; -hidden extern const long POLLPRI; -hidden extern const long POLLRDBAND; -hidden extern const long POLLRDHUP; -hidden extern const long POLLRDNORM; -hidden extern const long POLLWRBAND; -hidden extern const long POLLWRNORM; +hidden extern const int16_t POLLERR; +hidden extern const int16_t POLLHUP; +hidden extern const int16_t POLLIN; +hidden extern const int16_t POLLNVAL; +hidden extern const int16_t POLLOUT; +hidden extern const int16_t POLLPRI; +hidden extern const int16_t POLLRDBAND; +hidden extern const int16_t POLLRDHUP; +hidden extern const int16_t POLLRDNORM; +hidden extern const int16_t POLLWRBAND; +hidden extern const int16_t POLLWRNORM; COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/libc/testlib/ezbench.h b/libc/testlib/ezbench.h index a7286234..0ec61010 100644 --- a/libc/testlib/ezbench.h +++ b/libc/testlib/ezbench.h @@ -2,6 +2,7 @@ #define COSMOPOLITAN_LIBC_TESTLIB_EZBENCH_H_ #include "libc/macros.h" #include "libc/testlib/bench.h" +#include "libc/testlib/testlib.h" #if !(__ASSEMBLER__ + __LINKER__ + 0) #define EZBENCH(INIT, EXPR) EZBENCH2(#EXPR, INIT, EXPR) diff --git a/libc/tinymath/cbrt.S b/libc/tinymath/cbrt.S new file mode 100644 index 00000000..dc07a57e --- /dev/null +++ b/libc/tinymath/cbrt.S @@ -0,0 +1,29 @@ +/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. │ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.h" + +/ Returns cube root of 𝑥. +/ +/ @param %xmm0 holds binary64 number +/ @return %xmm0 holds binary64 result +tinymath_cbrt: + jmp __cbrt + .endfn tinymath_cbrt,globl + .alias tinymath_cbrt,cbrt diff --git a/libc/tinymath/cbrt.c b/libc/tinymath/cbrtc.c similarity index 98% rename from libc/tinymath/cbrt.c rename to libc/tinymath/cbrtc.c index e44d7ec3..011487f8 100644 --- a/libc/tinymath/cbrt.c +++ b/libc/tinymath/cbrtc.c @@ -11,9 +11,6 @@ * * Optimized by Bruce D. Evans. */ -/* cbrt(x) - * Return cube root of x - */ #include "libc/math.h" asm(".ident\t\"\\n\\n\ @@ -32,7 +29,10 @@ static const double P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */ P3 = -0.758397934778766047437, /* 0xbfe844cb, 0xbee751d9 */ P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */ -double(cbrt)(double x) { +/** + * Returns cube root of 𝑥. + */ +double __cbrt(double x) { union { double f; uint64_t i; diff --git a/libc/tinymath/cbrtf.S b/libc/tinymath/cbrtf.S new file mode 100644 index 00000000..58eb2dae --- /dev/null +++ b/libc/tinymath/cbrtf.S @@ -0,0 +1,35 @@ +/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. │ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.h" + +/ Returns cube root of 𝑥. +/ +/ @param %xmm0 holds binary32 number +/ @return %xmm0 holds binary32 result +tinymath_cbrtf: + pushq %rbp + mov %rsp,%rbp + cvtss2sd %xmm0,%xmm0 + call __cbrt + cvtsd2ss %xmm0,%xmm0 + popq %rbp + ret + .endfn tinymath_cbrtf,globl + .alias tinymath_cbrtf,cbrtf diff --git a/libc/tinymath/cbrtl.S b/libc/tinymath/cbrtl.S new file mode 100644 index 00000000..d2534500 --- /dev/null +++ b/libc/tinymath/cbrtl.S @@ -0,0 +1,39 @@ +/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. │ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.h" + +/ Returns cube root of 𝑥. +/ +/ @param 𝑥 is an 80-bit long double passed on stack in 16-bytes +/ @return result of computation on FPU stack in %st +tinymath_cbrtl: + pushq %rbp + mov %rsp,%rbp + sub $16,%rsp + fldt 16(%rbp) + fstpl -8(%rbp) + movsd -8(%rbp),%xmm0 + call __cbrt + movsd %xmm0,-8(%rbp) + fldl -8(%rbp) + leave + ret + .endfn tinymath_cbrtl,globl + .alias tinymath_cbrtl,cbrtl diff --git a/test/dsp/scale/magikarp_test.c b/test/dsp/scale/magikarp_test.c index 60dd1ec3..05b2cd43 100644 --- a/test/dsp/scale/magikarp_test.c +++ b/test/dsp/scale/magikarp_test.c @@ -17,6 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "dsp/scale/cdecimate2xuint8x8.h" #include "dsp/scale/scale.h" #include "libc/fmt/bing.h" #include "libc/nexgen32e/x86feature.h" diff --git a/tool/emacs/cosmo-c-builtins.el b/tool/emacs/cosmo-c-builtins.el index b57289c1..f01df3ab 100644 --- a/tool/emacs/cosmo-c-builtins.el +++ b/tool/emacs/cosmo-c-builtins.el @@ -1298,9 +1298,9 @@ (cosmopolitan-builtin-defines '("__COSMOPOLITAN__" "__LINKER__" - "COSMOPOLITAN_CPP_START_" - "COSMOPOLITAN_CPP_END_" - "COSMOPOLITAN_CPP_USING_" + "COSMOPOLITAN_CXX_START_" + "COSMOPOLITAN_CXX_END_" + "COSMOPOLITAN_CXX_USING_" "COSMOPOLITAN_C_START_" "COSMOPOLITAN_C_END_" "MACHINE_CODE_ANALYSIS_BEGIN_" diff --git a/tool/emacs/cosmo.el b/tool/emacs/cosmo.el index 0e780295..0d4a8419 100644 --- a/tool/emacs/cosmo.el +++ b/tool/emacs/cosmo.el @@ -1,4 +1,4 @@ -(require 'mog-mode) +(require 'ctest-mode) (require 'ld-script) (require 'optinfo-mode) (require 'protobuf-mode)