2020-06-27 19:00:43 +00:00
|
|
|
|
#if 0
|
|
|
|
|
/*─────────────────────────────────────────────────────────────────╗
|
|
|
|
|
│ To the extent possible under law, Justine Tunney has waived │
|
|
|
|
|
│ all copyright and related or neighboring rights to this file, │
|
|
|
|
|
│ as it is written in the following disclaimers: │
|
|
|
|
|
│ • http://unlicense.org/ │
|
|
|
|
|
│ • http://creativecommons.org/publicdomain/zero/1.0/ │
|
|
|
|
|
╚─────────────────────────────────────────────────────────────────*/
|
|
|
|
|
#endif
|
|
|
|
|
#include "libc/alg/alg.h"
|
2020-11-25 16:19:00 +00:00
|
|
|
|
#include "libc/alg/arraylist.internal.h"
|
|
|
|
|
#include "libc/alg/critbit0.h"
|
2020-06-27 19:00:43 +00:00
|
|
|
|
#include "libc/bits/bits.h"
|
|
|
|
|
#include "libc/calls/calls.h"
|
2020-12-09 23:04:54 +00:00
|
|
|
|
#include "libc/fmt/conv.h"
|
2020-06-27 19:00:43 +00:00
|
|
|
|
#include "libc/log/check.h"
|
|
|
|
|
#include "libc/log/log.h"
|
|
|
|
|
#include "libc/macros.h"
|
|
|
|
|
#include "libc/mem/mem.h"
|
|
|
|
|
#include "libc/stdio/stdio.h"
|
|
|
|
|
#include "libc/str/str.h"
|
2020-11-25 16:19:00 +00:00
|
|
|
|
#include "libc/str/tpdecode.internal.h"
|
2020-06-27 19:00:43 +00:00
|
|
|
|
#include "libc/sysv/consts/fileno.h"
|
|
|
|
|
#include "libc/x/x.h"
|
|
|
|
|
|
2020-10-11 04:18:53 +00:00
|
|
|
|
/* Presumably forces the runtime's "zip:" path support to be linked in,
   which the fopen("zip:usr/share/dict/words") call in LoadWords()
   relies on -- TODO confirm against the libc headers. */
STATIC_YOINK("zip_uri_support");
|
|
|
|
|
|
2020-06-27 19:00:43 +00:00
|
|
|
|
/**
|
|
|
|
|
* @fileoverview Simple Interactive Spell Checker.
|
|
|
|
|
*
|
|
|
|
|
* This is an attempt to get spell checking to work in Emacs across
|
|
|
|
|
* platforms. While the computer science behind spell checking is very
|
|
|
|
|
* simple, unfortunately Emacs probes all these System Five spell progs
|
|
|
|
|
* similar to how websites have been known to probe User-Agent strings.
|
|
|
|
|
*
|
|
|
|
|
* Here's how we believe the repl works:
|
|
|
|
|
*
|
|
|
|
|
* $ make -j8 o//examples/ispell.com
|
|
|
|
|
* $ o//examples/ispell.com
|
|
|
|
|
* @(#) Cosmopolitan Ispell Version 3.4.00 8 Feb 2015
|
|
|
|
|
* word: hello
|
|
|
|
|
* ok
|
|
|
|
|
* word: héllo
|
|
|
|
|
* how about: hills, hello, hilly, jello
|
|
|
|
|
* word: lova
|
|
|
|
|
* how about: diva, dona, dora, dove, elva, fora, hove, iota
|
|
|
|
|
* word: hecruhecrue
|
|
|
|
|
* not found
|
|
|
|
|
*
|
|
|
|
|
* The dictionary for this program is embedded as a text file within the
|
|
|
|
|
* zip structure of the binary. It can be edited after distribution.
|
|
|
|
|
*
|
|
|
|
|
* It's possible to go even fancier than what this code is doing, by
|
|
|
|
|
* using cmudict phonemes, bloom filters, unicode tables e.g. e vs. é,
|
|
|
|
|
* and even doing ML similar to Google's online spell checker.
|
|
|
|
|
*
|
|
|
|
|
* TODO: Figure out why Emacs rejects this interface.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/* Tuning knobs for the suggestion search. */

/* Cost WordDistance() adds for each position where only one of the two
   words still has a character left. */
#define MISSING_LETTER_DISTANCE 5

/* ConsiderWord() keeps a dictionary word only when its distance from the
   query is strictly below this bound. */
#define MAX_NEARBY_WORD_DISTANCE 6

/* SpellChecker() prints at most this many suggestions per query. */
#define MAX_NEARBY_RESULTS 8
|
|
|
|
|
|
|
|
|
|
FILE *f;
|
|
|
|
|
char *line;
|
|
|
|
|
size_t linesize;
|
|
|
|
|
const char *query;
|
|
|
|
|
struct critbit0 words; /* does O(log 𝑛) fast path lookup */
|
|
|
|
|
|
|
|
|
|
/* One suggestion candidate: a dictionary word and its WordDistance() from
   the current query. */
struct WordMatch {
  long dist;
  char *word;
};

/* Candidates gathered for the current query. `i` is the number of entries
   in use (FindNearbyWords() resets it to zero before each search); `n` is
   presumably the allocated capacity managed by append() -- TODO confirm. */
struct NearbyWords {
  size_t i, n;
  struct WordMatch *p;
} nearby;
|
|
|
|
|
|
|
|
|
|
long WordDistance(const char *a, const char *b) {
|
|
|
|
|
long dist;
|
|
|
|
|
int gota, gotb;
|
|
|
|
|
unsigned long h, p;
|
|
|
|
|
wint_t chara, charb;
|
|
|
|
|
dist = p = 0;
|
|
|
|
|
for (;;) {
|
|
|
|
|
gota = abs(tpdecode(a, &chara)); /* parses utf-8 multibyte characters */
|
|
|
|
|
gotb = abs(tpdecode(b, &charb)); /* abs() handles -1, always yields <20> */
|
|
|
|
|
if (!chara && !charb) break;
|
|
|
|
|
if (!chara || !charb) {
|
|
|
|
|
dist += MISSING_LETTER_DISTANCE;
|
|
|
|
|
} else if ((h = hamming(chara, charb))) {
|
|
|
|
|
dist += h + p++; /* penalize multiple mismatched letters */
|
|
|
|
|
}
|
|
|
|
|
if (chara) a += gota;
|
|
|
|
|
if (charb) b += gotb;
|
|
|
|
|
}
|
|
|
|
|
return dist;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
intptr_t ConsiderWord(const char *word, void *arg) {
|
|
|
|
|
long dist;
|
|
|
|
|
if ((dist = WordDistance(word, query)) < MAX_NEARBY_WORD_DISTANCE) {
|
|
|
|
|
append(&nearby, &((struct WordMatch){dist, word}));
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int CompareWords(const struct WordMatch *a, const struct WordMatch *b) {
|
|
|
|
|
return a->dist < b->dist;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void FindNearbyWords(void) {
|
|
|
|
|
nearby.i = 0;
|
|
|
|
|
critbit0_allprefixed(&words, "", ConsiderWord, NULL);
|
|
|
|
|
qsort(nearby.p, nearby.i, sizeof(struct WordMatch), (void *)CompareWords);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void SpellChecker(void) {
|
|
|
|
|
int i;
|
|
|
|
|
printf("@(#) Cosmopolitan Ispell Version 3.4.00 8 Feb 2015\r\n");
|
|
|
|
|
while (!feof(stdin)) {
|
|
|
|
|
printf("word: ");
|
|
|
|
|
fflush(stdout);
|
|
|
|
|
if (getline(&line, &linesize, stdin) > 0) {
|
|
|
|
|
query = strtolower(chomp(line));
|
|
|
|
|
if (critbit0_contains(&words, query)) {
|
|
|
|
|
printf("ok\r\n");
|
|
|
|
|
} else {
|
|
|
|
|
FindNearbyWords();
|
|
|
|
|
if (nearby.i) {
|
|
|
|
|
printf("how about: ");
|
|
|
|
|
for (i = 0; i < MIN(MAX_NEARBY_RESULTS, nearby.i); ++i) {
|
|
|
|
|
if (i) printf(", ");
|
|
|
|
|
fputs(nearby.p[i].word, stdout);
|
|
|
|
|
}
|
|
|
|
|
printf("\r\n");
|
|
|
|
|
} else {
|
|
|
|
|
printf("not found\r\n");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
printf("\r\n");
|
|
|
|
|
}
|
|
|
|
|
CHECK_NE(-1, fclose(stdin));
|
|
|
|
|
CHECK_NE(-1, fclose(stdout));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void LoadWords(void) {
|
|
|
|
|
CHECK_NOTNULL((f = fopen("zip:usr/share/dict/words", "r")));
|
|
|
|
|
while (getline(&line, &linesize, f) > 0) {
|
|
|
|
|
critbit0_insert(&words, strtolower(chomp(line)));
|
|
|
|
|
}
|
|
|
|
|
CHECK_NE(-1, fclose(f));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Program entry point: enables crash reports, loads the dictionary, then
 * hands control to the interactive loop.
 *
 * @param argc unused
 * @param argv unused
 * @return 0 once SpellChecker() returns
 */
int main(int argc, char *argv[]) {
  (void)argc;
  (void)argv;
  showcrashreports();
  LoadWords();    /* must run first: SpellChecker() queries `words` */
  SpellChecker();
  return 0;
}
|