158 lines
4.8 KiB
C
158 lines
4.8 KiB
C
#if 0
|
||
/*─────────────────────────────────────────────────────────────────╗
|
||
│ To the extent possible under law, Justine Tunney has waived │
|
||
│ all copyright and related or neighboring rights to this file, │
|
||
│ as it is written in the following disclaimers: │
|
||
│ • http://unlicense.org/ │
|
||
│ • http://creativecommons.org/publicdomain/zero/1.0/ │
|
||
╚─────────────────────────────────────────────────────────────────*/
|
||
#endif
|
||
#include "libc/alg/alg.h"
|
||
#include "libc/alg/arraylist.h"
|
||
#include "libc/bits/bits.h"
|
||
#include "libc/calls/calls.h"
|
||
#include "libc/conv/conv.h"
|
||
#include "libc/log/check.h"
|
||
#include "libc/log/log.h"
|
||
#include "libc/macros.h"
|
||
#include "libc/mem/mem.h"
|
||
#include "libc/stdio/stdio.h"
|
||
#include "libc/str/str.h"
|
||
#include "libc/str/tpdecode.h"
|
||
#include "libc/sysv/consts/fileno.h"
|
||
#include "libc/x/x.h"
|
||
|
||
/**
|
||
* @fileoverview Simple Interactive Spell Checker.
|
||
*
|
||
* This is an attempt to get spell checking to work in Emacs across
|
||
* platforms. While the computer science behind spell checking is very
|
||
* simple, unfortunately Emacs probes all these System Five spell progs
|
||
* similar to how websites have been known to probe User-Agent strings.
|
||
*
|
||
* Here's how we believe the repl works:
|
||
*
|
||
* $ make -j8 o//examples/ispell.com
|
||
* $ o//examples/ispell.com
|
||
* @(#) Cosmopolitan Ispell Version 3.4.00 8 Feb 2015
|
||
* word: hello
|
||
* ok
|
||
* word: héllo
|
||
* how about: hills, hello, hilly, jello
|
||
* word: lova
|
||
* how about: diva, dona, dora, dove, elva, fora, hove, iota
|
||
* word: hecruhecrue
|
||
* not found
|
||
*
|
||
* The dictionary for this program is embedded as a text file within the
|
||
* zip structure of the binary. It can be edited after distribution.
|
||
*
|
||
* It's possible to go even fancier than what this code is doing, by
|
||
* using cmudict phonemes, bloom filters, unicode tables e.g. e vs. é,
|
||
* and even doing ML similar to Google's online spell checker.
|
||
*
|
||
* TODO: Figure out why Emacs rejects this interface.
|
||
*/
|
||
|
||
#define MISSING_LETTER_DISTANCE 5
|
||
#define MAX_NEARBY_WORD_DISTANCE 6
|
||
#define MAX_NEARBY_RESULTS 8
|
||
|
||
FILE *f;
|
||
char *line;
|
||
size_t linesize;
|
||
const char *query;
|
||
struct critbit0 words; /* does O(log 𝑛) fast path lookup */
|
||
|
||
struct NearbyWords {
|
||
size_t i, n;
|
||
struct WordMatch {
|
||
long dist;
|
||
char *word;
|
||
} * p;
|
||
} nearby;
|
||
|
||
long WordDistance(const char *a, const char *b) {
|
||
long dist;
|
||
int gota, gotb;
|
||
unsigned long h, p;
|
||
wint_t chara, charb;
|
||
dist = p = 0;
|
||
for (;;) {
|
||
gota = abs(tpdecode(a, &chara)); /* parses utf-8 multibyte characters */
|
||
gotb = abs(tpdecode(b, &charb)); /* abs() handles -1, always yields <20> */
|
||
if (!chara && !charb) break;
|
||
if (!chara || !charb) {
|
||
dist += MISSING_LETTER_DISTANCE;
|
||
} else if ((h = hamming(chara, charb))) {
|
||
dist += h + p++; /* penalize multiple mismatched letters */
|
||
}
|
||
if (chara) a += gota;
|
||
if (charb) b += gotb;
|
||
}
|
||
return dist;
|
||
}
|
||
|
||
intptr_t ConsiderWord(const char *word, void *arg) {
|
||
long dist;
|
||
if ((dist = WordDistance(word, query)) < MAX_NEARBY_WORD_DISTANCE) {
|
||
append(&nearby, &((struct WordMatch){dist, word}));
|
||
}
|
||
return 0;
|
||
}
|
||
|
||
int CompareWords(const struct WordMatch *a, const struct WordMatch *b) {
|
||
return a->dist < b->dist;
|
||
}
|
||
|
||
void FindNearbyWords(void) {
|
||
nearby.i = 0;
|
||
critbit0_allprefixed(&words, "", ConsiderWord, NULL);
|
||
qsort(nearby.p, nearby.i, sizeof(struct WordMatch), (void *)CompareWords);
|
||
}
|
||
|
||
void SpellChecker(void) {
|
||
int i;
|
||
printf("@(#) Cosmopolitan Ispell Version 3.4.00 8 Feb 2015\r\n");
|
||
while (!feof(stdin)) {
|
||
printf("word: ");
|
||
fflush(stdout);
|
||
if (getline(&line, &linesize, stdin) > 0) {
|
||
query = strtolower(chomp(line));
|
||
if (critbit0_contains(&words, query)) {
|
||
printf("ok\r\n");
|
||
} else {
|
||
FindNearbyWords();
|
||
if (nearby.i) {
|
||
printf("how about: ");
|
||
for (i = 0; i < MIN(MAX_NEARBY_RESULTS, nearby.i); ++i) {
|
||
if (i) printf(", ");
|
||
fputs(nearby.p[i].word, stdout);
|
||
}
|
||
printf("\r\n");
|
||
} else {
|
||
printf("not found\r\n");
|
||
}
|
||
}
|
||
}
|
||
printf("\r\n");
|
||
}
|
||
CHECK_NE(-1, fclose(stdin));
|
||
CHECK_NE(-1, fclose(stdout));
|
||
}
|
||
|
||
void LoadWords(void) {
|
||
CHECK_NOTNULL((f = fopen("zip:usr/share/dict/words", "r")));
|
||
while (getline(&line, &linesize, f) > 0) {
|
||
critbit0_insert(&words, strtolower(chomp(line)));
|
||
}
|
||
CHECK_NE(-1, fclose(f));
|
||
}
|
||
|
||
int main(int argc, char *argv[]) {
|
||
showcrashreports();
|
||
LoadWords();
|
||
SpellChecker();
|
||
return 0;
|
||
}
|