/* This file is a stand-alone program
which will regenerate processed dictionary
files from a raw Project Gutenberg
dictionary file.

See ::/Doc/Credits.DD.
*/

U0 ACDPreprocess(U8 *in_name, U8 *out_name)
{/*
<cr><nl>--> <nl>
$ --> $$
\'89 --> .

Reads in_name as a plain-text document, rewrites every text entry in
place so that each \'XX sequence (two hex digits) becomes the single
byte it encodes, then writes the document out under out_name.
Does nothing if the document cannot be read.
*/
	I64		 ch, i, n;
	U8		*src, *dst;
	CDoc		*doc;
	CDocEntry	*doc_e;

	if (doc = DocRead(in_name, DOCF_PLAIN_TEXT_TABS | DOCF_DBL_DOLLARS))
	{
		doc_e = doc->head.next;
		while (doc_e != doc)
		{
			if (doc_e->type_u8 == DOCT_TEXT)
			{
				//Decoding only shrinks the text, so it is done in place.
				src = dst = doc_e->tag;
				while (ch = *src++)
				{
					//Only decode when both digits are present; a
					//truncated escape at the end of the tag is copied
					//literally instead of reading past the terminator.
					if (ch == '\\' && *src == '\'' && src[1] && src[2])
					{
						src++; //skip the apostrophe
						i = 0;
						for (n = 0; n < 2; n++)
						{
							i <<= 4;
							ch = ToUpper(*src++);
							//Chained range comparisons.
							//A non-hex character contributes zero.
							if ('0' <= ch <= '9')
								i += ch - '0';
							else if ('A' <= ch <= 'F')
								i += ch - 'A' + 10;
						}
						*dst++ = i;
					}
					else
						*dst++ = ch;
				}
				*dst = 0;
			}
			doc_e = doc_e->next;
		}
		StrCopy(doc->filename.name, out_name);
		DocWrite(doc);
		DocDel(doc);
	}
}

I64 ACDNextCmd(U8 **_ptr)
{/*
Scans forward from *_ptr for the next <tag> whose name is found in the
list below and returns the value ListMatch() gives for it.  Tags not in
the list are skipped.  Returns a negative value when the end of the
text is reached first.

On return *_ptr points just past the '>' of the matched tag, or past
the terminator that stopped the scan.
*/
	U8	*ptr = *_ptr, *ptr2;
	I64	 ch, res = -1;

	do
	{
		//Find the opening '<'.
		do
		{
			if (!(ch = *ptr++))
				goto ncmd_done;
		}
		while (ch != '<');

		//Find the closing '>'.
		ptr2 = ptr;
		do
		{
			if (!(ch = *ptr2++))
				goto ncmd_done;
		}
		while (ch != '>');

		//Temporarily terminate the tag name so it can be matched,
		//then put the '>' back.
		*--ptr2 = 0;
		res = ListMatch(ptr, "h1\0/h1\0def\0/def\0hw\0/hw\0tt\0/tt\0"
					"ety\0@fld\0@cd\0@blockquote\0@wordforms\0@note\0@altname\0@chform\0"
					"@cref\0@syn\0/ety\0@/fld\0@/cd\0@/blockquote\0@/wordforms\0@/note\0"
					"@/altname\0@/chform\0@/cref\0@/syn\0");
		*ptr2++ = '>';
		ptr = ptr2;
	}
	while (res < 0);

ncmd_done:
	*_ptr = ptr;

	return res;
}

U8 *ACDNextEntry(U8 **_ptr)
{/*
Copies text starting at *_ptr into a newly allocated string, which the
caller must Free().  Tags in the ignore list below are dropped from the
copy.  Copying stops at the first '<' that does not start an ignored
tag, or at the end of the text.  Every dollar sign is written twice.

On return *_ptr points at the character that stopped the copy.
Text that would not fit in ACD_BLK_SIZE bytes is truncated.
*/
	U8	*res, *ignore, *ptr = *_ptr, buf[ACD_BLK_SIZE],
		*out_ptr = buf, *out_end;
	I64	 ch, l;

	//Last position where a write may start: leaves room for a
	//doubled dollar sign plus the terminator.
	out_end = out_ptr + ACD_BLK_SIZE - 2;

	while (TRUE)
	{
		while (TRUE)
		{
			if (!(ch = *ptr++))
				goto nentry_done;
			if (ch != '<')
			{
				if (out_ptr < out_end)
				{
					*out_ptr++ = ch;
					if (ch == '$')
						*out_ptr++ = ch;
				}
			}
			else
				break;
		}

		//ptr is just past a '<'.  Skip the tag if it is in the list.
		ignore = "b>\0i>\0ppp>\0/b>\0/i>\0/p>\0"
				"ets>\0col>\0spn>\0/ets>\0/col>\0/spn>\0er>\0as>\0cs>\0cd>\0ex>\0"
				"/er>\0/as>\0/cs>\0/cd>\0/ex>\0"
				"note>\0/note>\0blockquote>\0/blockquote>\0";
		while (*ignore)
		{
			l = StrLen(ignore);
			if (!StrNCompare(ptr, ignore, l))
			{
				ptr += l;
				break;
			}
			else
				ignore += l + 1;
		}
		//Ran off the end of the list: not an ignored tag, so stop here.
		if (!*ignore)
			break;
	}

nentry_done:
	*out_ptr++ = 0;
	res = StrNew(buf);
	*_ptr = ptr - 1; //back up onto the '<' or the terminator

	return res;
}

I64 ACDCompareWords(U8 *e1, U8 *e2)
{//Case-insensitive ordering used when sorting the word list.
	return StrICompare(e1, e2);
}

U8 *ACDSortWords(U8 *start, I64 size, I64 word_count)
{/*
Returns a newly allocated copy of the word table at start with its
records sorted by ACDCompareWords().  The caller must Free() it.

Each record is a zero-terminated string followed by two more bytes;
the table ends with a zero byte.  size is the table size in bytes and
word_count is the number of records.
*/
	U8	**ptr_array = MAlloc(sizeof(U8 *) * word_count),
		 *out_start = MAlloc(size),
		 *ptr = start, *ptr2;
	I64	  i = 0;

	//Collect a pointer to the start of each record.
	while (*ptr)
	{
		ptr_array[i++] = ptr;
		ptr += StrLen(ptr) + 3; //string, its zero, two-byte block number
	}

	"Sorting...\n";
	Sleep(100);
	QuickSortI64(ptr_array, word_count, &ACDCompareWords);
	"Done...\n";
	Sleep(100);

	//Emit the records in sorted order.
	ptr = out_start;
	for (i = 0; i < word_count; i++)
	{
		ptr2 = ptr_array[i];
		while (*ptr2)
			*ptr++ = *ptr2++;
		*ptr++ = *ptr2++; //zero
		*ptr++ = *ptr2++; //blk lo
		*ptr++ = *ptr2++; //blk hi
	}
	*ptr++ = 0;

	Free(ptr_array); //only needed while sorting

	return out_start;
}

U0 ACDGen(U8 *in_file)
{/*
Builds the definition file and the sorted word file from the
preprocessed dictionary text in in_file, and prints some statistics.

For each h1 tag the following text is taken as a word.  A word that
differs (ignoring case) from the previous one is added to the word
table together with the number of the definition-file block it starts
in, and is written to the definition stream.  The def, pronunciation,
pos and extra entries that follow are appended to the definition
stream, each prefixed by its marker character.

Returns without doing anything if in_file cannot be read.
*/
	I64	 cmd, size, word_count = 0, largest_entry = 0;
	U8	*st, *in_ptr = FileRead(in_file, &size), *in_start = in_ptr,
		*out_ptr, *out_start, *word_ptr, *word_start, *last_word = "",
		*def_word_start, *sorted_word_start;
	U16	*d;

	if (!in_ptr)
		return;

	//Allocate only after the read has succeeded so that nothing
	//leaks on the early return above.
	out_start = out_ptr = MAlloc(size);
	word_start = word_ptr = MAlloc(size);
	def_word_start = out_ptr;

	do
	{
		cmd = ACDNextCmd(&in_ptr);
		if (cmd == ACD_H1)
		{
next_word:
			//Track the size of the entry that just ended.
			if (out_ptr - def_word_start > largest_entry)
				largest_entry = out_ptr - def_word_start;
			def_word_start = out_ptr;

			if (st = ACDNextEntry(&in_ptr))
			{
				if (*st)
				{
					if (StrICompare(st, last_word))
					{
						word_count++;

						//Word table record: marker, word, zero, U16 block number.
						*word_ptr++ = ACD_WORD_CHAR;
						last_word = word_ptr;
						StrCopy(word_ptr, st);
						word_ptr += StrLen(st) + 1;
						d = word_ptr;
						*d = (out_ptr - out_start) / ACD_BLK_SIZE;
						word_ptr += 2;

						*out_ptr++ = ACD_WORD_CHAR;
						StrCopy(out_ptr, st);
						out_ptr += StrLen(st) + 1;
					}
					Free(st);

					do
					{
						//Skip to the next tag this loop handles; a new
						//h1 starts the next word.
						do
						{
							cmd = ACDNextCmd(&in_ptr);
							if (cmd == ACD_H1)
								goto next_word;
						}
						while (cmd >= 0 && !(cmd == ACD_DEF || cmd == ACD_PRONUNCIATION ||
									cmd == ACD_POS || cmd == ACD_EXTRA));

						if (cmd == ACD_DEF)
						{
							if (st = ACDNextEntry(&in_ptr))
							{
								if (*st)
								{
									*out_ptr++ = ACD_DEF_CHAR;
									StrCopy(out_ptr, st);
									out_ptr += StrLen(st) + 1;
								}
								Free(st);
							}
						}
						else if (cmd == ACD_PRONUNCIATION)
						{
							if (st = ACDNextEntry(&in_ptr))
							{
								if (*st)
								{
									*out_ptr++ = ACD_PRONUNCIATION_CHAR;
									StrCopy(out_ptr, st);
									out_ptr += StrLen(st) + 1;
								}
								Free(st);
							}
						}
						else if (cmd == ACD_POS)
						{
							if (st = ACDNextEntry(&in_ptr))
							{
								if (*st)
								{
									*out_ptr++ = ACD_POS_CHAR;
									StrCopy(out_ptr, st);
									out_ptr += StrLen(st) + 1;
								}
								Free(st);
							}
						}
						else if (cmd == ACD_EXTRA)
						{
							if (st = ACDNextEntry(&in_ptr))
							{
								if (*st)
								{
									*out_ptr++ = ACD_EXTRA_CHAR;
									StrCopy(out_ptr, st);
									out_ptr += StrLen(st) + 1;
								}
								Free(st);
							}
						}
					}
					while (cmd == ACD_DEF || cmd == ACD_PRONUNCIATION ||
							cmd == ACD_POS || cmd == ACD_EXTRA);
				}
				else
					Free(st);
			}
		}
	}
	while (cmd >= 0);

	*out_ptr++  = ACD_END_CHAR;
	*word_ptr++ = ACD_END_CHAR;

	Free(in_start);

	"Blk Size :%d\n", ACD_BLK_SIZE;
	"Blk Count :%04X\n", (out_ptr - out_start + ACD_BLK_SIZE - 1) / ACD_BLK_SIZE;
	"Largest Entry :%d\n", largest_entry;
	"Word Count :%d\n", word_count;

	FileWrite(ACD_DEF_FILENAME, out_start, out_ptr - out_start);
	"Def File Size :%d\n", out_ptr - out_start;

	sorted_word_start = ACDSortWords(word_start, word_ptr - word_start, word_count);
	FileWrite(ACD_WORD_FILENAME, sorted_word_start, word_ptr - word_start);
	"Word File Size:%d\n", word_ptr - word_start;

	Free(out_start);
	Free(word_start);
	Free(sorted_word_start);
}

Cd(__DIR__);

ACDPreprocess("DICTIONARY.DD", "DICTIONARY2.DD");
ACDGen("DICTIONARY2.DD");