/* This file is a stand-alone program
which will regenerate processed dictionary
files from a raw Project Gutenberg
dictionary file.

See ::/Doc/Credits.DD.
*/

I64 ACDHexVal(I64 ch)
{//Value of one hex digit (either case).  Any other char counts as zero.
  ch=ToUpper(ch);
  if ('0'<=ch<='9')
    return ch-'0';
  if ('A'<=ch<='F')
    return ch-'A'+10;
  return 0;
}

U0 ACDPreprocess(U8 *in_name,U8 *out_name)
{/*
<cr><nl>--> <nl>
$ --> $$
\'89 --> .

Reads in_name as a plain-text document, rewrites every text entry
in place so that each backslash-quote escape followed by two hex
digits becomes the single byte with that value, then writes the
document out under out_name.  Nothing happens if the read fails.
*/
  I64 ch;
  U8 *src,*dst;
  CDoc *doc;
  CDocEntry *doc_e;
  if (doc=DocRead(in_name,DOCF_PLAIN_TEXT_TABS|DOCF_DBL_DOLLARS)) {
    doc_e=doc->head.next;
    while (doc_e!=doc) {
      if (doc_e->type_u8==DOCT_TEXT) {
	//dst never passes src, so the tag can be rewritten in place.
	src=dst=doc_e->tag;
	while (ch=*src++) {
	  //Only decode when both digits are present; a truncated escape
	  //at the end of the tag is copied through literally instead of
	  //reading past the terminator.
	  if (ch=='\\' && *src=='\'' && src[1] && src[2]) {
	    *dst++=(ACDHexVal(src[1])<<4)+ACDHexVal(src[2]);
	    src+=3; //quote + two digits
	  } else
	    *dst++=ch;
	}
	*dst=0;
      }
      doc_e=doc_e->next;
    }
    StrCpy(doc->filename.name,out_name);
    DocWrite(doc);
    DocDel(doc);
  }
}

I64 ACDNextCmd(U8 **_ptr)
{/*Scans forward from *_ptr for the next <tag> whose name is found
in the list below and returns the LstMatch() result for it.
Unrecognized tags are skipped.  Returns a negative value when the
input ends first.  *_ptr is advanced past the tag that was returned.
NOTE(review): the entries starting with '@' are presumably aliases
sharing the index of the preceding entry -- confirm against LstMatch().
*/
  U8 *ptr=*_ptr,*ptr2;
  I64 ch,res=-1;
  do {
    do {
      if (!(ch=*ptr++)) {
	ptr--; //Stay on the terminator so a repeated call is harmless.
	goto ncmd_done;
      }
    } while (ch!='<');
    ptr2=ptr;
    do {
      if (!(ch=*ptr2++))
	goto ncmd_done;
    } while (ch!='>');
    //Temporarily terminate the tag name so it can be matched as a string.
    *--ptr2=0;
    res=LstMatch(ptr,"h1\0/h1\0def\0/def\0hw\0/hw\0tt\0/tt\0"
	  "ety\0@fld\0@cd\0@blockquote\0@wordforms\0@note\0@altname\0@chform\0"
	  "@cref\0@syn\0/ety\0@/fld\0@/cd\0@/blockquote\0@/wordforms\0@/note\0"
	  "@/altname\0@/chform\0@/cref\0@/syn\0");
    *ptr2++='>';
    ptr=ptr2;
  } while (res<0);
ncmd_done:
  *_ptr=ptr;
  return res;
}

U8 *ACDNextEntry(U8 **_ptr)
{/*Collects the text starting at *_ptr up to the next tag which is
not in the ignore list (ignored tags are dropped and the text on
either side is joined).  Each dollar sign is emitted twice.
Returns a newly allocated string which the caller must Free().
*_ptr is left on the '<' that ended the entry, or on the
terminator if the input ended.
Text beyond ACD_BLK_SIZE-1 bytes is dropped instead of overrunning
the local buffer.
*/
  U8 *res,*ignore,*ptr=*_ptr,buf[ACD_BLK_SIZE],*out_ptr=buf,*out_end;
  I64 ch,l;
  out_end=&buf[ACD_BLK_SIZE-1]; //Last slot is reserved for the terminator.
  while (TRUE) {
    while (TRUE) {
      if (!(ch=*ptr++))
	goto nentry_done;
      if (ch=='<')
	break;
      if (ch=='$') {
	//Needs two bytes; skip the pair entirely if it will not fit.
	if (out_ptr+2<=out_end) {
	  *out_ptr++=ch;
	  *out_ptr++=ch;
	}
      } else if (out_ptr<out_end)
	*out_ptr++=ch;
    }
    ignore="b>\0i>\0ppp>\0/b>\0/i>\0/p>\0"
	  "ets>\0col>\0spn>\0/ets>\0/col>\0/spn>\0er>\0as>\0cs>\0cd>\0ex>\0"
	  "/er>\0/as>\0/cs>\0/cd>\0/ex>\0"
	  "note>\0/note>\0blockquote>\0/blockquote>\0";
    while (*ignore) {
      l=StrLen(ignore);
      if (!StrNCmp(ptr,ignore,l)) {
	ptr+=l;
	break;
      } else
	ignore+=l+1;
    }
    //Reached the empty string ending the list: tag is not ignorable.
    if (!*ignore)
      break;
  }
nentry_done:
  *out_ptr++=0;
  res=StrNew(buf);
  *_ptr=ptr-1;
  return res;
}

I64 ACDCompareWords(U8 *e1,U8 *e2)
{//Case-insensitive ordering of two word records for QSortI64().
  return StrICmp(e1,e2);
}

U8 *ACDSortWords(U8 *start,I64 size,I64 word_cnt)
{/*Returns a newly allocated copy of the word table at start with its
records in case-insensitive order.  Each record is a zero-terminated
string followed by two block-number bytes; the table ends with a
zero byte.  size is the table size in bytes, word_cnt the number of
records.  The caller must Free() the result.
*/
  U8 **ptr_array=MAlloc(sizeof(U8 *)*word_cnt),
	*out_start=MAlloc(size),
	*ptr=start,*ptr2;
  I64 i=0;
  while (*ptr) {
    ptr_array[i++]=ptr;
    ptr+=StrLen(ptr)+3; //string + zero + two block-number bytes
  }
  "Sorting...\n"; Sleep(100);
  QSortI64(ptr_array,word_cnt,&ACDCompareWords);
  "Done...\n"; Sleep(100);

  ptr=out_start;
  for (i=0;i<word_cnt;i++) {
    ptr2=ptr_array[i];
    while (*ptr2)
      *ptr++=*ptr2++;
    *ptr++=*ptr2++; //zero
    *ptr++=*ptr2++; //blk lo
    *ptr++=*ptr2++; //blk hi
  }
  *ptr++=0;
  Free(ptr_array); //Only needed while sorting; was leaked before.
  return out_start;
}

U8 *ACDPutField(U8 **_in_ptr,U8 *out_ptr,I64 type_ch)
{//Appends the next entry, prefixed by type_ch, at out_ptr if it is
//not empty.  Returns the advanced out_ptr.
  U8 *st;
  if (st=ACDNextEntry(_in_ptr)) {
    if (*st) {
      *out_ptr++=type_ch;
      StrCpy(out_ptr,st);
      out_ptr+=StrLen(st)+1;
    }
    Free(st);
  }
  return out_ptr;
}

U0 ACDGen(U8 *in_file)
{/*Parses the preprocessed dictionary in_file and writes two files:
ACD_DEF_FILENAME holding the tagged definition records and
ACD_WORD_FILENAME holding the sorted word table, where each word
carries the number of the ACD_BLK_SIZE block its definition starts
in.  Prints summary statistics.  Returns quietly if in_file can
not be read.
NOTE(review): both output buffers are sized to the input file size;
this assumes the output never outgrows the input -- confirm for
inputs containing many dollar signs, which are doubled.
*/
  I64 cmd,size=0,word_cnt=0,largest_entry=0;
  U8 *st,*in_ptr,*in_start,
	*out_ptr,*out_start,
	*word_ptr,*word_start,
	*last_word="",*def_word_start,
	*sorted_word_start;
  U16 *d;

  //Check the read before allocating so nothing leaks on failure.
  if (!(in_start=in_ptr=FileRead(in_file,&size)))
    return;
  out_start=out_ptr=MAlloc(size);
  word_start=word_ptr=MAlloc(size);
  def_word_start=out_ptr;

  do {
    cmd=ACDNextCmd(&in_ptr);
    if (cmd==ACD_H1) {
next_word:
      if (out_ptr-def_word_start>largest_entry)
	largest_entry=out_ptr-def_word_start;
      def_word_start=out_ptr;
      if (st=ACDNextEntry(&in_ptr)) {
	if (*st) {
	  //A headword equal to the previous one continues that entry.
	  if (StrICmp(st,last_word)) {
	    word_cnt++;

	    *word_ptr++=ACD_WORD_CHAR;
	    last_word=word_ptr;
	    StrCpy(word_ptr,st);
	    word_ptr+=StrLen(st)+1;

	    d=word_ptr;
	    *d=(out_ptr-out_start)/ACD_BLK_SIZE;
	    word_ptr+=2;

	    *out_ptr++=ACD_WORD_CHAR;
	    StrCpy(out_ptr,st);
	    out_ptr+=StrLen(st)+1;
	  }
	  Free(st);

	  do {
	    //Skip tags which carry no field until a field tag, the next
	    //headword, or the end of input is found.
	    do {
	      cmd=ACDNextCmd(&in_ptr);
	      if (cmd==ACD_H1)
		goto next_word;
	    } while (cmd>=0 && !(cmd==ACD_DEF||cmd==ACD_PRONUNCIATION||
		  cmd==ACD_POS||cmd==ACD_EXTRA));
	    if (cmd==ACD_DEF)
	      out_ptr=ACDPutField(&in_ptr,out_ptr,ACD_DEF_CHAR);
	    else if (cmd==ACD_PRONUNCIATION)
	      out_ptr=ACDPutField(&in_ptr,out_ptr,ACD_PRONUNCIATION_CHAR);
	    else if (cmd==ACD_POS)
	      out_ptr=ACDPutField(&in_ptr,out_ptr,ACD_POS_CHAR);
	    else if (cmd==ACD_EXTRA)
	      out_ptr=ACDPutField(&in_ptr,out_ptr,ACD_EXTRA_CHAR);
	  } while (cmd==ACD_DEF||cmd==ACD_PRONUNCIATION||
		cmd==ACD_POS||cmd==ACD_EXTRA);
	} else
	  Free(st);
      }
    }
  } while (cmd>=0);
  *out_ptr++=ACD_END_CHAR;
  *word_ptr++=ACD_END_CHAR;

  Free(in_start);

  "Blk Size :%d\n",ACD_BLK_SIZE;
  "Blk Cnt :%04X\n",(out_ptr-out_start+ACD_BLK_SIZE-1)/ACD_BLK_SIZE;
  "Largest Entry :%d\n",largest_entry;
  "Word Count :%d\n",word_cnt;

  FileWrite(ACD_DEF_FILENAME,out_start,out_ptr-out_start);
  "Def File Size :%d\n",out_ptr-out_start;

  sorted_word_start=ACDSortWords(word_start,word_ptr-word_start,word_cnt);
  FileWrite(ACD_WORD_FILENAME,sorted_word_start,word_ptr-word_start);
  "Word File Size:%d\n",word_ptr-word_start;

  Free(out_start);
  Free(word_start);
  Free(sorted_word_start);
}

Cd(__DIR__);
ACDPreprocess("DICTIONARY.DD","DICTIONARY2.DD");
ACDGen("DICTIONARY2.DD");