templeos-info/public/Wb/Adam/AutoComplete/ACDictGen.HC

262 lines
5.5 KiB
HolyC
Executable File
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
This file is a stand-alone program
which will regenerate processed dictionary
files from a raw Project Gutenberg
dictionary file.
See $LK,"::/Doc/Credits.DD"$.
*/
U0 ACDPreprocess(U8 *in_name,U8 *out_name)
{/*Reads in_name as a document, decodes every \'XX escape
(backslash, single quote, two hex digits) found in its
text entries into the single byte 0xXX, and writes the
document back out under the name out_name.

Conversions listed by the original author:
  <cr><nl> --> <nl>
  $$       --> $$$$
  \'89     --> byte 0x89
Only the \'XX decoding is done by code in this function;
the other two are presumably handled by the DocRead
flags -- TODO confirm.

An escape is decoded only when both characters after the
quote are hex digits.  Anything else (including an escape
cut short by the end of the text) is copied unchanged, so
the scan never steps past the terminating zero.
*/
  I64 ch,i,k,digit;
  Bool valid;
  U8 *src,*dst;
  CDoc *doc;
  CDocEntry *doc_e;
  if (doc=DocRead(in_name,DOCF_PLAIN_TEXT_TABS|DOCF_DBL_DOLLARS)) {
    doc_e=doc->head.next;
    while (doc_e!=doc) {
      if (doc_e->type_u8==DOCT_TEXT) {
        //Decode in place.  Output is never longer than input, so dst<=src.
        src=dst=doc_e->tag;
        while (ch=*src++) {
          if (ch=='\\' && *src=='\'') {
            //src is on the quote; src[1] and src[2] are the candidate digits.
            i=0;
            valid=TRUE;
            for (k=1;k<=2;k++) {
              digit=ToUpper(src[k]);
              if ('0'<=digit && digit<='9')
                digit=digit-'0';
              else if ('A'<=digit && digit<='F')
                digit=digit-'A'+10;
              else {
                //Includes the terminator: stop before reading src[2].
                valid=FALSE;
                break;
              }
              i=(i<<4)+digit;
            }
            if (valid) {
              *dst++=i;
              src+=3; //quote plus two digits
            } else
              *dst++=ch; //malformed: keep the backslash as-is
          } else
            *dst++=ch;
        }
        *dst=0;
      }
      doc_e=doc_e->next;
    }
    StrCpy(doc->filename.name,out_name);
    DocWrite(doc);
    DocDel(doc);
  }
}
I64 ACDNextCmd(U8 **_ptr)
{/*Scans forward from *_ptr for the next <tag> whose name
LstMatch finds in the tag list below and returns the value
LstMatch gave for it.  Tags that do not match are skipped.

On return *_ptr is just past the closing '>' of the matched
tag.  If the text runs out first, the last (negative)
result is returned.

The '>' of each candidate tag is overwritten with zero for
the duration of the lookup and then restored, so the input
buffer must be writable.
*/
  U8 *cur=*_ptr,*tag_end;
  I64 ch,res=-1;
  while (res<0) {
    //Advance to just past the next '<'.
    while (TRUE) {
      ch=*cur++;
      if (!ch)
        goto ncmd_done;
      if (ch=='<')
        break;
    }
    //Locate the matching '>'.
    tag_end=cur;
    while (TRUE) {
      ch=*tag_end++;
      if (!ch)
        goto ncmd_done;
      if (ch=='>')
        break;
    }
    tag_end--;
    *tag_end=0; //temporarily terminate the tag name
    res=LstMatch(cur,"h1\0/h1\0def\0/def\0hw\0/hw\0tt\0/tt\0"
          "ety\0@fld\0@cd\0@blockquote\0@wordforms\0@note\0@altname\0@chform\0"
          "@cref\0@syn\0/ety\0@/fld\0@/cd\0@/blockquote\0@/wordforms\0@/note\0"
          "@/altname\0@/chform\0@/cref\0@/syn\0");
    *tag_end='>'; //restore the input
    cur=tag_end+1;
  }
ncmd_done:
  *_ptr=cur;
  return res;
}
U8 *ACDNextEntry(U8 **_ptr)
{/*Collects the text starting at *_ptr, up to the first tag
that is not on the ignore list, and returns it as a new
string made with StrNew (the caller frees it).

Tags on the ignore list are dropped and the text on either
side of them is joined.  The character written '$$' in this
source is stored twice in the result.

On return *_ptr is on the '<' of the tag that stopped the
scan, or on the terminating zero if the text ran out.

The result is assembled in a stack buffer of ACD_BLK_SIZE
bytes.  Text that would not fit is consumed but not stored,
so an oversized entry is truncated instead of overrunning
the buffer.
*/
  U8 *res,*ignore,*ptr=*_ptr,buf[ACD_BLK_SIZE],*out_ptr=buf,*out_limit;
  I64 ch,l;
  //One input char can store two bytes, and the terminator needs one more.
  out_limit=buf+ACD_BLK_SIZE-2;
  while (TRUE) {
    while (TRUE) {
      if (!(ch=*ptr++)) goto nentry_done;
      if (ch=='<')
        break;
      if (out_ptr<out_limit) {
        *out_ptr++=ch;
        if (ch=='$$')
          *out_ptr++=ch;
      }
    }
    //ptr is just past '<'.  Each list item includes the closing '>'.
    ignore="b>\0i>\0ppp>\0/b>\0/i>\0/p>\0"
          "ets>\0col>\0spn>\0/ets>\0/col>\0/spn>\0er>\0as>\0cs>\0cd>\0ex>\0"
          "/er>\0/as>\0/cs>\0/cd>\0/ex>\0"
          "note>\0/note>\0blockquote>\0/blockquote>\0";
    while (*ignore) {
      l=StrLen(ignore);
      if (!StrNCmp(ptr,ignore,l)) {
        ptr+=l; //skip the ignored tag and keep collecting
        break;
      } else
        ignore+=l+1;
    }
    if (!*ignore) //ran off the list: a tag we do not ignore
      break;
  }
nentry_done:
  *out_ptr++=0;
  res=StrNew(buf);
  *_ptr=ptr-1; //back up onto the '<' or the terminator
  return res;
}
I64 ACDCompareWords(U8 *e1,U8 *e2)
{//Comparison callback handed to QSortI64 by ACDSortWords.
//Orders two word records with StrICmp.
  I64 order=StrICmp(e1,e2);
  return order;
}
U8 *ACDSortWords(U8 *start,I64 size,I64 word_cnt)
{/*Returns a newly allocated copy of the word buffer with
its records sorted by ACDCompareWords.  The caller frees
the result.

start    -- buffer of records, ended by a zero byte.
            Each record is a zero-terminated string
            followed by two more bytes (blk lo, blk hi).
size     -- size of the buffer in bytes; the result is
            allocated with the same size.
word_cnt -- number of records the caller expects.

At most word_cnt records are indexed, and only the records
actually found are sorted and copied, so a wrong count
cannot run past the pointer array or sort unset slots.
*/
  U8 **ptr_array=MAlloc(sizeof(U8 *)*word_cnt),
     *out_start=MAlloc(size),
     *ptr=start,*ptr2;
  I64 i,len,cnt=0;
  //Index the start of every record.
  while (*ptr && cnt<word_cnt) {
    ptr_array[cnt++]=ptr;
    ptr+=StrLen(ptr)+3; //string, its zero, blk lo, blk hi
  }
  "Sorting...\n"; Sleep(100);
  QSortI64(ptr_array,cnt,&ACDCompareWords);
  "Done...\n"; Sleep(100);
  //Lay the records out again in sorted order.
  ptr=out_start;
  for (i=0;i<cnt;i++) {
    ptr2=ptr_array[i];
    len=StrLen(ptr2)+3;
    while (len--)
      *ptr++=*ptr2++;
  }
  *ptr++=0; //end-of-list marker
  Free(ptr_array); //the index was only needed for sorting
  return out_start;
}
U0 ACDGen(U8 *in_file)
{/*Builds the definition file and the sorted word file
from the preprocessed dictionary text in in_file.

The text is walked tag by tag with ACDNextCmd.  For each
ACD_H1 tag the entry text is taken as a headword.  Unless
it equals the previous headword (ignoring case), it is
added to the word list along with the index of the
ACD_BLK_SIZE block where its definition starts, and is
written to the definition stream after ACD_WORD_CHAR.

The ACD_DEF, ACD_PRONUNCIATION, ACD_POS and ACD_EXTRA tags
that follow are each written to the definition stream as a
marker byte plus a zero-terminated string.  Empty entries
are skipped.

Output: ACD_DEF_FILENAME holds the definition stream and
ACD_WORD_FILENAME holds the word list after ACDSortWords.
Some statistics are printed.

Both work buffers are allocated with the input size.
NOTE(review): this assumes the output is never larger than
the input -- confirm.
*/
  I64 cmd,size,marker,word_cnt=0,largest_entry=0;
  U8 *st,*in_ptr=FileRead(in_file,&size),*in_start=in_ptr,
     *out_ptr,*out_start,*word_ptr,*word_start,
     *last_word="",*def_word_start,*sorted_word_start;
  U16 *d;
  //Check the read before allocating, so a failure leaks nothing.
  if (!in_ptr) return;
  out_ptr=out_start=def_word_start=MAlloc(size);
  word_ptr=word_start=MAlloc(size);
  do {
    cmd=ACDNextCmd(&in_ptr);
    if (cmd==ACD_H1) {
next_word:
      //A new headword closes the previous entry; track its size.
      if (out_ptr-def_word_start>largest_entry)
        largest_entry=out_ptr-def_word_start;
      def_word_start=out_ptr;
      if (st=ACDNextEntry(&in_ptr)) {
        if (*st) {
          if (StrICmp(st,last_word)) {
            word_cnt++;
            //Word list record: marker, word, zero, U16 block index.
            *word_ptr++=ACD_WORD_CHAR;
            last_word=word_ptr;
            StrCpy(word_ptr,st);
            word_ptr+=StrLen(st)+1;
            d=word_ptr;
            *d=(out_ptr-out_start)/ACD_BLK_SIZE;
            word_ptr+=2;
            //Definition stream: marker, word, zero.
            *out_ptr++=ACD_WORD_CHAR;
            StrCpy(out_ptr,st);
            out_ptr+=StrLen(st)+1;
          }
          Free(st);
          while (TRUE) {
            //Skip to the next tag of interest.
            do {
              cmd=ACDNextCmd(&in_ptr);
              if (cmd==ACD_H1)
                goto next_word;
            } while (cmd>=0 && !(cmd==ACD_DEF||cmd==ACD_PRONUNCIATION||
                  cmd==ACD_POS||cmd==ACD_EXTRA));
            if (cmd==ACD_DEF)
              marker=ACD_DEF_CHAR;
            else if (cmd==ACD_PRONUNCIATION)
              marker=ACD_PRONUNCIATION_CHAR;
            else if (cmd==ACD_POS)
              marker=ACD_POS_CHAR;
            else if (cmd==ACD_EXTRA)
              marker=ACD_EXTRA_CHAR;
            else
              break; //cmd<0: input exhausted
            if (st=ACDNextEntry(&in_ptr)) {
              if (*st) {
                *out_ptr++=marker;
                StrCpy(out_ptr,st);
                out_ptr+=StrLen(st)+1;
              }
              Free(st);
            }
          }
        } else
          Free(st);
      }
    }
  } while (cmd>=0);
  //No headword follows the final entry, so account for it here.
  if (out_ptr-def_word_start>largest_entry)
    largest_entry=out_ptr-def_word_start;
  *out_ptr++=ACD_END_CHAR;
  *word_ptr++=ACD_END_CHAR;
  Free(in_start);
  "Blk Size:%d\n",ACD_BLK_SIZE;
  "Blk Cnt:%04X\n",(out_ptr-out_start+ACD_BLK_SIZE-1)/ACD_BLK_SIZE;
  "Largest Entry:%d\n",largest_entry;
  "Word Count:%d\n",word_cnt;
  FileWrite(ACD_DEF_FILENAME,out_start,out_ptr-out_start);
  "Def File Size:%d\n",out_ptr-out_start;
  sorted_word_start=ACDSortWords(word_start,word_ptr-word_start,word_cnt);
  FileWrite(ACD_WORD_FILENAME,sorted_word_start,word_ptr-word_start);
  "Word File Size:%d\n",word_ptr-word_start;
  Free(out_start);
  Free(word_start);
  Free(sorted_word_start);
}
//Script entry: these statements run when the file is executed.
//Change to __DIR__ (presumably this file's directory) so
//the relative file names below resolve there.
Cd(__DIR__);
//Pass 1: decode escapes in the raw dictionary into an intermediate file.
ACDPreprocess("DICTIONARY.DD","DICTIONARY2.DD");
//Pass 2: generate the definition and word files from the intermediate file.
ACDGen("DICTIONARY2.DD");