templeos-info/public/Wb/Demo/WebLogDemo/WebLogRep.HC

529 lines
12 KiB
HolyC
Raw Permalink Normal View History

2024-03-16 10:26:19 +00:00
//Name matched against the requested file and the minimum transfer size
//for a request to count as a download (both used by IsDownLoad).
#define DOWNLOAD_FILE1 "TOS_Distro.ISO"
#define DOWNLOAD_FILE1_SIZE 16000000
//Max number of trailing hours drawn in the hourly charts of the reports.
#define HOURS_MAX (24*3)
//One parsed "GET" request from an access log.
//Filled in by PrsSingleLogFile; freed by LogStructDel.
class LogStruct
{
  //Queue links (passed to QueIns/QueRem).
  LogStruct *next;
  LogStruct *last;
  //Child links of the per-report tree built by IPNumTreeAdd.
  LogStruct *ip_num_left;
  LogStruct *ip_num_right;
  U32 ip_num;	//Dotted-quad address; first octet is stored in u8[3].
  U32 code;	//Number following the quoted request.
  I64 size;	//Number following code.
  U8 *file;	//Requested path (owned, StrNew'ed).
  U8 *link;	//Quoted string following size (owned, StrNew'ed).
  CDate datetime;	//Timestamp after the zone field has been subtracted.
};
//Binary-tree node counting how often one string occurs.
//LinkTreeAdd keys on link, FileTreeAdd keys on file; the other member
//stays NULL.  Neither string is owned by the node.
class LinkStruct
{
  LinkStruct *left;
  LinkStruct *right;
  U8 *link;
  U8 *file;
  I64 cnt;	//Number of log entries that matched this key.
};
//Queue node holding one address whose log entries are to be discarded.
class BlockedStruct
{
  BlockedStruct *next;
  BlockedStruct *last;
  U32 ip_num;	//Same encoding as LogStruct.ip_num.
};
//Frees a log entry together with the two strings it owns.
//Does not unlink the entry from any queue.
U0 LogStructDel(LogStruct *tmplg)
{
  Free(tmplg->link);
  Free(tmplg->file);
  Free(tmplg);
}
//Parses one access-log file and appends a LogStruct to the queue at head
//for every "GET" line that parses cleanly.
//
//Line layout, as implied by the parsing steps below:
//  a.b.c.d ... [dd/Mon/yyyy:hh:mm:ss zone] "GET file ..." code size "link"
//
//head:    queue head receiving the parsed entries (inserted at the tail).
//name:    file to read; it is echoed to the output before parsing.
//_dstart: lowered to the earliest timestamp encountered.
//_dend:   raised to the latest timestamp encountered.
//
//"HEAD" lines are dropped silently.  Any other line that fails to parse
//is echoed to the output and dropped.
//NOTE(review): *_dstart/*_dend are updated before the request part of the
//line is validated, so HEAD lines and lines rejected later still widen
//the range.
U0 PrsSingleLogFile(LogStruct *head,U8 *name,CDate *_dstart,CDate *_dend)
{
CDoc *doc=DocRead(name,
DOCF_PLAIN_TEXT_TABS|DOCF_DBL_DOLLARS|DOCF_NO_CURSOR);
CDocEntry *doc_e=doc->head.next;
U8 *src,*src2,*mon_lst=Define("ST_MONTHS");
LogStruct *tmplg;
CDateStruct ds;
I64 i;
"%$$Q\n",name;
while (doc_e!=doc) {
if (doc_e->type_u8==DOCT_TEXT) {
tmplg=CAlloc(sizeof(LogStruct));
try {
src=doc_e->tag;
//Dotted-quad address: first octet goes into the most significant byte.
tmplg->ip_num.u8[3]=Str2I64(src,10,&src);
if (*src++!='.') throw;
tmplg->ip_num.u8[2]=Str2I64(src,10,&src);
if (*src++!='.') throw;
tmplg->ip_num.u8[1]=Str2I64(src,10,&src);
if (*src++!='.') throw;
tmplg->ip_num.u8[0]=Str2I64(src,10,&src);
//Skip ahead to just past '['; running off the end of the line is an error.
do if (!*src) throw;
while (*src++!='[');
MemSet(&ds,0,sizeof(CDateStruct));
ds.day_of_mon=Str2I64(src,10,&src);
if (*src++!='/') throw;
//Month name: terminate it in place (this writes into the doc's text) and
//look it up in the ST_MONTHS list.
src2=src;
do if (!*src2) throw;
while (*src2++!='/');
* --src2=0;
ds.mon=1+LstMatch(src,mon_lst,LMF_IGNORE_CASE);
src=++src2;
ds.year=Str2I64(src,10,&src);
if (*src++!=':') throw;
ds.hour=Str2I64(src,10,&src);
if (*src++!=':') throw;
ds.min=Str2I64(src,10,&src);
if (*src++!=':') throw;
ds.sec=Str2I64(src,10,&src);
tmplg->datetime=Struct2Date(&ds);
if (*src++!=CH_SPACE) throw;
//Zone field, read as a signed number hours*100+minutes, is subtracted
//from the timestamp (presumably normalizing it to UTC -- confirm).
i=Str2I64(src,10,&src);
tmplg->datetime-=(i/100+i%100/60.0)*CDATE_FREQ*60*60;
//Reject timestamps outside 2017..2049.
if (!(Str2Date("1/1/2017")<=tmplg->datetime<Str2Date("1/1/2050")))
throw;
if (tmplg->datetime<*_dstart) *_dstart=tmplg->datetime;
if (tmplg->datetime>*_dend) *_dend =tmplg->datetime;
do if (!*src) throw;
while (*src++!=']');
if (*src++!=CH_SPACE) throw;
if (*src++!='\"') throw;
if (!StrNCmp(src,"GET ",4)) {
//Requested path: runs up to the next space; terminated in place, copied.
src2=src+=4;
do if (!*src2) throw;
while (*src2++!=CH_SPACE);
* --src2=0;
tmplg->file=StrNew(src);
src=++src2;
//Skip the rest of the quoted request.
do if (!*src) throw;
while (*src++!='\"');
tmplg->code=Str2I64(src,10,&src);
if (*src++!=CH_SPACE) throw;
tmplg->size=Str2I64(src,10,&src);
if (*src++!=CH_SPACE) throw;
if (*src++!='\"') throw;
//Next quoted string becomes the link; terminated in place, copied.
src2=src;
do if (!*src2) throw;
while (*src2++!='\"');
* --src2=0;
tmplg->link=StrNew(src);
src=++src2;
QueIns(tmplg,head->last);
} else if (!StrNCmp(src,"HEAD ",5)) {
LogStructDel(tmplg);
} else
throw;
} catch {
//Mark the exception as handled, show the offending line (it may have been
//truncated by the in-place terminators above) and discard the entry.
Fs->catch_except=TRUE;
"%$$Q\n",doc_e->tag;
LogStructDel(tmplg);
}
}
doc_e=doc_e->next;
}
DocDel(doc);
}
//Parses every file matching files_find_mask into one queue of log entries.
//_dstart/_dend are handed to PrsSingleLogFile, which narrows/widens them.
//Returns the newly allocated queue head.
LogStruct *PrsLogFiles(U8 *files_find_mask,CDate *_dstart,CDate *_dend)
{
  LogStruct *lst=CAlloc(sizeof(LogStruct));
  CDirEntry *found=FilesFind(files_find_mask),*de;
  QueInit(lst);
  for (de=found;de;de=de->next)
    PrsSingleLogFile(lst,de->full_name,_dstart,_dend);
  DirTreeDel(found);
  return lst;
}
//Frees every entry queued on head.
//The head itself is neither freed nor re-linked.
U0 LogLstDel(LogStruct *head)
{
  LogStruct *cur,*nxt;
  for (cur=head->next;cur!=head;cur=nxt) {
    nxt=cur->next;	//Grab the successor before cur is freed.
    LogStructDel(cur);
  }
}
//Appends ip_num to the tail of the blocked-address queue at head.
U0 BlockedStructAdd(BlockedStruct *head,U32 ip_num)
{
  BlockedStruct *entry=CAlloc(sizeof(BlockedStruct));
  entry->ip_num=ip_num;
  QueIns(entry,head->last);
}
//Returns TRUE if ip_num is present in the blocked-address queue at head.
Bool IsBlocked(BlockedStruct *head,U32 ip_num)
{
  BlockedStruct *entry;
  for (entry=head->next;entry!=head;entry=entry->next)
    if (entry->ip_num==ip_num)
      return TRUE;
  return FALSE;
}
//Removes and frees every log entry that comes from a blocked address.
//The blocked set is seeded with 68.227.61.6 and extended with every
//address that requested a file whose name contains "ROBOT" (any case).
U0 BlockIPNuip(LogStruct *head)
{
  BlockedStruct blocked_head;
  LogStruct *cur,*nxt;

  QueInit(&blocked_head);
  BlockedStructAdd(&blocked_head,(68<<24)+(227<<16)+(61<<8)+6);

  //Pass 1: collect the addresses to block.
  for (cur=head->next;cur!=head;cur=cur->next)
    if (StrIMatch("ROBOT",cur->file) &&
	  !IsBlocked(&blocked_head,cur->ip_num))
      BlockedStructAdd(&blocked_head,cur->ip_num);

  //Pass 2: drop every entry belonging to a blocked address.
  for (cur=head->next;cur!=head;cur=nxt) {
    nxt=cur->next;	//Grab the successor before cur may be freed.
    if (IsBlocked(&blocked_head,cur->ip_num)) {
      QueRem(cur);
      LogStructDel(cur);
    }
  }

  QueDel(&blocked_head);
}
//Returns TRUE if the entry's file contains DOWNLOAD_FILE1 and at least
//DOWNLOAD_FILE1_SIZE bytes were transferred.
Bool IsDownLoad(LogStruct *tmplg)
{
  if (!StrMatch(DOWNLOAD_FILE1,tmplg->file))
    return FALSE;
  if (tmplg->size<DOWNLOAD_FILE1_SIZE)
    return FALSE;
  return TRUE;
}
//Returns TRUE if the entry requested exactly "/index.html" or "/".
Bool IsIndex(LogStruct *tmplg)
{
  if (!StrCmp(tmplg->file,"/index.html"))
    return TRUE;
  if (!StrCmp(tmplg->file,"/"))
    return TRUE;
  return FALSE;
}
//Returns TRUE if the entry should appear in the reports.  It must:
//  * be dated within [dstart,dend];
//  * have no '?' in the file name;
//  * have a file name longer than 2 chars whose second char is 'A'..'Z';
//  * have a nonzero size;
//  * not end in '/';
//  * not have "Wb" or "Family" right after the first char.
Bool IsKeeper(LogStruct *tmplg,CDate dstart,CDate dend)
{
  U8 *path=tmplg->file;
  I64 len;

  if (!(dstart<=tmplg->datetime<=dend))
    return FALSE;
  if (StrOcc(path,'?'))
    return FALSE;
  len=StrLen(path);
  if (len<=2 || !('A'<=path[1]<='Z'))
    return FALSE;
  if (!tmplg->size || path[len-1]=='/')
    return FALSE;
  if (len>=3 && !MemCmp(&path[1],"Wb",2))
    return FALSE;
  if (len>=7 && !MemCmp(&path[1],"Family",6))
    return FALSE;
  return TRUE;
}
//Inserts tmplg into the binary tree rooted at *_head, keyed by ip_num.
//Returns TRUE if an entry with the same ip_num is already in the tree
//(tmplg is then not inserted), else links tmplg in as a leaf and
//returns FALSE.  Throws if the task is nearly out of stack.
Bool IPNumTreeAdd(LogStruct **_head,LogStruct *tmplg)
{
  LogStruct *node;

  if (UnusedStk<0x200) {
    PrintErr("Stk Overflow");
    throw;
  }

  node=*_head;
  if (!node) {
    //Empty slot: tmplg becomes a leaf here.
    tmplg->ip_num_left=NULL;
    tmplg->ip_num_right=NULL;
    *_head=tmplg;
    return FALSE;
  }
  if (tmplg->ip_num==node->ip_num)
    return TRUE;
  if (tmplg->ip_num<node->ip_num)
    return IPNumTreeAdd(&node->ip_num_left,tmplg);
  return IPNumTreeAdd(&node->ip_num_right,tmplg);
}
//Counts tmplg->link in the tree rooted at *_root, ordered by StrCmp.
//An existing node has its cnt incremented; otherwise a new node with
//cnt 1 is allocated.  The node borrows tmplg->link, it does not copy it.
//Throws if the task is nearly out of stack.
U0 LinkTreeAdd(LinkStruct **_root,LogStruct *tmplg)
{
  I64 cmp;
  LinkStruct *node;

  if (UnusedStk<0x200) {
    PrintErr("Stk Overflow");
    throw;
  }

  node=*_root;
  if (!node) {
    node=CAlloc(sizeof(LinkStruct));
    node->link=tmplg->link;
    node->cnt=1;
    *_root=node;
    return;
  }

  cmp=StrCmp(tmplg->link,node->link);
  if (!cmp)
    node->cnt++;
  else if (cmp<0)
    LinkTreeAdd(&node->left,tmplg);
  else
    LinkTreeAdd(&node->right,tmplg);
}
//Counts tmplg->file in the tree rooted at *_root, ordered by StrCmp.
//An existing node has its cnt incremented; otherwise a new node with
//cnt 1 is allocated.  The node borrows tmplg->file, it does not copy it.
//Throws if the task is nearly out of stack.
U0 FileTreeAdd(LinkStruct **_root,LogStruct *tmplg)
{
  I64 cmp;
  LinkStruct *node;

  if (UnusedStk<0x200) {
    PrintErr("Stk Overflow");
    throw;
  }

  node=*_root;
  if (!node) {
    node=CAlloc(sizeof(LinkStruct));
    node->file=tmplg->file;
    node->cnt=1;
    *_root=node;
    return;
  }

  cmp=StrCmp(tmplg->file,node->file);
  if (!cmp)
    node->cnt++;
  else if (cmp<0)
    FileTreeAdd(&node->left,tmplg);
  else
    FileTreeAdd(&node->right,tmplg);
}
//Frees every node of the tree; the strings the nodes point to are left
//alone.
U0 LinkTreeDel(LinkStruct *root)
{
  if (!root)
    return;
  LinkTreeDel(root->left);
  LinkTreeDel(root->right);
  Free(root);
}
//Prints "cnt:link" for every node, in ascending key order.
U0 LinkTreeTraverse(LinkStruct *root)
{
  if (!root)
    return;
  LinkTreeTraverse(root->left);
  "%3d:%$$Q\n",root->cnt,root->link;
  LinkTreeTraverse(root->right);
}
//Frees every node of the tree; the strings the nodes point to are left
//alone.
U0 FileTreeDel(LinkStruct *root)
{
  if (!root)
    return;
  FileTreeDel(root->left);
  FileTreeDel(root->right);
  Free(root);
}
//Prints "cnt:file" for every node, in ascending key order.
U0 FileTreeTraverse(LinkStruct *root)
{
  if (!root)
    return;
  FileTreeTraverse(root->left);
  "%3d:%$$Q\n",root->cnt,root->file;
  FileTreeTraverse(root->right);
}
//Prints the download report for entries that pass both IsKeeper and
//IsDownLoad:
//  1) per-day dup/total/unique counts,
//  2) a per-hour bar chart of at most the last HOURS_MAX hours
//     ('-' for a dup, '+' for a first hit), followed by grand totals,
//  3) the link strings of those entries with their hit counts.
//A "dup" is a hit from an ip_num that already had a counted hit.
//Requires dstart<=dend; the counter arrays are sized from that range.
U0 DownLoadRep(LogStruct *head,CDate dstart,CDate dend)
{
  I64 i,j,day,hr,cnt,dups,
	hours_start,hours_end,*hour_cnts,*dup_cnts,
	days_start,days_end,*day_cnts,*day_dup_cnts;
  LogStruct *tmplg=head->next,*dup_head=NULL;
  LinkStruct *link_root=NULL;
  CDateStruct ds;

  //Hour index is the whole part of date*24; day index is the whole part
  //of the date after local_time_offset has been added.
  i=dstart*24;hours_start=i.u32[1];
  i=dend  *24;hours_end  =i.u32[1];
  days_start=(dstart+local_time_offset)>>32;
  days_end  =(dend+local_time_offset)>>32;

  hour_cnts   =CAlloc((hours_end-hours_start+1)*sizeof(I64));
  dup_cnts    =CAlloc((hours_end-hours_start+1)*sizeof(I64));
  day_cnts    =CAlloc((days_end-days_start+1)*sizeof(I64));
  day_dup_cnts=CAlloc((days_end-days_start+1)*sizeof(I64));

  dups=cnt=0;
  while (tmplg!=head) {
    if (IsKeeper(tmplg,dstart,dend) && IsDownLoad(tmplg)) {
      i=tmplg->datetime*24;
      hr =i.u32[1]-hours_start;
      day=((tmplg->datetime+local_time_offset)>>32)-days_start;
      hour_cnts[hr]++;
      day_cnts[day]++;
      cnt++;
      if (IPNumTreeAdd(&dup_head,tmplg)) {
	day_dup_cnts[day]++;
	dup_cnts[hr]++;
	dups++;
      }
      LinkTreeAdd(&link_root,tmplg);
    }
    tmplg=tmplg->next;
  }

  "\n\nDownloads of /TOS_Distro.ISO\n";
  //Step one day at a time from dstart.  The loop ends on the day index,
  //not on i<=dend: when dend's time of day is earlier than dstart's, the
  //last step overshoots dend although its day still has counters, and
  //that final day would otherwise be left out of the table.
  for (i=dstart;((i+local_time_offset)>>32)<=days_end;i+=1<<32) {
    day=((i+local_time_offset)>>32)-days_start;
    "%DDups:%5dTotal:%5dUniques:%5d\n",i,
	  day_dup_cnts[day],day_cnts[day],day_cnts[day]-day_dup_cnts[day];
  }

  "\n\nDownloads of /TOS_Distro.ISO\n"
	"'-' is a dup.'+' is not a dup.\n";
  //Only the last HOURS_MAX hours are charted.
  if (hours_end-hours_start>=HOURS_MAX)
    i=hours_end-HOURS_MAX+1;
  else
    i=hours_start;
  for (;i<=hours_end;i++) {
    Date2Struct(&ds,((i<<32)/24)+local_time_offset);
    "%D %02d: ",(i<<32)/24,ds.hour;
    for (j=0;j<dup_cnts[i-hours_start];j++)
      '-';
    for (;j<hour_cnts[i-hours_start];j++)
      '+';
    '\n';
  }
  "Total:%dDups:%dUniques:%d\n",cnt,dups,cnt-dups;

  "\n\nDownloads of /TOS_Distro.ISO\n";
  LinkTreeTraverse(link_root);
  '\n';

  //The ip_num tree is threaded through the log entries themselves, so
  //only the link tree and the counters need freeing.
  LinkTreeDel(link_root);
  Free(hour_cnts);
  Free(dup_cnts);
  Free(day_cnts);
  Free(day_dup_cnts);
}
//Prints every distinct file among the entries that pass IsKeeper,
//together with its hit count, in ascending name order.
U0 FileRep(LogStruct *head,CDate dstart,CDate dend)
{
  LogStruct *cur;
  LinkStruct *file_root=NULL;

  for (cur=head->next;cur!=head;cur=cur->next)
    if (IsKeeper(cur,dstart,dend))
      FileTreeAdd(&file_root,cur);

  "\n\nFile Hits\n";
  FileTreeTraverse(file_root);
  '\n';
  FileTreeDel(file_root);
}
//Prints the index-page report for entries dated within [dstart,dend]
//that have a nonzero size and pass IsIndex:
//  1) per-day dup/total/unique counts,
//  2) a per-hour bar chart of at most the last HOURS_MAX hours
//     ('-' for a dup, '+' for a first hit), followed by grand totals,
//  3) the link strings of those entries with their hit counts.
//A "dup" is a hit from an ip_num that already had a counted hit.
//Requires dstart<=dend; the counter arrays are sized from that range.
U0 IndexRep(LogStruct *head,CDate dstart,CDate dend)
{
  I64 i,j,day,hr,cnt,dups,
	hours_start,hours_end,*hour_cnts,*dup_cnts,
	days_start,days_end,*day_cnts,*day_dup_cnts;
  LogStruct *tmplg=head->next,*dup_head=NULL;
  LinkStruct *link_root=NULL;
  CDateStruct ds;

  //Hour index is the whole part of date*24; day index is the whole part
  //of the date after local_time_offset has been added.
  i=dstart*24;hours_start=i.u32[1];
  i=dend  *24;hours_end  =i.u32[1];
  days_start=(dstart+local_time_offset)>>32;
  days_end  =(dend+local_time_offset)>>32;

  hour_cnts   =CAlloc((hours_end-hours_start+1)*sizeof(I64));
  dup_cnts    =CAlloc((hours_end-hours_start+1)*sizeof(I64));
  day_cnts    =CAlloc((days_end-days_start+1)*sizeof(I64));
  day_dup_cnts=CAlloc((days_end-days_start+1)*sizeof(I64));

  dups=cnt=0;
  while (tmplg!=head) {
    //IsKeeper() cannot be used as the filter here: it demands a file name
    //longer than 2 chars whose second char is 'A'..'Z', which neither
    //"/index.html" nor "/" satisfies, so the report would always be
    //empty.  Only its date-range and nonzero-size conditions are applied.
    if (dstart<=tmplg->datetime<=dend && tmplg->size && IsIndex(tmplg)) {
      i=tmplg->datetime*24;
      hr =i.u32[1]-hours_start;
      day=((tmplg->datetime+local_time_offset)>>32)-days_start;
      hour_cnts[hr]++;
      day_cnts[day]++;
      cnt++;
      if (IPNumTreeAdd(&dup_head,tmplg)) {
	day_dup_cnts[day]++;
	dup_cnts[hr]++;
	dups++;
      }
      LinkTreeAdd(&link_root,tmplg);
    }
    tmplg=tmplg->next;
  }

  "\n\nHits on /index.html\n";
  //Step one day at a time from dstart.  The loop ends on the day index,
  //not on i<=dend: when dend's time of day is earlier than dstart's, the
  //last step overshoots dend although its day still has counters, and
  //that final day would otherwise be left out of the table.
  for (i=dstart;((i+local_time_offset)>>32)<=days_end;i+=1<<32) {
    day=((i+local_time_offset)>>32)-days_start;
    "%DDups:%5dTotal:%5dUniques:%5d\n",i,
	  day_dup_cnts[day],day_cnts[day],day_cnts[day]-day_dup_cnts[day];
  }

  //The '-'/'+' legend belongs to the hourly chart, same as in DownLoadRep.
  "\n\nHits on /index.html\n"
	"'-' is a dup.'+' is not a dup.\n";
  //Only the last HOURS_MAX hours are charted.
  if (hours_end-hours_start>=HOURS_MAX)
    i=hours_end-HOURS_MAX+1;
  else
    i=hours_start;
  for (;i<=hours_end;i++) {
    Date2Struct(&ds,((i<<32)/24)+local_time_offset);
    "%D %02d: ",(i<<32)/24,ds.hour;
    for (j=0;j<dup_cnts[i-hours_start];j++)
      '-';
    for (;j<hour_cnts[i-hours_start];j++)
      '+';
    '\n';
  }
  "Total:%dDups:%dUniques:%d\n",cnt,dups,cnt-dups;

  "\n\nHits on /index.html\n";
  LinkTreeTraverse(link_root);
  '\n';

  //The ip_num tree is threaded through the log entries themselves, so
  //only the link tree and the counters need freeing.
  LinkTreeDel(link_root);
  Free(hour_cnts);
  Free(dup_cnts);
  Free(day_cnts);
  Free(day_dup_cnts);
}
//Builds the complete web-log report.
//mask:            file mask of the log files to parse.
//output_filename: name given to the output document before it is written.
//Parses the logs, prompts for the start/end dates (defaulting to the
//range found in the logs), discards entries from blocked addresses, then
//prints the index, file and download reports into the cleared output
//document and writes it.
U0 WebLogRep(U8 *mask,U8 *output_filename)
{
  LogStruct *head;
  CDate dstart=I64_MAX,dend=I64_MIN;
  DocMax;
  head=PrsLogFiles(mask,&dstart,&dend);
  if (dstart>dend)	//Still at the initial sentinels: nothing parsed.
    PrintErr("No Data.\n");
  else {
    dstart=GetDate("Start(%D):",dstart);
    dend  =GetDate("End(%D):",dend);
    //The reports size their counter arrays from dend-dstart, so an
    //inverted range entered at the prompts must not reach them.
    if (dstart>dend)
      PrintErr("Start is after End.\n");
    else {
      BlockIPNuip(head);
      DocClear;
      "$$WW,0$$";
      IndexRep(head,dstart,dend);
      FileRep(head,dstart,dend);
      DownLoadRep(head,dstart,dend);
      StrCpy(DocPut->filename.name,output_filename);
      DocWrite(DocPut,TRUE);
      "$$WW,1$$";
    }
  }
  LogLstDel(head);
  //LogLstDel frees only the queued entries; the head allocated by
  //PrsLogFiles has to be released separately.
  Free(head);
}
//Compiled only when __CMD_LINE__ is nonzero: change to this file's
//directory and build the report from the files there matching "*.log*".
#if __CMD_LINE__
Cd(__DIR__);;
WebLogRep("*.log*","~/DemoWebLog.DD.Z");
#endif