templeos-info/public/Wb/Demo/WebLogDemo/WebLogRep.HC

529 lines
12 KiB
HolyC
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//Substring that IsDownLoad() looks for in a request's file name.
#define DOWNLOAD_FILE1 "TOS_Distro.ISO"
//Smallest logged size that IsDownLoad() accepts as a download.
#define DOWNLOAD_FILE1_SIZE 16000000
//Most hours shown in the per-hour charts of IndexRep() and DownLoadRep().
#define HOURS_MAX (24*3)
//One GET request parsed from a web log by PrsSingleLogFile().
class LogStruct
{
  LogStruct *next;         //Queue links used with QueIns() and QueRem().
  LogStruct *last;
  LogStruct *ip_num_left;  //Children in the tree built by IPNumTreeAdd().
  LogStruct *ip_num_right;
  U32 ip_num;              //Dotted address; the first octet is stored in u8[3].
  U32 code;                //Number following the quoted request (presumably HTTP status).
  I64 size;                //Number following code (presumably bytes sent).
  U8 *file;                //Heap copy of the requested path.
  U8 *link;                //Heap copy of the quoted field after size (presumably the referrer).
  CDate datetime;          //Time stamp with the logged zone offset subtracted.
};
//Node of the counting trees built by LinkTreeAdd() and FileTreeAdd().
class LinkStruct
{
  LinkStruct *left;
  LinkStruct *right;
  U8 *link;  //Key set by LinkTreeAdd(); points at a LogStruct's string.
  U8 *file;  //Key set by FileTreeAdd(); points at a LogStruct's string.
  I64 cnt;   //Number of log entries seen with this key.
};
//Queue node holding one blocked address.
class BlockedStruct
{
  BlockedStruct *next;
  BlockedStruct *last;
  U32 ip_num;  //Compared against LogStruct.ip_num by IsBlocked().
};
U0 LogStructDel(LogStruct *tmplg)
{//Free one log entry together with the two strings it owns.
//Does not unlink the entry from any queue.
  Free(tmplg->link);
  Free(tmplg->file);
  Free(tmplg);
}
//Parse one log file and append a LogStruct to the head queue for each
//GET line that parses completely.
//  head     Queue head that receives the new entries.
//  name     File read with DocRead().
//  _dstart  Lowered to the earliest time stamp parsed.
//  _dend    Raised to the latest time stamp parsed.
//The date range is updated as soon as a time stamp is accepted, so HEAD
//lines and lines rejected later in the parse still widen it.
//Lines that fail to parse are printed and skipped.
U0 PrsSingleLogFile(LogStruct *head,U8 *name,CDate *_dstart,CDate *_dend)
{
CDoc *doc=DocRead(name,
DOCF_PLAIN_TEXT_TABS|DOCF_DBL_DOLLARS|DOCF_NO_CURSOR);
CDocEntry *doc_e=doc->head.next;
U8 *src,*src2,*mon_lst=Define("ST_MONTHS");
LogStruct *tmplg;
CDateStruct ds;
I64 i;
//Echo the name of the file being parsed.
"%$$Q\n",name;
while (doc_e!=doc) {
//Only text entries are treated as log lines.
if (doc_e->type_u8==DOCT_TEXT) {
tmplg=CAlloc(sizeof(LogStruct));
try {
src=doc_e->tag;
//Dotted address a.b.c.d: a is stored in u8[3] down to d in u8[0].
tmplg->ip_num.u8[3]=Str2I64(src,10,&src);
if (*src++!='.') throw;
tmplg->ip_num.u8[2]=Str2I64(src,10,&src);
if (*src++!='.') throw;
tmplg->ip_num.u8[1]=Str2I64(src,10,&src);
if (*src++!='.') throw;
tmplg->ip_num.u8[0]=Str2I64(src,10,&src);
//Skip to just past '['; throw if the line ends first.
do if (!*src) throw;
while (*src++!='[');
//Time stamp laid out as day/month-name/year:hour:min:sec.
MemSet(&ds,0,sizeof(CDateStruct));
ds.day_of_mon=Str2I64(src,10,&src);
if (*src++!='/') throw;
src2=src;
do if (!*src2) throw;
while (*src2++!='/');
//Overwrite the '/' after the month name with a terminator, in place,
//so LstMatch() compares the month name alone.
* --src2=0;
ds.mon=1+LstMatch(src,mon_lst,LMF_IGNORE_CASE);
src=++src2;
ds.year=Str2I64(src,10,&src);
if (*src++!=':') throw;
ds.hour=Str2I64(src,10,&src);
if (*src++!=':') throw;
ds.min=Str2I64(src,10,&src);
if (*src++!=':') throw;
ds.sec=Str2I64(src,10,&src);
tmplg->datetime=Struct2Date(&ds);
if (*src++!=CH_SPACE) throw;
//Zone field read as one number hhmm: subtract hh hours plus mm/60 hours.
i=Str2I64(src,10,&src);
tmplg->datetime-=(i/100+i%100/60.0)*CDATE_FREQ*60*60;
//Reject time stamps before 1/1/2017 or on/after 1/1/2050.
if (!(Str2Date("1/1/2017")<=tmplg->datetime<Str2Date("1/1/2050")))
throw;
if (tmplg->datetime<*_dstart) *_dstart=tmplg->datetime;
if (tmplg->datetime>*_dend) *_dend =tmplg->datetime;
//Skip past ']' and require a space and an opening quote.
do if (!*src) throw;
while (*src++!=']');
if (*src++!=CH_SPACE) throw;
if (*src++!='\"') throw;
if (!StrNCmp(src,"GET ",4)) {
//Requested path runs up to the next space; terminate it in place and copy.
src2=src+=4;
do if (!*src2) throw;
while (*src2++!=CH_SPACE);
* --src2=0;
tmplg->file=StrNew(src);
src=++src2;
//Skip the rest of the quoted request.
do if (!*src) throw;
while (*src++!='\"');
tmplg->code=Str2I64(src,10,&src);
if (*src++!=CH_SPACE) throw;
tmplg->size=Str2I64(src,10,&src);
if (*src++!=CH_SPACE) throw;
//Next quoted field is copied as the link.
if (*src++!='\"') throw;
src2=src;
do if (!*src2) throw;
while (*src2++!='\"');
* --src2=0;
tmplg->link=StrNew(src);
src=++src2;
//Fully parsed: append at the tail of the queue.
QueIns(tmplg,head->last);
} else if (!StrNCmp(src,"HEAD ",5)) {
//HEAD requests are discarded without a message.
LogStructDel(tmplg);
} else
throw;
} catch {
//Flag the exception as caught, show the offending line and drop the entry.
//The line shown may be cut short by the terminators written above.
Fs->catch_except=TRUE;
"%$$Q\n",doc_e->tag;
LogStructDel(tmplg);
}
}
doc_e=doc_e->next;
}
DocDel(doc);
}
LogStruct *PrsLogFiles(U8 *files_find_mask,CDate *_dstart,CDate *_dend)
{//Parse every file matching the mask into one newly allocated queue.
//Returns the queue head. _dstart and _dend are handed to
//PrsSingleLogFile() for each file.
  CDirEntry *first=FilesFind(files_find_mask),*cur;
  LogStruct *head=CAlloc(sizeof(LogStruct));
  QueInit(head);
  for (cur=first;cur;cur=cur->next)
    PrsSingleLogFile(head,cur->full_name,_dstart,_dend);
  DirTreeDel(first);
  return head;
}
U0 LogLstDel(LogStruct *head)
{//Free every entry linked after head. The head node itself is not freed.
  LogStruct *cur,*nxt;
  for (cur=head->next;cur!=head;cur=nxt) {
    nxt=cur->next; //Saved first because LogStructDel() frees cur.
    LogStructDel(cur);
  }
}
U0 BlockedStructAdd(BlockedStruct *head,U32 ip_num)
{//Append a new node holding ip_num at the tail of the blocked queue.
  BlockedStruct *node=CAlloc(sizeof(BlockedStruct));
  node->ip_num=ip_num;
  QueIns(node,head->last);
}
Bool IsBlocked(BlockedStruct *head,U32 ip_num)
{//Return TRUE if any node of the blocked queue holds ip_num.
  BlockedStruct *cur;
  for (cur=head->next;cur!=head;cur=cur->next)
    if (cur->ip_num==ip_num)
      return TRUE;
  return FALSE;
}
U0 BlockIPNuip(LogStruct *head)
{//Remove and free every log entry whose address is on a block list.
//The list starts with one fixed address and gains the address of every
//entry whose file name contains ROBOT, matched ignoring case.
  BlockedStruct blocked_head;
  LogStruct *cur,*nxt;
  QueInit(&blocked_head);
  //Fixed entry 68.227.61.6. Parentheses spell out the grouping that
  //HolyC precedence already gives the unparenthesized form.
  BlockedStructAdd(&blocked_head,(68<<24)+(227<<16)+(61<<8)+6);
  //pass 1: collect robot lst
  for (cur=head->next;cur!=head;cur=cur->next)
    if (StrIMatch("ROBOT",cur->file) &&
          !IsBlocked(&blocked_head,cur->ip_num))
      BlockedStructAdd(&blocked_head,cur->ip_num);
  //pass 2: remove entries from blocked addresses
  for (cur=head->next;cur!=head;cur=nxt) {
    nxt=cur->next; //Saved first because cur may be freed below.
    if (IsBlocked(&blocked_head,cur->ip_num)) {
      QueRem(cur);
      LogStructDel(cur);
    }
  }
  QueDel(&blocked_head);
}
Bool IsDownLoad(LogStruct *tmplg)
{//TRUE when the file name contains DOWNLOAD_FILE1 and the logged size
//is at least DOWNLOAD_FILE1_SIZE.
  if (!StrMatch(DOWNLOAD_FILE1,tmplg->file))
    return FALSE;
  if (tmplg->size<DOWNLOAD_FILE1_SIZE)
    return FALSE;
  return TRUE;
}
Bool IsIndex(LogStruct *tmplg)
{//TRUE when the requested file is exactly /index.html or /.
  if (!StrCmp(tmplg->file,"/index.html"))
    return TRUE;
  if (!StrCmp(tmplg->file,"/"))
    return TRUE;
  return FALSE;
}
Bool IsKeeper(LogStruct *tmplg,CDate dstart,CDate dend)
{//Decide whether a log entry is counted in the reports.
//Each test below rejects the entry; tests run in the same order as the
//conditions of the original combined expression.
  U8 *path=tmplg->file;
  I64 len;
  //Outside the requested date range.
  if (tmplg->datetime<dstart || tmplg->datetime>dend)
    return FALSE;
  //Has a query string.
  if (StrOcc(path,'?'))
    return FALSE;
  len=StrLen(path);
  if (len<=2)
    return FALSE;
  //Second character must be an upper-case letter.
  if (path[1]<'A' || path[1]>'Z')
    return FALSE;
  if (!tmplg->size)
    return FALSE;
  //Ends in a slash.
  if (path[len-1]=='/')
    return FALSE;
  //Name after the first character starts with Wb or Family.
  if (!MemCmp(&path[1],"Wb",2))
    return FALSE;
  if (len>=7 && !MemCmp(&path[1],"Family",6))
    return FALSE;
  return TRUE;
}
Bool IPNumTreeAdd(LogStruct **_head,LogStruct *tmplg)
{//Insert tmplg into the binary tree keyed by ip_num, linking through
//its ip_num_left and ip_num_right fields.
//Returns TRUE if the address was already in the tree (tmplg is not
//inserted) and FALSE if tmplg became a new leaf.
//Throws when fewer than 0x200 bytes of stack remain.
  LogStruct *node;
  if (UnusedStk<0x200) {
    PrintErr("Stk Overflow");
    throw;
  }
  node=*_head;
  if (!node) {
    tmplg->ip_num_left=NULL;
    tmplg->ip_num_right=NULL;
    *_head=tmplg;
    return FALSE;
  }
  if (tmplg->ip_num==node->ip_num)
    return TRUE;
  if (tmplg->ip_num<node->ip_num)
    return IPNumTreeAdd(&node->ip_num_left,tmplg);
  return IPNumTreeAdd(&node->ip_num_right,tmplg);
}
U0 LinkTreeAdd(LinkStruct **_root,LogStruct *tmplg)
{//Count tmplg->link in the tree ordered by StrCmp() on link.
//An existing node gets its cnt raised; otherwise a node with cnt 1 is
//added. The node points at tmplg's string and does not copy it.
//Throws when fewer than 0x200 bytes of stack remain.
  I64 cmp;
  LinkStruct *node;
  if (UnusedStk<0x200) {
    PrintErr("Stk Overflow");
    throw;
  }
  node=*_root;
  if (!node) {
    node=CAlloc(sizeof(LinkStruct));
    node->link=tmplg->link;
    node->cnt=1;
    *_root=node;
    return;
  }
  cmp=StrCmp(tmplg->link,node->link);
  if (cmp<0)
    LinkTreeAdd(&node->left,tmplg);
  else if (cmp>0)
    LinkTreeAdd(&node->right,tmplg);
  else
    node->cnt++;
}
U0 FileTreeAdd(LinkStruct **_root,LogStruct *tmplg)
{//Count tmplg->file in the tree ordered by StrCmp() on file.
//An existing node gets its cnt raised; otherwise a node with cnt 1 is
//added. The node points at tmplg's string and does not copy it.
//Throws when fewer than 0x200 bytes of stack remain.
  I64 cmp;
  LinkStruct *node;
  if (UnusedStk<0x200) {
    PrintErr("Stk Overflow");
    throw;
  }
  node=*_root;
  if (!node) {
    node=CAlloc(sizeof(LinkStruct));
    node->file=tmplg->file;
    node->cnt=1;
    *_root=node;
    return;
  }
  cmp=StrCmp(tmplg->file,node->file);
  if (cmp<0)
    FileTreeAdd(&node->left,tmplg);
  else if (cmp>0)
    FileTreeAdd(&node->right,tmplg);
  else
    node->cnt++;
}
U0 LinkTreeDel(LinkStruct *root)
{//Free every node of the tree. The key strings are not freed.
  if (!root)
    return;
  LinkTreeDel(root->left);
  LinkTreeDel(root->right);
  Free(root);
}
U0 LinkTreeTraverse(LinkStruct *root)
{//Print count and link for each node: left subtree, node, right subtree.
  if (!root)
    return;
  LinkTreeTraverse(root->left);
  "%3d:%$$Q\n",root->cnt,root->link;
  LinkTreeTraverse(root->right);
}
U0 FileTreeDel(LinkStruct *root)
{//Free every node of the tree. The key strings are not freed.
  if (!root)
    return;
  FileTreeDel(root->left);
  FileTreeDel(root->right);
  Free(root);
}
U0 FileTreeTraverse(LinkStruct *root)
{//Print count and file for each node: left subtree, node, right subtree.
  if (!root)
    return;
  FileTreeTraverse(root->left);
  "%3d:%$$Q\n",root->cnt,root->file;
  FileTreeTraverse(root->right);
}
U0 DownLoadRep(LogStruct *head,CDate dstart,CDate dend)
{//Print download statistics for entries accepted by IsKeeper() and
//IsDownLoad(): a per-day table, a per-hour chart limited to the last
//HOURS_MAX hours, totals, and the link counts.
//A hit is a dup when IPNumTreeAdd() reports its address as already seen.
//Assumes dstart<=dend; the count arrays are sized from that range.
  I64 i,j,h,d,cnt,dups,
        hours_start,hours_end,*hour_cnts,*dup_cnts,
        days_start,days_end,*day_cnts,*day_dup_cnts;
  LogStruct *tmplg=head->next,*dup_head=NULL;
  LinkStruct *link_root=NULL;
  CDateStruct ds;
  //The upper 32 bits of a date times 24 give a whole-hour number.
  i=dstart*24;hours_start=i.u32[1];
  i=dend  *24;hours_end  =i.u32[1];
  //Day numbers are taken after adding local_time_offset.
  days_start=(dstart+local_time_offset)>>32;
  days_end  =(dend+local_time_offset)>>32;
  hour_cnts=CAlloc((hours_end-hours_start+1)*sizeof(I64));
  dup_cnts =CAlloc((hours_end-hours_start+1)*sizeof(I64));
  day_cnts =CAlloc((days_end-days_start+1)*sizeof(I64));
  day_dup_cnts=CAlloc((days_end-days_start+1)*sizeof(I64));
  dups=cnt=0;
  while (tmplg!=head) {
    if (IsKeeper(tmplg,dstart,dend) && IsDownLoad(tmplg)) {
      i=tmplg->datetime*24;
      h=i.u32[1]-hours_start;
      d=((tmplg->datetime+local_time_offset)>>32)-days_start;
      hour_cnts[h]++;
      day_cnts[d]++;
      cnt++;
      if (IPNumTreeAdd(&dup_head,tmplg)) {
        day_dup_cnts[d]++;
        dup_cnts[h]++;
        dups++;
      }
      LinkTreeAdd(&link_root,tmplg);
    }
    tmplg=tmplg->next;
  }
  "\n\nDownloads of /TOS_Distro.ISO\n";
  //Step one day at a time from dstart until the local day number passes
  //days_end. Bounding on the day number instead of i<=dend keeps the
  //last day's row when dend falls earlier in the day than dstart.
  for (i=dstart;((i+local_time_offset)>>32)<=days_end;i+=1<<32) {
    d=((i+local_time_offset)>>32)-days_start;
    "%DDups:%5dTotal:%5dUniques:%5d\n",i,
          day_dup_cnts[d],day_cnts[d],day_cnts[d]-day_dup_cnts[d];
  }
  "\n\nDownloads of /TOS_Distro.ISO\n"
        "'-' is a dup.'+' is not a dup.\n";
  //Chart only the last HOURS_MAX hours of the range.
  if (hours_end-hours_start>=HOURS_MAX)
    i=hours_end-HOURS_MAX+1;
  else
    i=hours_start;
  for (;i<=hours_end;i++) {
    Date2Struct(&ds,(i<<32)/24+local_time_offset);
    "%D %02d: ",(i<<32)/24,ds.hour;
    //One '-' per dup, then '+' up to the hour's total.
    for (j=0;j<dup_cnts[i-hours_start];j++)
      '-';
    for (;j<hour_cnts[i-hours_start];j++)
      '+';
    '\n';
  }
  "Total:%dDups:%dUniques:%d\n",cnt,dups,cnt-dups;
  "\n\nDownloads of /TOS_Distro.ISO\n";
  LinkTreeTraverse(link_root);
  '\n';
  LinkTreeDel(link_root);
  Free(hour_cnts);
  Free(dup_cnts);
  Free(day_cnts);
  Free(day_dup_cnts);
}
U0 FileRep(LogStruct *head,CDate dstart,CDate dend)
{//Print a hit count per file for the entries accepted by IsKeeper().
  LinkStruct *file_root=NULL;
  LogStruct *cur;
  for (cur=head->next;cur!=head;cur=cur->next)
    if (IsKeeper(cur,dstart,dend))
      FileTreeAdd(&file_root,cur);
  "\n\nFile Hits\n";
  FileTreeTraverse(file_root);
  '\n';
  FileTreeDel(file_root);
}
U0 IndexRep(LogStruct *head,CDate dstart,CDate dend)
{//Print index-page statistics for entries accepted by IsKeeper() and
//IsIndex(): a per-day table, a per-hour chart limited to the last
//HOURS_MAX hours, totals, and the link counts.
//A hit is a dup when IPNumTreeAdd() reports its address as already seen.
//Assumes dstart<=dend; the count arrays are sized from that range.
  I64 i,j,h,d,cnt,dups,
        hours_start,hours_end,*hour_cnts,*dup_cnts,
        days_start,days_end,*day_cnts,*day_dup_cnts;
  LogStruct *tmplg=head->next,*dup_head=NULL;
  LinkStruct *link_root=NULL;
  CDateStruct ds;
  //The upper 32 bits of a date times 24 give a whole-hour number.
  i=dstart*24;hours_start=i.u32[1];
  i=dend  *24;hours_end  =i.u32[1];
  //Day numbers are taken after adding local_time_offset.
  days_start=(dstart+local_time_offset)>>32;
  days_end  =(dend+local_time_offset)>>32;
  hour_cnts=CAlloc((hours_end-hours_start+1)*sizeof(I64));
  dup_cnts =CAlloc((hours_end-hours_start+1)*sizeof(I64));
  day_cnts =CAlloc((days_end-days_start+1)*sizeof(I64));
  day_dup_cnts=CAlloc((days_end-days_start+1)*sizeof(I64));
  dups=cnt=0;
  while (tmplg!=head) {
    if (IsKeeper(tmplg,dstart,dend) && IsIndex(tmplg)) {
      i=tmplg->datetime*24;
      h=i.u32[1]-hours_start;
      d=((tmplg->datetime+local_time_offset)>>32)-days_start;
      hour_cnts[h]++;
      day_cnts[d]++;
      cnt++;
      if (IPNumTreeAdd(&dup_head,tmplg)) {
        day_dup_cnts[d]++;
        dup_cnts[h]++;
        dups++;
      }
      LinkTreeAdd(&link_root,tmplg);
    }
    tmplg=tmplg->next;
  }
  "\n\nHits on /index.html\n";
  //Step one day at a time from dstart until the local day number passes
  //days_end. Bounding on the day number instead of i<=dend keeps the
  //last day's row when dend falls earlier in the day than dstart.
  for (i=dstart;((i+local_time_offset)>>32)<=days_end;i+=1<<32) {
    d=((i+local_time_offset)>>32)-days_start;
    "%DDups:%5dTotal:%5dUniques:%5d\n",i,
          day_dup_cnts[d],day_cnts[d],day_cnts[d]-day_dup_cnts[d];
  }
  //The '-'/'+' legend goes with the hourly chart, which is the only
  //part that prints those marks (same placement as DownLoadRep()).
  "\n\nHits on /index.html\n"
        "'-' is a dup.'+' is not a dup.\n";
  //Chart only the last HOURS_MAX hours of the range.
  if (hours_end-hours_start>=HOURS_MAX)
    i=hours_end-HOURS_MAX+1;
  else
    i=hours_start;
  for (;i<=hours_end;i++) {
    Date2Struct(&ds,(i<<32)/24+local_time_offset);
    "%D %02d: ",(i<<32)/24,ds.hour;
    //One '-' per dup, then '+' up to the hour's total.
    for (j=0;j<dup_cnts[i-hours_start];j++)
      '-';
    for (;j<hour_cnts[i-hours_start];j++)
      '+';
    '\n';
  }
  "Total:%dDups:%dUniques:%d\n",cnt,dups,cnt-dups;
  "\n\nHits on /index.html\n";
  LinkTreeTraverse(link_root);
  '\n';
  LinkTreeDel(link_root);
  Free(hour_cnts);
  Free(dup_cnts);
  Free(day_cnts);
  Free(day_dup_cnts);
}
U0 WebLogRep(U8 *mask,U8 *output_filename)
{//Parse the log files matching mask, prompt for a date range, print the
//index, file and download reports and write the output document to
//output_filename.
  LogStruct *head;
  //Start above and end below any real date, so the first parsed time
  //stamp sets both and dstart>dend afterwards means nothing was parsed.
  CDate dstart=I64_MAX,dend=I64_MIN;
  DocMax;
  head=PrsLogFiles(mask,&dstart,&dend);
  if (dstart>dend)
    PrintErr("No Data.\n");
  else {
    dstart=GetDate("Start(%D):",dstart);
    dend  =GetDate("End(%D):",dend);
    if (dstart>dend)
      //The report functions size their count arrays from dend-dstart,
      //so a reversed range must not reach them.
      PrintErr("Start is after End.\n");
    else {
      BlockIPNuip(head);
      DocClear;
      "$$WW,0$$";
      IndexRep(head,dstart,dend);
      FileRep(head,dstart,dend);
      DownLoadRep(head,dstart,dend);
      StrCpy(DocPut->filename.name,output_filename);
      DocWrite(DocPut,TRUE);
      "$$WW,1$$";
    }
  }
  LogLstDel(head);
  //LogLstDel() frees only the entries; the head allocated by
  //PrsLogFiles() is released here.
  Free(head);
}
//Compiled only when __CMD_LINE__ is nonzero: change to __DIR__
//(presumably this file's directory) and build the report from every
//file matching *.log* there.
#if __CMD_LINE__
Cd(__DIR__);;
WebLogRep("*.log*","~/DemoWebLog.DD.Z");
#endif