//Text searched for inside a request path to recognize the distro download.
#define DOWNLOAD_FILE1          "TOS_Distro.ISO"
//Smallest logged size that IsDownLoad() counts as a download.
#define DOWNLOAD_FILE1_SIZE     16000000

//Most rows the hour-by-hour charts print (three days of hours).
#define HOURS_MAX               (24*3)

//One "GET" request parsed from a web server log line.
class LogStruct
{
  //Queue links; records are chained with QueIns()/QueRem().
  LogStruct *next;
  LogStruct *last;
  //Children in the binary tree keyed by ip_num (see IPNumTreeAdd).
  LogStruct *ip_num_left;
  LogStruct *ip_num_right;
  //Client IPv4 address; the first dotted octet is stored in u8[3].
  U32 ip_num;
  //Number parsed right after the quoted request (presumably HTTP status).
  U32 code;
  //Number parsed after code; compared against DOWNLOAD_FILE1_SIZE.
  I64 size;
  //Requested path; heap copy owned by this record.
  U8 *file;
  //Quoted string that follows size (presumably the referrer); heap copy
  //owned by this record.
  U8 *link;
  //Time stamp of the request after the logged zone offset is applied.
  CDate datetime;
};

//Binary-tree node that counts how often one string has been seen.
//LinkTreeAdd() keys the tree on link; FileTreeAdd() keys it on file.
class LinkStruct
{
  LinkStruct *left;
  LinkStruct *right;
  //Both strings are borrowed from a LogStruct and are not freed with
  //the node.
  U8 *link;
  U8 *file;
  //Occurrences recorded so far.
  I64 cnt;
};

//Queue entry holding one IP address whose requests are to be discarded.
class BlockedStruct
{
  //Queue links; entries are chained with QueIns().
  BlockedStruct *next;
  BlockedStruct *last;
  //Blocked address, packed the same way as LogStruct.ip_num.
  U32 ip_num;
};

//Release one log record together with the two strings it owns.
//The record is not unlinked from any queue here.
U0 LogStructDel(LogStruct *tmplg)
{
  U8 *path=tmplg->file,*ref=tmplg->link;
  Free(ref);
  Free(path);
  Free(tmplg);
}

//Parse one web server log file and append its "GET" requests to a queue.
//
//head     Queue head; every accepted line becomes a LogStruct inserted
//         at the tail.
//name     File to read.
//_dstart  In/out: lowered to the earliest accepted time stamp.
//_dend    In/out: raised to the latest accepted time stamp.
//
//Line shape, as consumed below:
//  a.b.c.d ... [dd/Mon/yyyy:hh:mm:ss zone] "GET path ..." code size "link"
//"HEAD" requests are skipped silently.  Any other line that does not
//parse is echoed and dropped.  Only lines that are actually queued
//widen the *_dstart..*_dend range.
U0 PrsSingleLogFile(LogStruct *head,U8 *name,CDate *_dstart,CDate *_dend)
{
  CDoc *doc=DocRead(name,
        DOCF_PLAIN_TEXT_TABS|DOCF_DBL_DOLLARS|DOCF_NO_CURSOR);
  CDocEntry *doc_e=doc->head.next;
  U8 *src,*src2,*mon_lst=Define("ST_MONTHS");
  LogStruct *tmplg;
  CDateStruct ds;
  I64 i;
  "%$Q\n",name;
  while (doc_e!=doc) {
    if (doc_e->type_u8==DOCT_TEXT) {
      tmplg=CAlloc(sizeof(LogStruct));
      try {
        //Dotted IPv4 address; the first octet lands in the top byte.
        src=doc_e->tag;
        tmplg->ip_num.u8[3]=Str2I64(src,10,&src);
        if (*src++!='.') throw;
        tmplg->ip_num.u8[2]=Str2I64(src,10,&src);
        if (*src++!='.') throw;
        tmplg->ip_num.u8[1]=Str2I64(src,10,&src);
        if (*src++!='.') throw;
        tmplg->ip_num.u8[0]=Str2I64(src,10,&src);

        //Skip ahead to the bracketed time stamp.
        do if (!*src) throw;
        while (*src++!='[');
        MemSet(&ds,0,sizeof(CDateStruct));
        ds.day_of_mon=Str2I64(src,10,&src);
        if (*src++!='/') throw;
        //Terminate the month name in place so it can be looked up.
        src2=src;
        do if (!*src2) throw;
        while (*src2++!='/');
        * --src2=0;
        i=LstMatch(src,mon_lst,LMF_IGNORE_CASE);
        if (i<0) throw; //Month name not found or ambiguous.
        ds.mon=1+i;
        src=++src2;
        ds.year=Str2I64(src,10,&src);
        if (*src++!=':') throw;
        ds.hour=Str2I64(src,10,&src);
        if (*src++!=':') throw;
        ds.min=Str2I64(src,10,&src);
        if (*src++!=':') throw;
        ds.sec=Str2I64(src,10,&src);
        tmplg->datetime=Struct2Date(&ds);
        if (*src++!=CH_SPACE) throw;
        //Zone is read as a signed HHMM number and subtracted as hours.
        i=Str2I64(src,10,&src);
        tmplg->datetime-=(i/100+i%100/60.0)*CDATE_FREQ*60*60;
        //Reject time stamps outside 2017..2049.
        if (!(Str2Date("1/1/2017")<=tmplg->datetime<Str2Date("1/1/2050")))
          throw;

        do if (!*src) throw;
        while (*src++!=']');

        if (*src++!=CH_SPACE) throw;
        if (*src++!='\"') throw;
        if (!StrNCmp(src,"GET ",4)) {
          //Requested path: text up to the next space.
          src2=src+=4;
          do if (!*src2) throw;
          while (*src2++!=CH_SPACE);
          * --src2=0;
          tmplg->file=StrNew(src);
          src=++src2;

          //Skip the rest of the quoted request.
          do if (!*src) throw;
          while (*src++!='\"');

          tmplg->code=Str2I64(src,10,&src);
          if (*src++!=CH_SPACE) throw;
          tmplg->size=Str2I64(src,10,&src);
          if (*src++!=CH_SPACE) throw;

          //Quoted string that follows the size.
          if (*src++!='\"') throw;
          src2=src;
          do if (!*src2) throw;
          while (*src2++!='\"');
          * --src2=0;
          tmplg->link=StrNew(src);
          src=++src2;

          //The line is fully parsed: only now let it widen the range, so
          //rejected and HEAD lines cannot affect the reported dates.
          if (tmplg->datetime<*_dstart) *_dstart=tmplg->datetime;
          if (tmplg->datetime>*_dend)   *_dend  =tmplg->datetime;

          QueIns(tmplg,head->last);
        } else if (!StrNCmp(src,"HEAD ",5)) {
          LogStructDel(tmplg);
        } else
          throw;
      } catch {
        //Mark the exception handled so the remaining lines are parsed.
        Fs->catch_except=TRUE;
        //Echo the rejected line; it may be cut short at a field that was
        //terminated in place above.
        "%$Q\n",doc_e->tag;
        LogStructDel(tmplg);
      }
    }
    doc_e=doc_e->next;
  }
  DocDel(doc);
}

//Parse every file matching files_find_mask into one queue of records.
//
//Returns a newly allocated queue head.  *_dstart and *_dend are passed
//through to PrsSingleLogFile() for each file.
LogStruct *PrsLogFiles(U8 *files_find_mask,CDate *_dstart,CDate *_dend)
{
  LogStruct *head=CAlloc(sizeof(LogStruct));
  CDirEntry *first=FilesFind(files_find_mask),*cur;
  QueInit(head);
  for (cur=first;cur;cur=cur->next)
    PrsSingleLogFile(head,cur->full_name,_dstart,_dend);
  DirTreeDel(first);
  return head;
}

//Free every record in the queue.  The head itself is neither freed nor
//re-linked.
U0 LogLstDel(LogStruct *head)
{
  LogStruct *cur,*nxt;
  for (cur=head->next;cur!=head;cur=nxt) {
    nxt=cur->next; //Fetch before cur is released.
    LogStructDel(cur);
  }
}

//Append ip_num to the tail of the blocked-address queue.
//No duplicate check is made here.
U0 BlockedStructAdd(BlockedStruct *head,U32 ip_num)
{
  BlockedStruct *entry=CAlloc(sizeof(BlockedStruct));
  entry->ip_num=ip_num;
  QueIns(entry,head->last);
}

//Return TRUE when ip_num is present in the blocked-address queue.
Bool IsBlocked(BlockedStruct *head,U32 ip_num)
{
  BlockedStruct *cur;
  for (cur=head->next;cur!=head;cur=cur->next)
    if (cur->ip_num==ip_num)
      return TRUE;
  return FALSE;
}

//Remove from the log queue every request made by a blocked address.
//An address is blocked when it is the fixed one below or when any of its
//requested paths contains "ROBOT" (matched with StrIMatch).
U0 BlockIPNuip(LogStruct *head)
{
  BlockedStruct blocked_head;
  LogStruct *cur,*nxt;

  QueInit(&blocked_head);

  //Fixed entry 68.227.61.6.  HolyC shifts bind tighter than '+', so this
  //packs the four octets without extra parentheses.
  BlockedStructAdd(&blocked_head,68<<24+227<<16+61<<8+6);

  //Pass 1: collect the addresses to block.
  for (cur=head->next;cur!=head;cur=cur->next)
    if (StrIMatch("ROBOT",cur->file) &&
          !IsBlocked(&blocked_head,cur->ip_num))
      BlockedStructAdd(&blocked_head,cur->ip_num);

  //Pass 2: unlink and free every record from a blocked address.
  for (cur=head->next;cur!=head;cur=nxt) {
    nxt=cur->next; //Fetch before cur may be released.
    if (IsBlocked(&blocked_head,cur->ip_num)) {
      QueRem(cur);
      LogStructDel(cur);
    }
  }

  QueDel(&blocked_head);
}

//TRUE when the requested path matches DOWNLOAD_FILE1 and the logged size
//is at least DOWNLOAD_FILE1_SIZE.
Bool IsDownLoad(LogStruct *tmplg)
{
  if (!StrMatch(DOWNLOAD_FILE1,tmplg->file))
    return FALSE;
  if (tmplg->size<DOWNLOAD_FILE1_SIZE)
    return FALSE;
  return TRUE;
}

//TRUE when the requested path is exactly "/index.html" or "/".
Bool IsIndex(LogStruct *tmplg)
{
  if (!StrCmp(tmplg->file,"/index.html"))
    return TRUE;
  if (!StrCmp(tmplg->file,"/"))
    return TRUE;
  return FALSE;
}

//Decide whether a record takes part in the reports.
//A record is kept when all of these hold:
//  - its time stamp lies in dstart..dend, inclusive;
//  - the path has no '?';
//  - the path is longer than two characters;
//  - the character after the leading one is 'A'..'Z';
//  - size is nonzero;
//  - the path does not end in '/';
//  - the text after the leading character does not start with "Wb";
//  - it does not start with "Family" either.
Bool IsKeeper(LogStruct *tmplg,CDate dstart,CDate dend)
{
  U8 *path=tmplg->file;
  I64 len;
  if (!(dstart<=tmplg->datetime<=dend))
    return FALSE;
  if (StrOcc(path,'?'))
    return FALSE;
  len=StrLen(path);
  if (len<=2)
    return FALSE;
  if (!('A'<=path[1]<='Z'))
    return FALSE;
  if (!tmplg->size)
    return FALSE;
  if (path[len-1]=='/')
    return FALSE;
  //len is at least 3 here, so two bytes after the first are readable.
  if (!MemCmp(&path[1],"Wb",2))
    return FALSE;
  if (len>=7 && !MemCmp(&path[1],"Family",6))
    return FALSE;
  return TRUE;
}

//Insert tmplg into the binary tree keyed by ip_num, using the record's
//own ip_num_left/ip_num_right links.
//
//Returns TRUE when a record with the same ip_num is already in the tree
//(tmplg is then left out), FALSE when tmplg was linked in as a new leaf.
//Throws after printing an error when the remaining stack is nearly
//exhausted.
Bool IPNumTreeAdd(LogStruct **_head,LogStruct *tmplg)
{
  LogStruct *node;
  if (UnusedStk<0x200) {
    PrintErr("Stk Overflow");
    throw;
  }
  node=*_head;
  if (!node) {
    //Empty slot: tmplg becomes the leaf here.
    tmplg->ip_num_left=NULL;
    tmplg->ip_num_right=NULL;
    *_head=tmplg;
    return FALSE;
  }
  if (tmplg->ip_num==node->ip_num)
    return TRUE;
  if (tmplg->ip_num<node->ip_num)
    return IPNumTreeAdd(&node->ip_num_left,tmplg);
  return IPNumTreeAdd(&node->ip_num_right,tmplg);
}

//Count tmplg->link in the binary tree rooted at *_root, ordered by
//StrCmp().  A new node borrows the record's link pointer and starts at
//a count of one; an existing node just has its count bumped.
//Throws after printing an error when the remaining stack is nearly
//exhausted.
U0 LinkTreeAdd(LinkStruct **_root,LogStruct *tmplg)
{
  I64 cmp;
  LinkStruct *node;
  if (UnusedStk<0x200) {
    PrintErr("Stk Overflow");
    throw;
  }
  node=*_root;
  if (!node) {
    node=CAlloc(sizeof(LinkStruct));
    node->link=tmplg->link;
    node->cnt=1;
    *_root=node;
    return;
  }
  cmp=StrCmp(tmplg->link,node->link);
  if (cmp<0)
    LinkTreeAdd(&node->left,tmplg);
  else if (cmp>0)
    LinkTreeAdd(&node->right,tmplg);
  else
    node->cnt++;
}

//Count tmplg->file in the binary tree rooted at *_root, ordered by
//StrCmp().  A new node borrows the record's file pointer and starts at
//a count of one; an existing node just has its count bumped.
//Throws after printing an error when the remaining stack is nearly
//exhausted.
U0 FileTreeAdd(LinkStruct **_root,LogStruct *tmplg)
{
  I64 cmp;
  LinkStruct *node;
  if (UnusedStk<0x200) {
    PrintErr("Stk Overflow");
    throw;
  }
  node=*_root;
  if (!node) {
    node=CAlloc(sizeof(LinkStruct));
    node->file=tmplg->file;
    node->cnt=1;
    *_root=node;
    return;
  }
  cmp=StrCmp(tmplg->file,node->file);
  if (cmp<0)
    FileTreeAdd(&node->left,tmplg);
  else if (cmp>0)
    FileTreeAdd(&node->right,tmplg);
  else
    node->cnt++;
}

//Free every node of a link tree, children first.  The strings the nodes
//point at are left alone.
U0 LinkTreeDel(LinkStruct *root)
{
  if (!root)
    return;
  LinkTreeDel(root->left);
  LinkTreeDel(root->right);
  Free(root);
}

//Print "count:link" for every node, visiting left subtree, node, then
//right subtree.
U0 LinkTreeTraverse(LinkStruct *root)
{
  if (!root)
    return;
  LinkTreeTraverse(root->left);
  "%3d:%$Q\n",root->cnt,root->link;
  LinkTreeTraverse(root->right);
}

//Free every node of a file tree, children first.  The strings the nodes
//point at are left alone.
U0 FileTreeDel(LinkStruct *root)
{
  if (!root)
    return;
  FileTreeDel(root->left);
  FileTreeDel(root->right);
  Free(root);
}

//Print "count:file" for every node, visiting left subtree, node, then
//right subtree.
U0 FileTreeTraverse(LinkStruct *root)
{
  if (!root)
    return;
  FileTreeTraverse(root->left);
  "%3d:%$Q\n",root->cnt,root->file;
  FileTreeTraverse(root->right);
}

//Print the download report for records that pass IsKeeper() and
//IsDownLoad():
//  1. a per-day table of duplicate, total and unique counts;
//  2. an hour-by-hour chart covering at most the last HOURS_MAX hours;
//  3. the distinct link strings with their hit counts.
//A hit is a duplicate when its ip_num was already seen in this report.
//
//head    Queue of parsed records; it is only read, apart from the
//        ip_num tree links inside each record.
//dstart  Start of the reported range, inclusive.
//dend    End of the reported range, inclusive; must not precede dstart.
U0 DownLoadRep(LogStruct *head,CDate dstart,CDate dend)
{
  I64 i,j,hr,dy,cnt,dups,
        hours_start,hours_end,*hour_cnts,*dup_cnts,
        days_start,days_end,*day_cnts,*day_dup_cnts;
  LogStruct *tmplg=head->next,*dup_head=NULL;
  LinkStruct *link_root=NULL;
  CDateStruct ds;

  //Hour bucket numbers: the whole part of the date scaled by 24.
  i=dstart*24;hours_start=i.u32[1];
  i=dend  *24;hours_end =i.u32[1];

  //Day bucket numbers, shifted by the local time offset.
  days_start=(dstart+local_time_offset)>>32;
  days_end  =(dend+local_time_offset)>>32;

  hour_cnts=CAlloc((hours_end-hours_start+1)*sizeof(I64));
  dup_cnts =CAlloc((hours_end-hours_start+1)*sizeof(I64));
  day_cnts =CAlloc((days_end-days_start+1)*sizeof(I64));
  day_dup_cnts=CAlloc((days_end-days_start+1)*sizeof(I64));
  dups=cnt=0;
  while (tmplg!=head) {
    if (IsKeeper(tmplg,dstart,dend) && IsDownLoad(tmplg)) {
      i=tmplg->datetime*24;
      hr=i.u32[1]-hours_start;
      dy=(tmplg->datetime+local_time_offset)>>32-days_start;
      hour_cnts[hr]++;
      day_cnts[dy]++;
      cnt++;
      if (IPNumTreeAdd(&dup_head,tmplg)) {
        day_dup_cnts[dy]++;
        dup_cnts[hr]++;
        dups++;
      }
      LinkTreeAdd(&link_root,tmplg);
    }
    tmplg=tmplg->next;
  }

  "\n\nDownloads of /TOS_Distro.ISO\n";
  //Walk the day buckets by index so the final day is always listed, even
  //when dend falls earlier in the day than dstart.  i supplies the date
  //to print and advances one whole day per row.
  i=dstart;
  for (dy=0;dy<=days_end-days_start;dy++) {
    "%D  Dups:%5d  Total:%5d  Uniques:%5d\n",i,
          day_dup_cnts[dy],
          day_cnts[dy],
          day_cnts[dy]-day_dup_cnts[dy];
    i+=1<<32;
  }

  "\n\nDownloads of /TOS_Distro.ISO\n"
        "'-' is a dup.  '+' is not a dup.\n";
  //Chart only the most recent HOURS_MAX hours of a longer range.
  if (hours_end-hours_start>=HOURS_MAX)
    i=hours_end-HOURS_MAX+1;
  else
    i=hours_start;
  for (;i<=hours_end;i++) {
    Date2Struct(&ds,i<<32/24+local_time_offset);
    "%D %02d: ",i<<32/24,ds.hour;
    //One mark per hit: duplicates first, then the remainder.
    for (j=0;j<dup_cnts[i-hours_start];j++)
      '-';
    for (;j<hour_cnts[i-hours_start];j++)
      '+';
    '\n';
  }
  "Total:%d  Dups:%d  Uniques:%d\n",cnt,dups,cnt-dups;

  "\n\nDownloads of /TOS_Distro.ISO\n";
  LinkTreeTraverse(link_root);
  '\n';

  LinkTreeDel(link_root);
  Free(hour_cnts);
  Free(dup_cnts);
  Free(day_cnts);
  Free(day_dup_cnts);
}

//Print every distinct requested path among the records that pass
//IsKeeper(), each with its hit count, in StrCmp() order.
U0 FileRep(LogStruct *head,CDate dstart,CDate dend)
{
  LinkStruct *file_root=NULL;
  LogStruct *cur;
  for (cur=head->next;cur!=head;cur=cur->next)
    if (IsKeeper(cur,dstart,dend))
      FileTreeAdd(&file_root,cur);
  "\n\nFile Hits\n";
  FileTreeTraverse(file_root);
  '\n';
  FileTreeDel(file_root);
}

//Print the index-page report for records that pass IsKeeper() and
//IsIndex():
//  1. a per-day table of duplicate, total and unique counts;
//  2. an hour-by-hour chart covering at most the last HOURS_MAX hours;
//  3. the distinct link strings with their hit counts.
//A hit is a duplicate when its ip_num was already seen in this report.
//
//head    Queue of parsed records; it is only read, apart from the
//        ip_num tree links inside each record.
//dstart  Start of the reported range, inclusive.
//dend    End of the reported range, inclusive; must not precede dstart.
U0 IndexRep(LogStruct *head,CDate dstart,CDate dend)
{
  I64 i,j,hr,dy,cnt,dups,
        hours_start,hours_end,*hour_cnts,*dup_cnts,
        days_start,days_end,*day_cnts,*day_dup_cnts;
  LogStruct *tmplg=head->next,*dup_head=NULL;
  LinkStruct *link_root=NULL;
  CDateStruct ds;

  //Hour bucket numbers: the whole part of the date scaled by 24.
  i=dstart*24;hours_start=i.u32[1];
  i=dend  *24;hours_end =i.u32[1];

  //Day bucket numbers, shifted by the local time offset.
  days_start=(dstart+local_time_offset)>>32;
  days_end  =(dend+local_time_offset)>>32;

  hour_cnts=CAlloc((hours_end-hours_start+1)*sizeof(I64));
  dup_cnts =CAlloc((hours_end-hours_start+1)*sizeof(I64));
  day_cnts =CAlloc((days_end-days_start+1)*sizeof(I64));
  day_dup_cnts=CAlloc((days_end-days_start+1)*sizeof(I64));
  dups=cnt=0;
  while (tmplg!=head) {
    if (IsKeeper(tmplg,dstart,dend) && IsIndex(tmplg)) {
      i=tmplg->datetime*24;
      hr=i.u32[1]-hours_start;
      dy=(tmplg->datetime+local_time_offset)>>32-days_start;
      hour_cnts[hr]++;
      day_cnts[dy]++;
      cnt++;
      if (IPNumTreeAdd(&dup_head,tmplg)) {
        day_dup_cnts[dy]++;
        dup_cnts[hr]++;
        dups++;
      }
      LinkTreeAdd(&link_root,tmplg);
    }
    tmplg=tmplg->next;
  }

  "\n\nHits on /index.html\n";
  //Walk the day buckets by index so the final day is always listed, even
  //when dend falls earlier in the day than dstart.  i supplies the date
  //to print and advances one whole day per row.
  i=dstart;
  for (dy=0;dy<=days_end-days_start;dy++) {
    "%D  Dups:%5d  Total:%5d  Uniques:%5d\n",i,
          day_dup_cnts[dy],
          day_cnts[dy],
          day_cnts[dy]-day_dup_cnts[dy];
    i+=1<<32;
  }

  //The '-'/'+' legend belongs to the chart that uses those marks.
  "\n\nHits on /index.html\n"
        "'-' is a dup.  '+' is not a dup.\n";
  //Chart only the most recent HOURS_MAX hours of a longer range.
  if (hours_end-hours_start>=HOURS_MAX)
    i=hours_end-HOURS_MAX+1;
  else
    i=hours_start;
  for (;i<=hours_end;i++) {
    Date2Struct(&ds,i<<32/24+local_time_offset);
    "%D %02d: ",i<<32/24,ds.hour;
    //One mark per hit: duplicates first, then the remainder.
    for (j=0;j<dup_cnts[i-hours_start];j++)
      '-';
    for (;j<hour_cnts[i-hours_start];j++)
      '+';
    '\n';
  }
  "Total:%d  Dups:%d  Uniques:%d\n",cnt,dups,cnt-dups;

  "\n\nHits on /index.html\n";
  LinkTreeTraverse(link_root);
  '\n';

  LinkTreeDel(link_root);
  Free(hour_cnts);
  Free(dup_cnts);
  Free(day_cnts);
  Free(day_dup_cnts);
}

//Build the whole web log report.
//
//mask             File mask of the log files to parse.
//output_filename  Name the finished report document is written under.
//
//Parses the logs, prompts for the start and end dates (defaulting to
//the range found in the logs), drops requests from blocked addresses,
//then prints the index, file and download reports and saves them.
U0 WebLogRep(U8 *mask,U8 *output_filename)
{
  LogStruct *head;
  //Inverted on purpose: stays start>end until a record is parsed.
  CDate dstart=I64_MAX,dend=I64_MIN;

  DocMax;

  head=PrsLogFiles(mask,&dstart,&dend);
  if (dstart>dend)
    PrintErr("No Data.\n");
  else {
    dstart=GetDate("Start(%D):",dstart);
    dend  =GetDate("End  (%D):",dend);
    if (dstart>dend)
      //The reports size their count arrays from end-start, so an
      //inverted range must not reach them.
      PrintErr("Start is after End.\n");
    else {
      BlockIPNuip(head);

      DocClear;
      "$WW,0$";
      IndexRep(head,dstart,dend);
      FileRep(head,dstart,dend);
      DownLoadRep(head,dstart,dend);

      //Save the report under output_filename.
      StrCpy(DocPut->filename.name,output_filename);
      DocWrite(DocPut,TRUE);

      "$WW,1$";
    }
  }
  LogLstDel(head);
  //LogLstDel() frees the records only; the head from PrsLogFiles() is
  //released here.
  Free(head);
}

//Driver, compiled only when __CMD_LINE__ is nonzero (presumably when the
//file is run from the command line -- confirm): change to this file's
//directory and report on every "*.log*" file found there.
#if __CMD_LINE__
Cd(__DIR__);;
WebLogRep("*.log*","~/DemoWebLog.DD.Z");
#endif