// Text that IsDownLoad() looks for inside a request's file path.
#define DOWNLOAD_FILE1              "TOS_Distro.ISO"
// Smallest logged response size that IsDownLoad() accepts as a download.
#define DOWNLOAD_FILE1_SIZE         16000000

// Upper bound on the number of hourly rows printed by the hourly charts.
#define HOURS_MAX                   (24*3)

// One parsed access-log request.  Each entry is a node of the log queue and
// can additionally be linked into a report's IP-number tree.
class LogStruct
{
    LogStruct   *next;          // queue links (used with QueueInsert/QueueRemove)
    LogStruct   *last;
    LogStruct   *ip_num_left;   // children in the tree built by IPNumTreeAdd()
    LogStruct   *ip_num_right;
    U32          ip_num;        // dotted-quad address, first octet in the top byte
    U32          code;          // number following the quoted request (presumably the HTTP status)
    I64          size;          // number following code (presumably the response size)
    U8          *file;          // requested path; owned, released by LogStructDel()
    U8          *link;          // quoted string following size (presumably the referrer); owned
    CDate        datetime;      // request time with the logged zone offset subtracted
};

// Node of a counting binary tree keyed by a string.  LinkTreeAdd() keys on
// link, FileTreeAdd() keys on file; only the key in use is filled in.
class LinkStruct
{
    LinkStruct  *left;      // subtree with keys that compare lower
    LinkStruct  *right;     // subtree with keys that compare higher
    U8          *link;      // borrowed from a LogStruct, never freed by the tree
    U8          *file;      // borrowed from a LogStruct, never freed by the tree
    I64          count;     // number of log entries seen with this key
};

// Queue node holding one IP number whose log entries are to be discarded.
class BlockedStruct
{
    BlockedStruct   *next;      // queue links (used with QueueInit/QueueInsert/QueueDel)
    BlockedStruct   *last;
    U32              ip_num;    // same encoding as LogStruct.ip_num
};

// Release a log entry together with the two strings it owns.
// The entry is not unlinked from any queue here; callers do that first
// when it has been inserted.
U0 LogStructDel(LogStruct *tmplg)
{
    Free(tmplg->link);
    Free(tmplg->file);
    Free(tmplg);
}

// Parse one access-log file and append its "GET" requests to the queue at head.
//
// Each text entry of the document is expected to look like
//   a.b.c.d ... [dd/Mon/yyyy:hh:mm:ss zone] "GET path ..." code size "link"
//
// head     queue head; accepted entries are inserted at the tail.
// name     file to read; it is also echoed to the output.
// _dstart  in/out: lowered to the earliest accepted timestamp seen.
// _dend    in/out: raised to the latest accepted timestamp seen.
//
// "HEAD" requests are dropped silently.  Any other line that fails to parse
// is echoed to the output and dropped.
// NOTE(review): parsing writes NUL terminators into the entry's tag, so a
// rejected line may be echoed truncated at the last terminator written.
U0 ParseSingleLogFile(LogStruct *head, U8 *name, CDate *_dstart, CDate *_dend)
{
    CDoc        *doc = DocRead(name, DOCF_PLAIN_TEXT_TABS | DOCF_DBL_DOLLARS | DOCF_NO_CURSOR);
    CDocEntry   *doc_e = doc->head.next;
    U8          *src, *src2, *mon_list = Define("ST_MONTHS");
    LogStruct   *tmplg;
    CDateStruct  ds;
    I64 i;

    "%$Q\n", name;
    // Walk the document's entries until the walk arrives back at the document itself.
    while (doc_e != doc)
    {
        if (doc_e->type_u8 == DOCT_TEXT)
        {
            // Zeroed so file/link are NULL if parsing fails before they are set.
            tmplg = CAlloc(sizeof(LogStruct));
            try
            {
                // Dotted-quad address: first octet goes into the top byte of ip_num.
                src = doc_e->tag;
                tmplg->ip_num.u8[3] = Str2I64(src, 10, &src);
                if (*src++ != '.') throw;
                tmplg->ip_num.u8[2] = Str2I64(src, 10, &src);
                if (*src++ != '.') throw;
                tmplg->ip_num.u8[1] = Str2I64(src, 10, &src);
                if (*src++ != '.') throw;
                tmplg->ip_num.u8[0] = Str2I64(src, 10, &src);

                // Skip ahead to just past '['; running off the end of the line is an error.
                do if (!*src) throw;
                while (*src++ != '[');
                MemSet(&ds, 0, sizeof(CDateStruct));
                ds.day_of_mon = Str2I64(src, 10, &src);
                if (*src++ != '/') throw;
                // Month name: overwrite the following '/' with NUL so the name can be
                // looked up in the ST_MONTHS list.
                src2 = src;
                do if (!*src2) throw;
                while (*src2++ != '/');
                * --src2 = 0;
                // NOTE(review): a name that is not found is not rejected here — confirm
                // what ListMatch returns in that case.
                ds.mon = 1 + ListMatch(src, mon_list, LMF_IGNORE_CASE);
                src = ++src2;
                ds.year = Str2I64(src, 10, &src);
                if (*src++ != ':') throw;
                ds.hour = Str2I64(src, 10, &src);
                if (*src++ != ':') throw;
                ds.min = Str2I64(src, 10, &src);
                if (*src++ != ':') throw;
                ds.sec = Str2I64(src, 10, &src);
                tmplg->datetime = Struct2Date(&ds);
                if (*src++ != CH_SPACE) throw;
                // Zone offset is read as one signed integer of the form hhmm and
                // subtracted from the timestamp (i / 100 hours plus i % 100 minutes).
                i = Str2I64(src, 10, &src);
                tmplg->datetime -= (i / 100 + i % 100 / 60.0) * CDATE_FREQ * 60 * 60;
                // Sanity check: only timestamps in [1/1/2017, 1/1/2050) are accepted.
                if (!(Str2Date("1/1/2017") <= tmplg->datetime < Str2Date("1/1/2050")))
                    throw;
                // Widen the caller's overall date range.  This happens before the rest
                // of the line is validated, so a line rejected later still counts here.
                if (tmplg->datetime < *_dstart) *_dstart = tmplg->datetime;
                if (tmplg->datetime > *_dend)   *_dend   = tmplg->datetime;

                // Skip ahead to just past ']'.
                do if (!*src) throw;
                while (*src++ != ']');

                // The request must follow as: space, double quote, method.
                if (*src++ != CH_SPACE) throw;
                if (*src++ != '\"') throw;
                if (!StrNCompare(src, "GET ", 4))
                {
                    // Path: text up to the next space, copied into an owned string.
                    src2 = src += 4;
                    do if (!*src2) throw;
                    while (*src2++ != CH_SPACE);
                    * --src2 = 0;
                    tmplg->file = StrNew(src);
                    src = ++src2;

                    // Skip the remainder of the quoted request.
                    do if (!*src) throw;
                    while (*src++ != '\"');

                    // Two space-terminated numbers: code, then size.
                    tmplg->code = Str2I64(src, 10, &src);
                    if (*src++ != CH_SPACE) throw;
                    tmplg->size = Str2I64(src, 10, &src);
                    if (*src++ != CH_SPACE) throw;

                    // Quoted link string, copied into an owned string.
                    if (*src++ != '\"') throw;
                    src2 = src;
                    do if (!*src2) throw;
                    while (*src2++ != '\"');
                    * --src2 = 0;
                    tmplg->link = StrNew(src);
                    src = ++src2;

                    // Fully parsed: append at the tail of the queue.
                    QueueInsert(tmplg, head->last);
                }
                else if (!StrNCompare(src, "HEAD ", 5))
                {
                    // HEAD requests are not reported on; discard without echoing.
                    LogStructDel(tmplg);
                }
                else
                    throw;
            }
            catch
            {
                // Presumably marks the exception as handled so parsing continues
                // with the next line — confirm against the task exception docs.
                Fs->catch_except = TRUE;
                "%$Q\n", doc_e->tag;
                // Frees whatever was allocated before the failure (file may be set).
                LogStructDel(tmplg);
            }
        }
        doc_e = doc_e->next;
    }
    DocDel(doc);
}

// Parse every file matching files_find_mask into one queue of log entries.
//
// files_find_mask  mask handed to FilesFind().
// _dstart, _dend   in/out date range, updated by ParseSingleLogFile().
//
// Returns a newly allocated queue head.  The caller owns the head and all
// entries linked to it.
LogStruct *ParseLogFiles(U8 *files_find_mask, CDate *_dstart, CDate *_dend)
{
    LogStruct *head = CAlloc(sizeof(LogStruct));
    CDirEntry *found = FilesFind(files_find_mask), *cur;

    QueueInit(head);
    for (cur = found; cur; cur = cur->next)
        ParseSingleLogFile(head, cur->full_name, _dstart, _dend);
    DirTreeDel(found);

    return head;
}

// Free every entry linked to head.  The head itself is neither freed nor
// re-initialized.
U0 LogListDel(LogStruct *head)
{
    LogStruct *cur, *following;

    for (cur = head->next; cur != head; cur = following)
    {
        // Read the successor first: LogStructDel() releases cur.
        following = cur->next;
        LogStructDel(cur);
    }
}

// Append ip_num to the tail of the blocked list at head.
// No duplicate check is made here; callers use IsBlocked() first if needed.
U0 BlockedStructAdd(BlockedStruct *head, U32 ip_num)
{
    BlockedStruct *entry = CAlloc(sizeof(BlockedStruct));

    entry->ip_num = ip_num;
    QueueInsert(entry, head->last);
}

// Linear search of the blocked list.
// Returns TRUE when ip_num is present, FALSE otherwise.
Bool IsBlocked(BlockedStruct *head, U32 ip_num)
{
    BlockedStruct *cur;

    for (cur = head->next; cur != head; cur = cur->next)
        if (cur->ip_num == ip_num)
            return TRUE;

    return FALSE;
}

// Remove from the log queue every entry whose IP number is blocked.
// The blocked set is one hard-coded address plus every address that made
// at least one request whose path matches "ROBOT" (via StrIMatch).
U0 BlockIPNuip(LogStruct *head)
{
    BlockedStruct blocked_head;
    LogStruct    *cur, *following;

    QueueInit(&blocked_head);

    // Hard-coded entry: 68.227.61.6 in the ip_num encoding (first octet in the top byte).
    BlockedStructAdd(&blocked_head, (68 << 24) + (227 << 16) + (61 << 8) + 6);

    //pass 1: collect robot list
    for (cur = head->next; cur != head; cur = cur->next)
        if (StrIMatch("ROBOT", cur->file) && !IsBlocked(&blocked_head, cur->ip_num))
            BlockedStructAdd(&blocked_head, cur->ip_num);

    //pass 2: remove entries from blocked ip_nums
    for (cur = head->next; cur != head; cur = following)
    {
        // Read the successor first: the entry may be unlinked and freed below.
        following = cur->next;
        if (IsBlocked(&blocked_head, cur->ip_num))
        {
            QueueRemove(cur);
            LogStructDel(cur);
        }
    }

    QueueDel(&blocked_head);
}

// TRUE when the entry's path contains DOWNLOAD_FILE1 and its logged size is
// at least DOWNLOAD_FILE1_SIZE; FALSE otherwise.
Bool IsDownLoad(LogStruct *tmplg)
{
    if (!StrMatch(DOWNLOAD_FILE1, tmplg->file))
        return FALSE;
    if (tmplg->size < DOWNLOAD_FILE1_SIZE)
        return FALSE;

    return TRUE;
}

// TRUE when the entry's path is exactly "/index.html" or "/"; FALSE otherwise.
Bool IsIndex(LogStruct *tmplg)
{
    if (!StrCompare(tmplg->file, "/index.html"))
        return TRUE;
    if (!StrCompare(tmplg->file, "/"))
        return TRUE;

    return FALSE;
}

// Decide whether a log entry is counted in the reports.
// An entry is kept only when all of the following hold:
//   - its timestamp lies in [dstart, dend];
//   - its path has no '?';
//   - its path is longer than 2 characters and the character after the
//     leading one is an uppercase letter;
//   - its logged size is nonzero;
//   - its path does not end in '/';
//   - the text after the leading character starts with neither "Wb" nor "Family".
Bool IsKeeper(LogStruct *tmplg, CDate dstart, CDate dend)
{
    U8  *path = tmplg->file;
    I64  len;

    if (!(dstart <= tmplg->datetime <= dend))
        return FALSE;
    if (StrOcc(path, '?'))
        return FALSE;

    len = StrLen(path);
    if (len <= 2)
        return FALSE;
    if (!('A' <= path[1] <= 'Z'))
        return FALSE;
    if (!tmplg->size)
        return FALSE;
    if (path[len - 1] == '/')
        return FALSE;
    // MemCompare returns zero on a match, so a zero result means an excluded prefix.
    if (len >= 3 && !MemCompare(&path[1], "Wb", 2))
        return FALSE;
    if (len >= 7 && !MemCompare(&path[1], "Family", 6))
        return FALSE;

    return TRUE;
}

// Insert tmplg into the binary tree rooted at *_head, keyed by ip_num.
//
// Returns TRUE when an entry with the same ip_num is already in the tree
// (tmplg is then left out of it), FALSE when tmplg was inserted as a new leaf.
// Throws after printing an error when the remaining stack is below 0x200.
Bool IPNumTreeAdd(LogStruct **_head, LogStruct *tmplg)
{
    LogStruct *node;

    // Recursion depth follows tree depth, so guard the stack on every call.
    if (UnusedStack < 0x200)
    {
        PrintErr("Stack Overflow");
        throw;
    }

    node = *_head;
    if (!node)
    {
        // Empty slot: clear any child links left over from an earlier tree.
        tmplg->ip_num_left = NULL;
        tmplg->ip_num_right = NULL;
        *_head = tmplg;

        return FALSE;
    }

    if (tmplg->ip_num == node->ip_num)
        return TRUE;
    if (tmplg->ip_num < node->ip_num)
        return IPNumTreeAdd(&node->ip_num_left, tmplg);

    return IPNumTreeAdd(&node->ip_num_right, tmplg);
}

// Count tmplg->link in the tree rooted at *_root: bump the matching node's
// count, or add a new node with count 1 when the link is not present yet.
// New nodes borrow the link string from tmplg; it is not copied.
// Throws after printing an error when the remaining stack is below 0x200.
U0 LinkTreeAdd(LinkStruct **_root, LogStruct *tmplg)
{
    I64         cmp;
    LinkStruct *node;

    // Recursion depth follows tree depth, so guard the stack on every call.
    if (UnusedStack < 0x200)
    {
        PrintErr("Stack Overflow");
        throw;
    }

    node = *_root;
    if (!node)
    {
        node = CAlloc(sizeof(LinkStruct));
        node->link  = tmplg->link;
        node->count = 1;
        *_root = node;
        return;
    }

    cmp = StrCompare(tmplg->link, node->link);
    if (!cmp)
        node->count++;
    else if (cmp < 0)
        LinkTreeAdd(&node->left, tmplg);
    else
        LinkTreeAdd(&node->right, tmplg);
}

// Count tmplg->file in the tree rooted at *_root: bump the matching node's
// count, or add a new node with count 1 when the path is not present yet.
// New nodes borrow the file string from tmplg; it is not copied.
// Throws after printing an error when the remaining stack is below 0x200.
U0 FileTreeAdd(LinkStruct **_root, LogStruct *tmplg)
{
    I64         cmp;
    LinkStruct *node;

    // Recursion depth follows tree depth, so guard the stack on every call.
    if (UnusedStack < 0x200)
    {
        PrintErr("Stack Overflow");
        throw;
    }

    node = *_root;
    if (!node)
    {
        node = CAlloc(sizeof(LinkStruct));
        node->file  = tmplg->file;
        node->count = 1;
        *_root = node;
        return;
    }

    cmp = StrCompare(tmplg->file, node->file);
    if (!cmp)
        node->count++;
    else if (cmp < 0)
        FileTreeAdd(&node->left, tmplg);
    else
        FileTreeAdd(&node->right, tmplg);
}

// Free every node of the tree, children before parent.
// The strings the nodes point at are not freed.  A NULL root is a no-op.
U0 LinkTreeDel(LinkStruct *root)
{
    if (!root)
        return;

    LinkTreeDel(root->left);
    LinkTreeDel(root->right);
    Free(root);
}

// Print "count:link" for every node, left subtree first, then the node,
// then the right subtree.  A NULL root prints nothing.
U0 LinkTreeTraverse(LinkStruct *root)
{
    if (!root)
        return;

    LinkTreeTraverse(root->left);
    "%3d:%$Q\n", root->count, root->link;
    LinkTreeTraverse(root->right);
}

// Free every node of the tree, children before parent.
// The strings the nodes point at are not freed.  A NULL root is a no-op.
U0 FileTreeDel(LinkStruct *root)
{
    if (!root)
        return;

    FileTreeDel(root->left);
    FileTreeDel(root->right);
    Free(root);
}

// Print "count:file" for every node, left subtree first, then the node,
// then the right subtree.  A NULL root prints nothing.
U0 FileTreeTraverse(LinkStruct *root)
{
    if (!root)
        return;

    FileTreeTraverse(root->left);
    "%3d:%$Q\n", root->count, root->file;
    FileTreeTraverse(root->right);
}

// Print the download report for entries accepted by IsKeeper() and IsDownLoad():
//   1. a per-day table of duplicate, total and unique counts;
//   2. an hourly chart covering at most the last HOURS_MAX hours, one '-' per
//      duplicate and one '+' per non-duplicate, followed by overall totals;
//   3. the link strings of the counted entries with their hit counts.
// A "duplicate" is a counted entry whose ip_num was already seen earlier in
// this report.
//
// head          log queue to scan (its entries' ip_num_left/right are reused).
// dstart, dend  inclusive date range of the report.
//
// Note: shifts are parenthesized explicitly below; HolyC already binds
// << and >> tighter than * / + -, so this only aids readers used to C.
U0 DownLoadRep(LogStruct *head, CDate dstart, CDate dend)
{
    I64         i, j, count, dups, hour_idx, day_idx,
                hours_start, hours_end, *hour_counts, *dup_counts,
                days_start, days_end, *day_counts, *day_dup_counts;
    LogStruct  *tmplg = head->next, *dup_head = NULL;
    LinkStruct *link_root = NULL;
    CDateStruct ds;

    // Multiplying a CDate by 24 leaves the whole-hour number in the upper 32 bits.
    i = dstart  * 24;
    hours_start = i.u32[1];
    i = dend    * 24;
    hours_end   = i.u32[1];

    // Day numbers are taken after applying local_time_offset.
    days_start  = (dstart + local_time_offset) >> 32;
    days_end    = (dend   + local_time_offset) >> 32;

    hour_counts     = CAlloc((hours_end - hours_start + 1) * sizeof(I64));
    dup_counts      = CAlloc((hours_end - hours_start + 1) * sizeof(I64));
    day_counts      = CAlloc((days_end  - days_start  + 1) * sizeof(I64));
    day_dup_counts  = CAlloc((days_end  - days_start  + 1) * sizeof(I64));
    dups = count = 0;
    while (tmplg != head)
    {
        // IsKeeper() guarantees dstart <= datetime <= dend, which keeps both
        // indices inside the arrays allocated above.
        if (IsKeeper(tmplg, dstart, dend) && IsDownLoad(tmplg))
        {
            i = tmplg->datetime * 24;
            hour_idx = i.u32[1] - hours_start;
            day_idx  = ((tmplg->datetime + local_time_offset) >> 32) - days_start;

            hour_counts[hour_idx]++;
            day_counts[day_idx]++;
            count++;
            if (IPNumTreeAdd(&dup_head, tmplg))
            {
                day_dup_counts[day_idx]++;
                dup_counts[hour_idx]++;
                dups++;
            }
            LinkTreeAdd(&link_root, tmplg);
        }
        tmplg = tmplg->next;
    }

    "\n\nDownloads of /TOS_Distro.ISO\n";
    // Step one day at a time from dstart.  The loop ends on the day number,
    // not on "i <= dend": when dstart's time of day is later than dend's,
    // the last step lands past dend while still inside the final day, and
    // that day's row must still be printed.
    for (i = dstart; ((i + local_time_offset) >> 32) <= days_end; i += 1 << 32)
    {
        day_idx = ((i + local_time_offset) >> 32) - days_start;
        "%D  Dups:%5d  Total:%5d  Uniques:%5d\n", i,
        day_dup_counts[day_idx],
        day_counts[day_idx],
        day_counts[day_idx] - day_dup_counts[day_idx];
    }

    "\n\nDownloads of /TOS_Distro.ISO\n"
    "'-' is a dup.  '+' is not a dup.\n";
    // Show only the most recent HOURS_MAX hours when the range is longer.
    if (hours_end - hours_start >= HOURS_MAX)
        i = hours_end - HOURS_MAX + 1;
    else
        i = hours_start;
    for (; i <= hours_end; i++)
    {
        // (i << 32) / 24 turns the hour number back into a CDate.
        Date2Struct(&ds, (i << 32) / 24 + local_time_offset);
        "%D %02d: ", (i << 32) / 24, ds.hour;
        for (j = 0; j < dup_counts[i - hours_start]; j++)
            '-';
        for (; j < hour_counts[i - hours_start]; j++)
            '+';
        '\n';
    }
    "Total:%d  Dups:%d  Uniques:%d\n", count, dups, count - dups;

    "\n\nDownloads of /TOS_Distro.ISO\n";
    LinkTreeTraverse(link_root);
    '\n';

    LinkTreeDel(link_root);
    Free(hour_counts);
    Free(dup_counts);
    Free(day_counts);
    Free(day_dup_counts);
}

// Print the "File Hits" report: every distinct path among the entries
// accepted by IsKeeper(), with its hit count, in tree (StrCompare) order.
U0 FileRep(LogStruct *head, CDate dstart, CDate dend)
{
    LogStruct  *cur;
    LinkStruct *file_root = NULL;

    for (cur = head->next; cur != head; cur = cur->next)
        if (IsKeeper(cur, dstart, dend))
            FileTreeAdd(&file_root, cur);

    "\n\nFile Hits\n";
    FileTreeTraverse(file_root);
    '\n';
    FileTreeDel(file_root);
}

// Print the index-page report for entries accepted by IsIndex() that lie in
// the date range and have a nonzero size:
//   1. a per-day table of duplicate, total and unique counts;
//   2. an hourly chart covering at most the last HOURS_MAX hours, one '-' per
//      duplicate and one '+' per non-duplicate, followed by overall totals;
//   3. the link strings of the counted entries with their hit counts.
// A "duplicate" is a counted entry whose ip_num was already seen earlier in
// this report.
//
// head          log queue to scan (its entries' ip_num_left/right are reused).
// dstart, dend  inclusive date range of the report.
//
// Note: shifts are parenthesized explicitly below; HolyC already binds
// << and >> tighter than * / + -, so this only aids readers used to C.
U0 IndexRep(LogStruct *head, CDate dstart, CDate dend)
{
    I64         i, j, count, dups, hour_idx, day_idx,
                hours_start, hours_end, *hour_counts, *dup_counts,
                days_start, days_end, *day_counts, *day_dup_counts;
    LogStruct  *tmplg = head->next, *dup_head = NULL;
    LinkStruct *link_root = NULL;
    CDateStruct ds;

    // Multiplying a CDate by 24 leaves the whole-hour number in the upper 32 bits.
    i = dstart  * 24;
    hours_start = i.u32[1];
    i = dend    * 24;
    hours_end   = i.u32[1];

    // Day numbers are taken after applying local_time_offset.
    days_start  = (dstart + local_time_offset) >> 32;
    days_end    = (dend   + local_time_offset) >> 32;

    hour_counts     = CAlloc((hours_end - hours_start + 1) * sizeof(I64));
    dup_counts      = CAlloc((hours_end - hours_start + 1) * sizeof(I64));
    day_counts      = CAlloc((days_end  - days_start  + 1) * sizeof(I64));
    day_dup_counts  = CAlloc((days_end  - days_start  + 1) * sizeof(I64));
    dups = count = 0;
    while (tmplg != head)
    {
        // IsKeeper() cannot be combined with IsIndex(): it requires a path
        // longer than 2 characters whose second character is uppercase, which
        // neither "/" nor "/index.html" satisfies, so the report would always
        // be empty.  Apply only the parts of it that make sense here: the
        // date range (which also keeps the indices below in bounds) and the
        // nonzero size.
        if (dstart <= tmplg->datetime <= dend && tmplg->size && IsIndex(tmplg))
        {
            i = tmplg->datetime * 24;
            hour_idx = i.u32[1] - hours_start;
            day_idx  = ((tmplg->datetime + local_time_offset) >> 32) - days_start;

            hour_counts[hour_idx]++;
            day_counts[day_idx]++;
            count++;
            if (IPNumTreeAdd(&dup_head, tmplg))
            {
                day_dup_counts[day_idx]++;
                dup_counts[hour_idx]++;
                dups++;
            }
            LinkTreeAdd(&link_root, tmplg);
        }
        tmplg = tmplg->next;
    }

    "\n\nHits on /index.html\n";
    // Step one day at a time from dstart.  The loop ends on the day number,
    // not on "i <= dend": when dstart's time of day is later than dend's,
    // the last step lands past dend while still inside the final day, and
    // that day's row must still be printed.
    for (i = dstart; ((i + local_time_offset) >> 32) <= days_end; i += 1 << 32)
    {
        day_idx = ((i + local_time_offset) >> 32) - days_start;
        "%D  Dups:%5d  Total:%5d  Uniques:%5d\n", i,
        day_dup_counts[day_idx],
        day_counts[day_idx],
        day_counts[day_idx] - day_dup_counts[day_idx];
    }

    // The '-'/'+' legend belongs to the hourly chart, which is the only part
    // of the report that draws those characters.
    "\n\nHits on /index.html\n"
    "'-' is a dup.  '+' is not a dup.\n";
    // Show only the most recent HOURS_MAX hours when the range is longer.
    if (hours_end - hours_start >= HOURS_MAX)
        i = hours_end - HOURS_MAX + 1;
    else
        i = hours_start;
    for (; i <= hours_end; i++)
    {
        // (i << 32) / 24 turns the hour number back into a CDate.
        Date2Struct(&ds, (i << 32) / 24 + local_time_offset);
        "%D %02d: ", (i << 32) / 24, ds.hour;
        for (j = 0; j < dup_counts[i - hours_start]; j++)
            '-';
        for (; j < hour_counts[i - hours_start]; j++)
            '+';
        '\n';
    }
    "Total:%d  Dups:%d  Uniques:%d\n", count, dups, count - dups;

    "\n\nHits on /index.html\n";
    LinkTreeTraverse(link_root);
    '\n';

    LinkTreeDel(link_root);
    Free(hour_counts);
    Free(dup_counts);
    Free(day_counts);
    Free(day_dup_counts);
}

// Build the complete web-log report.
//
// mask             file mask of the log files to parse.
// output_filename  name under which the output document is written.
//
// Parses all matching files, lets the user adjust the detected start and end
// dates, drops entries from blocked IP numbers, prints the index, file and
// download reports into the current output document and writes it out.
// Prints an error instead when no line produced a valid timestamp.
U0 WebLogRep(U8 *mask, U8 *output_filename)
{
    LogStruct  *head;
    // Start with an empty (inverted) range; parsing narrows it to the data seen.
    CDate       dstart = I64_MAX, dend = I64_MIN;

    DocMax;

    head = ParseLogFiles(mask, &dstart, &dend);
    if (dstart > dend)
        PrintErr("No Data.\n");
    else
    {
        dstart  = DateGet("Start(%D):", dstart);
        dend    = DateGet("End  (%D):", dend);
        BlockIPNuip(head);

        // Discard the parse-time echo so only the reports end up in the file.
        DocClear;
        // $WW,0$ before the reports and $WW,1$ after them — presumably
        // switches word wrap off while the tables are printed.
        "$WW,0$";
        IndexRep(head, dstart, dend);
        FileRep(head, dstart, dend);
        DownLoadRep(head, dstart, dend);

        StrCopy(DocPut->filename.name, output_filename);
        DocWrite(DocPut, TRUE);

        "$WW,1$";
    }
    LogListDel(head);
    // LogListDel() frees only the entries; the head allocated by
    // ParseLogFiles() must be released separately or it leaks on every run.
    Free(head);
}

// Run the report immediately when __CMD_LINE__ is set: change to this file's
// directory, then report on every file matching "*.log*" and write the
// result to ~/DemoWebLog.DD.
#if __CMD_LINE__
// NOTE(review): the doubled ';' looks deliberate (presumably forces the Cd to
// execute before the next statement is compiled) — confirm before "fixing" it.
Cd(__DIR__);;
WebLogRep("*.log*", "~/DemoWebLog.DD");
#endif