/* Web server access-log report.

   Parses every log file matching a mask, drops traffic from blocked/robot
   IP numbers and prints three reports to the current document: hits on the
   index page, hits per file, and downloads of DOWNLOAD_FILE1.

   Language notes for readers coming from C: this file relies on the
   dialect's operator precedence, in which shifts bind tighter than
   multiplication/division and addition/subtraction, and on chained
   comparisons such as a <= b <= c.  Expressions are therefore left
   exactly as written unless noted.
*/

#define DOWNLOAD_FILE1      "TOS_Distro.ISO"
#define DOWNLOAD_FILE1_SIZE 16000000
#define HOURS_MAX           (24*3)

// One parsed "GET" request. Lives both in a queue (next/last) and,
// temporarily, in a binary tree keyed by ip_num (ip_num_left/right).
class LogStruct
{
    LogStruct *next, *last;
    LogStruct *ip_num_left, *ip_num_right;
    U32 ip_num, code;       // ip_num: first octet in u8[3], last octet in u8[0]
    I64 size;
    U8 *file, *link;        // heap strings owned by this entry (see LogStructDel)
    CDate datetime;
};

// Binary-tree node counting occurrences of a link or a file string.
// The link/file pointers are borrowed from a LogStruct, never freed here.
class LinkStruct
{
    LinkStruct *left, *right;
    U8 *link, *file;
    I64 count;
};

// Queue entry holding one blocked IP number.
class BlockedStruct
{
    BlockedStruct *next, *last;
    U32 ip_num;
};

// Frees one log entry together with the two strings it owns.
U0 LogStructDel(LogStruct *tmplg)
{
    Free(tmplg->file);
    Free(tmplg->link);
    Free(tmplg);
}

// Parses one log file and appends every well-formed "GET" line to the
// queue at head.  "HEAD" requests are dropped silently; any other line that
// fails to parse is echoed and dropped.  *_dstart / *_dend are widened to
// cover the date of every line whose date parsed and passed the range check
// (this happens before the request part of the line is examined).
U0 ParseSingleLogFile(LogStruct *head, U8 *name, CDate *_dstart, CDate *_dend)
{
    CDoc *doc = DocRead(name, DOCF_PLAIN_TEXT_TABS | DOCF_DBL_DOLLARS | DOCF_NO_CURSOR);
    CDocEntry *doc_e = doc->head.next;
    U8 *src, *src2, *mon_list = Define("ST_MONTHS");
    LogStruct *tmplg;
    CDateStruct ds;
    I64 i;

    "%$Q\n", name;
    while (doc_e != doc)
    {
        if (doc_e->type_u8 == DOCT_TEXT)
        {
            tmplg = CAlloc(sizeof(LogStruct));
            try
            {
                // Dotted-quad client address.
                src = doc_e->tag;
                tmplg->ip_num.u8[3] = Str2I64(src, 10, &src);
                if (*src++ != '.')
                    throw;
                tmplg->ip_num.u8[2] = Str2I64(src, 10, &src);
                if (*src++ != '.')
                    throw;
                tmplg->ip_num.u8[1] = Str2I64(src, 10, &src);
                if (*src++ != '.')
                    throw;
                tmplg->ip_num.u8[0] = Str2I64(src, 10, &src);

                // Skip ahead to the bracketed timestamp: [day/month/year:h:m:s zone]
                do
                    if (!*src)
                        throw;
                while (*src++ != '[');

                MemSet(&ds, 0, sizeof(CDateStruct));
                ds.day_of_mon = Str2I64(src, 10, &src);
                if (*src++ != '/')
                    throw;

                // Terminate the month name in place so it can be looked up.
                src2 = src;
                do
                    if (!*src2)
                        throw;
                while (*src2++ != '/');
                *--src2 = 0;

                // Reject lines whose month name is not recognised instead of
                // storing a bogus month number.
                i = ListMatch(src, mon_list, LMF_IGNORE_CASE);
                if (i < 0)
                    throw;
                ds.mon = 1 + i;
                src = ++src2;

                ds.year = Str2I64(src, 10, &src);
                if (*src++ != ':')
                    throw;
                ds.hour = Str2I64(src, 10, &src);
                if (*src++ != ':')
                    throw;
                ds.min = Str2I64(src, 10, &src);
                if (*src++ != ':')
                    throw;
                ds.sec = Str2I64(src, 10, &src);
                tmplg->datetime = Struct2Date(&ds);
                if (*src++ != CH_SPACE)
                    throw;

                // Zone field is read as one number; i / 100 is taken as hours
                // and i % 100 as minutes, and that offset is subtracted.
                i = Str2I64(src, 10, &src);
                tmplg->datetime -= (i / 100 + i % 100 / 60.0) * CDATE_FREQ * 60 * 60;

                // Sanity window for accepted dates.
                if (!(Str2Date("1/1/2017") <= tmplg->datetime < Str2Date("1/1/2050")))
                    throw;
                if (tmplg->datetime < *_dstart)
                    *_dstart = tmplg->datetime;
                if (tmplg->datetime > *_dend)
                    *_dend = tmplg->datetime;

                do
                    if (!*src)
                        throw;
                while (*src++ != ']');
                if (*src++ != CH_SPACE)
                    throw;
                if (*src++ != '\"')
                    throw;

                if (!StrNCompare(src, "GET ", 4))
                {
                    // Requested path: text up to the next space.
                    src2 = src += 4;
                    do
                        if (!*src2)
                            throw;
                    while (*src2++ != CH_SPACE);
                    *--src2 = 0;
                    tmplg->file = StrNew(src);
                    src = ++src2;

                    // Skip the rest of the quoted request, then status and size.
                    do
                        if (!*src)
                            throw;
                    while (*src++ != '\"');
                    tmplg->code = Str2I64(src, 10, &src);
                    if (*src++ != CH_SPACE)
                        throw;
                    tmplg->size = Str2I64(src, 10, &src);
                    if (*src++ != CH_SPACE)
                        throw;

                    // Quoted referring link.
                    if (*src++ != '\"')
                        throw;
                    src2 = src;
                    do
                        if (!*src2)
                            throw;
                    while (*src2++ != '\"');
                    *--src2 = 0;
                    tmplg->link = StrNew(src);
                    src = ++src2;

                    QueueInsert(tmplg, head->last);
                }
                else if (!StrNCompare(src, "HEAD ", 5))
                {
                    LogStructDel(tmplg);
                }
                else
                    throw;
            }
            catch
            {
                // Mark the exception handled, echo the offending line and
                // discard the partially filled entry.
                Fs->catch_except = TRUE;
                "%$Q\n", doc_e->tag;
                LogStructDel(tmplg);
            }
        }
        doc_e = doc_e->next;
    }
    DocDel(doc);
}

// Parses every file matching files_find_mask into a newly allocated queue.
// Returns the queue head; the caller owns the head and its entries.
LogStruct *ParseLogFiles(U8 *files_find_mask, CDate *_dstart, CDate *_dend)
{
    LogStruct *head = CAlloc(sizeof(LogStruct));
    CDirEntry *tmpde = FilesFind(files_find_mask), *tmpde1 = tmpde;

    QueueInit(head);
    while (tmpde)
    {
        ParseSingleLogFile(head, tmpde->full_name, _dstart, _dend);
        tmpde = tmpde->next;
    }
    DirTreeDel(tmpde1);
    return head;
}

// Frees every entry in the queue.  The head itself is NOT freed and is not
// re-initialised; the caller disposes of it.
U0 LogListDel(LogStruct *head)
{
    LogStruct *tmplg = head->next, *tmplg1;

    while (tmplg != head)
    {
        tmplg1 = tmplg->next;
        LogStructDel(tmplg);
        tmplg = tmplg1;
    }
}

// Appends ip_num to the blocked queue (no duplicate check here).
U0 BlockedStructAdd(BlockedStruct *head, U32 ip_num)
{
    BlockedStruct *tmpb = CAlloc(sizeof(BlockedStruct));

    tmpb->ip_num = ip_num;
    QueueInsert(tmpb, head->last);
}

// Returns TRUE if ip_num is present in the blocked queue.
Bool IsBlocked(BlockedStruct *head, U32 ip_num)
{
    BlockedStruct *tmpb = head->next;

    while (tmpb != head)
    {
        if (tmpb->ip_num == ip_num)
            return TRUE;
        tmpb = tmpb->next;
    }
    return FALSE;
}

// Removes from the log queue every entry whose IP number is blocked.
// The blocked set is one hard-coded address plus every address that
// requested a file matching "ROBOT" (case-insensitive, via StrIMatch).
U0 BlockIPNuip(LogStruct *head)
{
    BlockedStruct blocked_head;
    LogStruct *tmplg = head->next, *tmplg1;

    QueueInit(&blocked_head);

    // 68.227.61.6 -- shifts bind tighter than '+' in this language.
    BlockedStructAdd(&blocked_head, 68 << 24 + 227 << 16 + 61 << 8 + 6);

    // pass 1: collect robot list
    while (tmplg != head)
    {
        if (StrIMatch("ROBOT", tmplg->file) && !IsBlocked(&blocked_head, tmplg->ip_num))
            BlockedStructAdd(&blocked_head, tmplg->ip_num);
        tmplg = tmplg->next;
    }

    // pass 2: remove entries from blocked ip_nums
    tmplg = head->next;
    while (tmplg != head)
    {
        tmplg1 = tmplg->next;
        if (IsBlocked(&blocked_head, tmplg->ip_num))
        {
            QueueRemove(tmplg);
            LogStructDel(tmplg);
        }
        tmplg = tmplg1;
    }

    QueueDel(&blocked_head);
}

// TRUE if the entry's file matches DOWNLOAD_FILE1 and at least
// DOWNLOAD_FILE1_SIZE bytes were sent.
Bool IsDownLoad(LogStruct *tmplg)
{
    if (StrMatch(DOWNLOAD_FILE1, tmplg->file) && tmplg->size >= DOWNLOAD_FILE1_SIZE)
        return TRUE;
    else
        return FALSE;
}

// TRUE if the entry is a request for "/index.html" or "/".
Bool IsIndex(LogStruct *tmplg)
{
    if (!StrCompare(tmplg->file, "/index.html") || !StrCompare(tmplg->file, "/"))
        return TRUE;
    else
        return FALSE;
}

// TRUE if the entry should be counted in reports: inside [dstart, dend],
// no '?' in the path, path longer than 2 chars whose second char is 'A'..'Z',
// non-zero size, not ending in '/', and not under "Wb" or "Family".
Bool IsKeeper(LogStruct *tmplg, CDate dstart, CDate dend)
{
    if (dstart <= tmplg->datetime <= dend &&
        !StrOcc(tmplg->file, '?') &&
        StrLen(tmplg->file) > 2 &&
        'A' <= tmplg->file[1] <= 'Z' &&
        tmplg->size &&
        tmplg->file[StrLen(tmplg->file) - 1] != '/' &&
        (StrLen(tmplg->file) < 3 || MemCompare(&tmplg->file[1], "Wb", 2)) &&
        (StrLen(tmplg->file) < 7 || MemCompare(&tmplg->file[1], "Family", 6)))
    {
        return TRUE;
    }
    else
        return FALSE;
}

// Inserts tmplg into the binary tree keyed by ip_num.
// Returns TRUE if that ip_num was already present (tmplg is then not
// inserted), FALSE if tmplg became a new node.  Prints an error and throws
// when less than 0x200 bytes of stack remain.
Bool IPNumTreeAdd(LogStruct **_head, LogStruct *tmplg)
{
    LogStruct *head;

    if (UnusedStack < 0x200)
    {
        PrintErr("Stack Overflow");
        throw;
    }
    if (head = *_head)
    {
        if (tmplg->ip_num == head->ip_num)
            return TRUE;
        else if (tmplg->ip_num < head->ip_num)
            return IPNumTreeAdd(&head->ip_num_left, tmplg);
        else
            return IPNumTreeAdd(&head->ip_num_right, tmplg);
    }
    else
    {
        tmplg->ip_num_left = NULL;
        tmplg->ip_num_right = NULL;
        *_head = tmplg;
        return FALSE;
    }
}

// Counts tmplg->link in the tree at *_root, creating a node on first sight.
// The node borrows the link string from tmplg.
U0 LinkTreeAdd(LinkStruct **_root, LogStruct *tmplg)
{
    I64 i;
    LinkStruct *root, *tmplk;

    if (UnusedStack < 0x200)
    {
        PrintErr("Stack Overflow");
        throw;
    }
    if (root = *_root)
    {
        if (!(i = StrCompare(tmplg->link, root->link)))
            root->count++;
        else if (i < 0)
            LinkTreeAdd(&root->left, tmplg);
        else
            LinkTreeAdd(&root->right, tmplg);
    }
    else
    {
        tmplk = CAlloc(sizeof(LinkStruct));
        tmplk->link = tmplg->link;
        tmplk->count = 1;
        *_root = tmplk;
    }
}

// Same as LinkTreeAdd but keyed on tmplg->file.
U0 FileTreeAdd(LinkStruct **_root, LogStruct *tmplg)
{
    I64 i;
    LinkStruct *root, *tmplk;

    if (UnusedStack < 0x200)
    {
        PrintErr("Stack Overflow");
        throw;
    }
    if (root = *_root)
    {
        if (!(i = StrCompare(tmplg->file, root->file)))
            root->count++;
        else if (i < 0)
            FileTreeAdd(&root->left, tmplg);
        else
            FileTreeAdd(&root->right, tmplg);
    }
    else
    {
        tmplk = CAlloc(sizeof(LinkStruct));
        tmplk->file = tmplg->file;
        tmplk->count = 1;
        *_root = tmplk;
    }
}

// Frees every node of a link tree (not the borrowed strings).
U0 LinkTreeDel(LinkStruct *root)
{
    if (root)
    {
        LinkTreeDel(root->left);
        LinkTreeDel(root->right);
        Free(root);
    }
}

// Prints "count:link" for every node, in string order.
U0 LinkTreeTraverse(LinkStruct *root)
{
    if (root)
    {
        LinkTreeTraverse(root->left);
        "%3d:%$Q\n", root->count, root->link;
        LinkTreeTraverse(root->right);
    }
}

// Frees every node of a file tree (not the borrowed strings).
U0 FileTreeDel(LinkStruct *root)
{
    if (root)
    {
        FileTreeDel(root->left);
        FileTreeDel(root->right);
        Free(root);
    }
}

// Prints "count:file" for every node, in string order.
U0 FileTreeTraverse(LinkStruct *root)
{
    if (root)
    {
        FileTreeTraverse(root->left);
        "%3d:%$Q\n", root->count, root->file;
        FileTreeTraverse(root->right);
    }
}

// Prints the download report for entries in [dstart, dend]: a per-day
// table, an hourly bar chart limited to the last HOURS_MAX hours, totals,
// and the referring links with counts.  A "dup" is a hit from an IP number
// already seen earlier in this report.
U0 DownLoadRep(LogStruct *head, CDate dstart, CDate dend)
{
    I64 i, j, count, dups,
        hours_start, hours_end, *hour_counts, *dup_counts,
        days_start, days_end, *day_counts, *day_dup_counts;
    LogStruct *tmplg = head->next, *dup_head = NULL;
    LinkStruct *link_root = NULL;
    CDateStruct ds;

    // Hour index: upper 32 bits of date * 24.  Day index: upper 32 bits of
    // the date shifted by local_time_offset.
    i = dstart * 24;
    hours_start = i.u32[1];
    i = dend * 24;
    hours_end = i.u32[1];

    days_start = (dstart + local_time_offset) >> 32;
    days_end = (dend + local_time_offset) >> 32;

    hour_counts = CAlloc((hours_end - hours_start + 1) * sizeof(I64));
    dup_counts = CAlloc((hours_end - hours_start + 1) * sizeof(I64));
    day_counts = CAlloc((days_end - days_start + 1) * sizeof(I64));
    day_dup_counts = CAlloc((days_end - days_start + 1) * sizeof(I64));
    dups = count = 0;

    while (tmplg != head)
    {
        if (IsKeeper(tmplg, dstart, dend) && IsDownLoad(tmplg))
        {
            i = tmplg->datetime * 24;
            hour_counts[i.u32[1] - hours_start]++;
            day_counts[(tmplg->datetime + local_time_offset) >> 32 - days_start]++;
            count++;
            if (IPNumTreeAdd(&dup_head, tmplg))
            {
                day_dup_counts[(tmplg->datetime + local_time_offset) >> 32 - days_start]++;
                dup_counts[i.u32[1] - hours_start]++;
                dups++;
            }
            LinkTreeAdd(&link_root, tmplg);
        }
        tmplg = tmplg->next;
    }

    "\n\nDownloads of /TOS_Distro.ISO\n";
    // Walk the day indices themselves so the last day is always listed;
    // stepping a date from dstart in whole days could stop one day short
    // when dstart's time of day is later than dend's.
    for (i = days_start; i <= days_end; i++)
        "%D Dups:%5d Total:%5d Uniques:%5d\n",
            (i << 32) - local_time_offset,
            day_dup_counts[i - days_start],
            day_counts[i - days_start],
            day_counts[i - days_start] - day_dup_counts[i - days_start];

    "\n\nDownloads of /TOS_Distro.ISO\n"
    "'-' is a dup. '+' is not a dup.\n";
    if (hours_end - hours_start >= HOURS_MAX)
        i = hours_end - HOURS_MAX + 1;
    else
        i = hours_start;
    for (; i <= hours_end; i++)
    {
        Date2Struct(&ds, i << 32 / 24 + local_time_offset);
        "%D %02d: ", i << 32 / 24, ds.hour;
        for (j = 0; j < dup_counts[i - hours_start]; j++)
            '-';
        for (; j < hour_counts[i - hours_start]; j++)
            '+';
        '\n';
    }
    "Total:%d Dups:%d Uniques:%d\n", count, dups, count - dups;

    "\n\nDownloads of /TOS_Distro.ISO\n";
    LinkTreeTraverse(link_root);
    '\n';

    LinkTreeDel(link_root);
    Free(hour_counts);
    Free(dup_counts);
    Free(day_counts);
    Free(day_dup_counts);
}

// Prints every kept file in [dstart, dend] with its hit count.
U0 FileRep(LogStruct *head, CDate dstart, CDate dend)
{
    LogStruct *tmplg = head->next;
    LinkStruct *file_root = NULL;

    while (tmplg != head)
    {
        if (IsKeeper(tmplg, dstart, dend))
            FileTreeAdd(&file_root, tmplg);
        tmplg = tmplg->next;
    }

    "\n\nFile Hits\n";
    FileTreeTraverse(file_root);
    '\n';

    FileTreeDel(file_root);
}

// Prints the index-page report for entries in [dstart, dend]; same layout
// as DownLoadRep but selecting entries with IsIndex.
U0 IndexRep(LogStruct *head, CDate dstart, CDate dend)
{
    I64 i, j, count, dups,
        hours_start, hours_end, *hour_counts, *dup_counts,
        days_start, days_end, *day_counts, *day_dup_counts;
    LogStruct *tmplg = head->next, *dup_head = NULL;
    LinkStruct *link_root = NULL;
    CDateStruct ds;

    i = dstart * 24;
    hours_start = i.u32[1];
    i = dend * 24;
    hours_end = i.u32[1];

    days_start = (dstart + local_time_offset) >> 32;
    days_end = (dend + local_time_offset) >> 32;

    hour_counts = CAlloc((hours_end - hours_start + 1) * sizeof(I64));
    dup_counts = CAlloc((hours_end - hours_start + 1) * sizeof(I64));
    day_counts = CAlloc((days_end - days_start + 1) * sizeof(I64));
    day_dup_counts = CAlloc((days_end - days_start + 1) * sizeof(I64));
    dups = count = 0;

    while (tmplg != head)
    {
        if (IsKeeper(tmplg, dstart, dend) && IsIndex(tmplg))
        {
            i = tmplg->datetime * 24;
            hour_counts[i.u32[1] - hours_start]++;
            day_counts[(tmplg->datetime + local_time_offset) >> 32 - days_start]++;
            count++;
            if (IPNumTreeAdd(&dup_head, tmplg))
            {
                day_dup_counts[(tmplg->datetime + local_time_offset) >> 32 - days_start]++;
                dup_counts[i.u32[1] - hours_start]++;
                dups++;
            }
            LinkTreeAdd(&link_root, tmplg);
        }
        tmplg = tmplg->next;
    }

    "\n\nHits on /index.html\n";
    // Iterate day indices so the final day is never skipped.
    for (i = days_start; i <= days_end; i++)
        "%D Dups:%5d Total:%5d Uniques:%5d\n",
            (i << 32) - local_time_offset,
            day_dup_counts[i - days_start],
            day_counts[i - days_start],
            day_counts[i - days_start] - day_dup_counts[i - days_start];

    // The '-'/'+' legend belongs to the hourly chart, as in DownLoadRep.
    "\n\nHits on /index.html\n"
    "'-' is a dup. '+' is not a dup.\n";
    if (hours_end - hours_start >= HOURS_MAX)
        i = hours_end - HOURS_MAX + 1;
    else
        i = hours_start;
    for (; i <= hours_end; i++)
    {
        Date2Struct(&ds, i << 32 / 24 + local_time_offset);
        "%D %02d: ", i << 32 / 24, ds.hour;
        for (j = 0; j < dup_counts[i - hours_start]; j++)
            '-';
        for (; j < hour_counts[i - hours_start]; j++)
            '+';
        '\n';
    }
    "Total:%d Dups:%d Uniques:%d\n", count, dups, count - dups;

    "\n\nHits on /index.html\n";
    LinkTreeTraverse(link_root);
    '\n';

    LinkTreeDel(link_root);
    Free(hour_counts);
    Free(dup_counts);
    Free(day_counts);
    Free(day_dup_counts);
}

// Entry point: parses the logs matching mask, prompts for the start/end
// dates (defaulting to the range found in the logs), prints the three
// reports and writes the output document under output_filename.
U0 WebLogRep(U8 *mask, U8 *output_filename)
{
    LogStruct *head;
    CDate dstart = I64_MAX, dend = I64_MIN;

    DocMax;

    head = ParseLogFiles(mask, &dstart, &dend);
    if (dstart > dend)
        PrintErr("No Data.\n");     // no line widened the initial empty range
    else
    {
        dstart = DateGet("Start(%D):", dstart);
        dend = DateGet("End (%D):", dend);
        // The prompts can yield an inverted range, which would give the
        // reports non-positive array sizes; refuse it here.
        if (dstart > dend)
            PrintErr("Start date is after end date.\n");
        else
        {
            BlockIPNuip(head);

            DocClear;
            "$WW,0$";
            IndexRep(head, dstart, dend);
            FileRep(head, dstart, dend);
            DownLoadRep(head, dstart, dend);

            StrCopy(DocPut->filename.name, output_filename);
            DocWrite(DocPut, TRUE);

            "$WW,1$";
        }
    }
    LogListDel(head);
    Free(head);     // LogListDel frees only the entries, not the head
}

#if __CMD_LINE__
Cd(__DIR__);;
WebLogRep("*.log*", "~/DemoWebLog.DD");
#endif