From c6b83b9f0e0b48a289c7958a51f774ed3cf27517 Mon Sep 17 00:00:00 2001 From: Tristan Smith Date: Mon, 27 May 2024 14:54:12 -0400 Subject: [PATCH] left results for demo purposes, refactored a bit, started using pandas --- 2024/May/27/results.csv | 34 +++++++++++++++++++++++ finder.py | 60 ++++++++++++++++++++++++++--------------- readme.md | 4 ++- 3 files changed, 76 insertions(+), 22 deletions(-) create mode 100644 2024/May/27/results.csv diff --git a/2024/May/27/results.csv b/2024/May/27/results.csv new file mode 100644 index 0000000..3ec3692 --- /dev/null +++ b/2024/May/27/results.csv @@ -0,0 +1,34 @@ +Date,Worker ID,IP Address,Log File,Error Type,Error Message +2024-05-27,mw3446,192.168.1.171,/var/log/domokun,ASIC Error,Chain 2 has failed with 0 ASICs found and will power off hash board 2 +2024-05-27,mw3446,192.168.1.171,/var/log/new2.log,ASIC Error,Chain 0 has failed with 0 ASICs found and will power off hash board 0 +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_7426394.log,EEPROM Error,Data load fail for chain 1 +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_7426394.log,Chip Bin Error,No chip bin for chain 0 +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_7426394.log,Chip Bin Error,No chip bin for chain 1 +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_7426394.log,Chip Bin Error,No chip bin for chain 2 +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_6974116.log,ASIC Error,Chain 1 has failed with 0 ASICs found and will power off hash board 1 +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_7426352.log,EEPROM Error,Data load fail for chain 1 +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_7426352.log,EEPROM Error,Data load fail for chain 2 +2024-05-27,mw3446,192.168.1.171,/var/log/new1.log,ASIC Error,Chain 0 has failed with 96 ASICs found and will power off hash board 0 +2024-05-27,mw3446,192.168.1.171,/var/log/failures,ASIC Error,Chain 0 has failed with 0 ASICs found and will power off hash board 0 +2024-05-27,mw3446,192.168.1.171,/var/log/failures,ASIC 
Error,Chain 1 has failed with 104 ASICs found and will power off hash board 1 +2024-05-27,mw3446,192.168.1.171,/var/log/new2.log,SoC failure,ERROR_SOC_INIT: soc init failed +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_3174359.log,SoC failure,ERROR_SOC_INIT: soc init failed +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_3174359.log,PSU,bitmain_get_power_status failed +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_3174359.log,PSU,power voltage can not meet the target +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_4594191.log,voltage drop,ERROR_POWER_LOST: power voltage rise or drop +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_4594191.log,black hole,reg crc error +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_7426394.log,Temperature Error,ERROR_TEMP_TOO_HIGH +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_7426394.log,PIC Error,_pic_write_iic failed! +2024-05-27,mw3446,192.168.1.171,/var/log/2024-03/06/cglog_init_2024-03-06_19-00-57/miner.log,SoC failure,ERROR_SOC_INIT: soc init failed +2024-05-27,mw3446,192.168.1.171,/var/log/2024-03/06/cglog_init_2024-03-06_19-00-57/messages,SoC failure,ERROR_SOC_INIT: soc init failed +2024-05-27,mw3446,192.168.1.171,/var/log/2024-03/06/cglog_init_2024-03-06_18-53-25/miner.log,SoC failure,ERROR_SOC_INIT: soc init failed +2024-05-27,mw3446,192.168.1.171,/var/log/2024-03/06/cglog_init_2024-03-06_18-53-25/messages,SoC failure,ERROR_SOC_INIT: soc init failed +2024-05-27,mw3446,192.168.1.171,/var/log/2024-03/06/cglog_init_2024-03-06_19-07-47/miner.log,SoC failure,ERROR_SOC_INIT: soc init failed +2024-05-27,mw3446,192.168.1.171,/var/log/2024-03/06/cglog_init_2024-03-06_19-07-47/messages,SoC failure,ERROR_SOC_INIT: soc init failed +2024-05-27,mw3446,192.168.1.171,/var/log/2024-03/06/cglog_init_2024-03-06_20-51-43/miner.log,SoC failure,ERROR_SOC_INIT: soc init failed +2024-05-27,mw3446,192.168.1.171,/var/log/2024-03/06/cglog_init_2024-03-06_20-51-43/messages,SoC failure,ERROR_SOC_INIT: soc init failed 
+2024-05-27,mw3446,192.168.1.171,/var/log/2024-03/06/cglog_init_2024-03-06_18-01-31/miner.log,SoC failure,ERROR_SOC_INIT: soc init failed +2024-05-27,mw3446,192.168.1.171,/var/log/2024-03/06/cglog_init_2024-03-06_18-01-31/messages,SoC failure,ERROR_SOC_INIT: soc init failed +2024-05-27,mw3446,192.168.1.171,/var/log/Miner_6024072.log,voltage drop,ERROR_POWER_LOST: pic check voltage drop +2024-05-27,mw3446,192.168.1.171,/var/log/test,voltage drop,ERROR_POWER_LOST: power voltage rise or drop +2024-05-27,mw3446,192.168.1.171,/var/log/new1.log,SoC failure,ERROR_SOC_INIT: soc init failed diff --git a/finder.py b/finder.py index 80fc356..204ea13 100644 --- a/finder.py +++ b/finder.py @@ -1,7 +1,9 @@ import paramiko -import csv import re import json +import os +from datetime import datetime +import pandas as pd # Load credentials from a JSON file def load_credentials(file_path): @@ -34,10 +36,10 @@ def read_ips(file_path): return [ip.strip() for ip in ips] # Function to check log files for keywords and ASIC errors -def check_logs(ip, ssh_client, worker_id): +def check_logs(ip, ssh_client, worker_id, current_date): logs = [] asic_errors = set() # Using set to avoid duplicate errors - results = set() # Using set to avoid duplicate entries + results = [] # Using a list; duplicates are filtered via the per-file seen_errors set try: print(f"Checking logs on {ip}") stdin, stdout, stderr = ssh_client.exec_command("find /var/log/ -type f") @@ -58,9 +60,13 @@ def check_logs(ip, ssh_client, worker_id): stdin, stdout, stderr = ssh_client.exec_command(f"cat {log_file}") log_content = stdout.read().decode('utf-8', errors='ignore') print(f"Content of {log_file}: {log_content[:500]}") # Debug statement to show part of the log content + + # Track unique errors within this log file + seen_errors = set() for keyword, error_type in error_keywords.items(): - if keyword in log_content: + if keyword in log_content and (log_file, error_type, keyword) not in seen_errors: logs.append((log_file, error_type, keyword)) + 
seen_errors.add((log_file, error_type, keyword)) # Check for ASIC chip errors and power-off messages for match in asic_pattern.finditer(log_content): @@ -74,20 +80,23 @@ def check_logs(ip, ssh_client, worker_id): chain, found_asic_count, board = match.groups() found_asic_count = int(found_asic_count) chain = int(chain) - print(f"Power-off message found: Chain {chain}, ASIC count: {found_asic_count}, Board: {board}") # Debug statement - results.add((worker_id, ip, log_file, "ASIC Error", f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}")) + if (log_file, "ASIC Error", f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}") not in seen_errors: + results.append((current_date, worker_id, ip, log_file, "ASIC Error", f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}")) + seen_errors.add((log_file, "ASIC Error", f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}")) # Check for EEPROM errors for match in eeprom_error_pattern.finditer(log_content): chain = match.group(1) - print(f"EEPROM error found: Chain {chain}") # Debug statement - results.add((worker_id, ip, log_file, "EEPROM Error", f"Data load fail for chain {chain}")) + if (log_file, "EEPROM Error", f"Data load fail for chain {chain}") not in seen_errors: + results.append((current_date, worker_id, ip, log_file, "EEPROM Error", f"Data load fail for chain {chain}")) + seen_errors.add((log_file, "EEPROM Error", f"Data load fail for chain {chain}")) # Check for chip bin errors for match in chip_bin_pattern.finditer(log_content): chain = match.group(1) - print(f"Chip bin error found: Chain {chain}") # Debug statement - results.add((worker_id, ip, log_file, "Chip Bin Error", f"No chip bin for chain {chain}")) + if (log_file, "Chip Bin Error", f"No chip bin for chain {chain}") not in seen_errors: + 
results.append((current_date, worker_id, ip, log_file, "Chip Bin Error", f"No chip bin for chain {chain}")) + seen_errors.add((log_file, "Chip Bin Error", f"No chip bin for chain {chain}")) except Exception as e: print(f"Error checking logs on {ip}: {e}") @@ -114,7 +123,11 @@ def get_worker_id(ssh_client): # Main function to iterate over IPs and check for errors def main(): ips = read_ips('ips.txt') - results = set() # Using set to avoid duplicate entries + results = [] # Using a list to collect results + current_date = datetime.now().strftime('%Y-%m-%d') + current_year = datetime.now().strftime('%Y') + current_month = datetime.now().strftime('%B') + current_day = datetime.now().strftime('%d') for ip in ips: print(f"Processing IP: {ip}") @@ -130,30 +143,35 @@ def main(): ssh_client.connect(ip, username=username, password=password) connected = True worker_id = get_worker_id(ssh_client) - logs, asic_errors, asic_results = check_logs(ip, ssh_client, worker_id) - results.update(asic_results) + logs, asic_errors, asic_results = check_logs(ip, ssh_client, worker_id, current_date) + results.extend(asic_results) for log in logs: - results.add((worker_id, ip, log[0], log[1], log[2])) + results.append((current_date, worker_id, ip, log[0], log[1], log[2])) unique_asic_errors = {} # Using a dictionary to store chain and failed check count. 
for chain, asic_count in asic_errors: failed_checks = unique_asic_errors.get(chain, 0) + 1 unique_asic_errors[chain] = failed_checks if asic_count == 0 and failed_checks == 3: - results.add((worker_id, ip, log[0], "ASIC Error", f"Chain {chain} has 3 failed checks with {asic_count} ASICs found")) + results.append((current_date, worker_id, ip, log[0], "ASIC Error", f"Chain {chain} has 3 failed checks with {asic_count} ASICs found")) ssh_client.close() break except Exception as e: print(f"Connection failed for {ip} with {username}:{password} - {e}") ssh_client.close() - - # Write results to CSV + + # Create the directory structure + directory = os.path.join(current_year, current_month, current_day) + if not os.path.exists(directory): + os.makedirs(directory) + + # Convert results to a DataFrame + results_df = pd.DataFrame(results, columns=["Date", "Worker ID", "IP Address", "Log File", "Error Type", "Error Message"]) + + # Save the results to a CSV file print("Writing results to CSV") - with open('results.csv', 'w', newline='') as file: - writer = csv.writer(file) - writer.writerow(["Worker ID", "IP Address", "Log File", "Error Type", "Error Message"]) - writer.writerows(results) + results_df.to_csv(os.path.join(directory, 'results.csv'), index=False) print("Done") if __name__ == "__main__": diff --git a/readme.md b/readme.md index fec4e0b..08129ca 100644 --- a/readme.md +++ b/readme.md @@ -1,3 +1,5 @@ # Antminer error finder -Checks for specific strings in log files of Antminer machines. Currently works with stock firmware (and barely at that). Expand to Braiins & LuxOS. +Checks for specific strings in log files of Antminer machines. Currently works with stock firmware (and barely at that). Expand to Braiins & LuxOS. This now outputs a csv file to a folder structure of year/month/day. + +Started using pandas for writing the csv and for grouping of errors based on machine.