From 078dfd4219d2c3d190000b1cae3904e66cdbee6b Mon Sep 17 00:00:00 2001 From: Tristan Smith Date: Sat, 25 May 2024 18:37:55 -0400 Subject: [PATCH] added column to csv, added more regex, added more error keywords --- credentials.json | 4 ++-- credentials.json.comments | 1 + errors.json | 26 +++++++++++++----------- finder.py | 42 +++++++++++++++++++++++++-------------- warnings.json | 5 +++++ 5 files changed, 49 insertions(+), 29 deletions(-) create mode 100644 credentials.json.comments create mode 100644 warnings.json diff --git a/credentials.json b/credentials.json index 9029638..c3684d2 100644 --- a/credentials.json +++ b/credentials.json @@ -1,6 +1,6 @@ { "stock": [["miner", "miner"], ["root", "root"]], "braiins": [["root", "root"]], - "luxos": [["root", "root"]], - // "vnish": [["user", "password"]] // apparently vnish's ssh port is closed? I messaged them on telegram asking about it. + "luxos": [["root", "root"]] } + diff --git a/credentials.json.comments b/credentials.json.comments new file mode 100644 index 0000000..41eca78 --- /dev/null +++ b/credentials.json.comments @@ -0,0 +1 @@ +vnish does not have an ssh option. I messaged their developers on telegram and they said it was closed. :( \ No newline at end of file diff --git a/errors.json b/errors.json index 92f3ade..1b4813b 100644 --- a/errors.json +++ b/errors.json @@ -1,14 +1,16 @@ { - "error_keywords": [ - "ERROR_TEMP_TOO_HIGH", - "ERROR_HASHRATE_TOO_LOW", - "ERROR_NETWORK_DISCONNECTED", - "ERROR_POWER_LOST: power voltage rise or drop", - "SWEEP_STRING", - "_pic_write_iic failed!", - "PLL read exceeded wait time", - "ERROR_SOC_INIT: soc init failed", - "fail to read 0:1", - "fail to write 0:1" - ] + "error_keywords": { + "ERROR_TEMP_TOO_HIGH": "Temperature Error", + "ERROR_NETWORK_DISCONNECTED": "probably doesn't exist", + "ERROR_POWER_LOST: power voltage rise or drop": "voltage drop", + "_pic_write_iic failed!": "PIC Error", + "PLL read exceeded wait time": "PLL Error", + "ERROR_SOC_INIT: soc init failed": "SoC failure", + "fail to read 0:1": "eeprom", + "fail to write 0:1": "eeprom", + "bitmain_get_power_status failed": "PSU", + "power voltage can not meet the target": "PSU", + "reg crc error": "black hole", + "ERROR_POWER_LOST: pic check voltage drop": "voltage drop" + } } diff --git a/finder.py b/finder.py index 592d3e4..934eda4 100644 --- a/finder.py +++ b/finder.py @@ -8,7 +8,7 @@ def load_credentials(file_path): with open(file_path, 'r') as file: return json.load(file) -# Load error keywords from a JSON file +# Load error keywords and types from a JSON file def load_error_keywords(file_path): with open(file_path, 'r') as file: return json.load(file)['error_keywords'] @@ -24,6 +24,8 @@ error_keywords = load_error_keywords(ERRORS_FILE) # Regex patterns for ASIC chip errors and power-off messages asic_pattern = re.compile(r"Chain\[(\d+)\]: find (\d+) asic, times \d+") power_off_pattern = re.compile(r"Chain (\d+) only find (\d+) asic, will power off hash board (\d+)") +eeprom_error_pattern = re.compile(r"Data load fail for chain (\d+)\.") +chip_bin_pattern = re.compile(r"No chip bin, chain = (\d+)") # Function to read IP addresses from a file def read_ips(file_path): @@ -35,7 +37,7 @@ def read_ips(file_path): def check_logs(ip, ssh_client, worker_id): logs = [] asic_errors = set() # Using set to avoid duplicate errors - results = [] + results = set() # Using set to avoid duplicate entries try: print(f"Checking logs on {ip}") stdin, stdout, stderr = ssh_client.exec_command("find /var/log/ -type f") @@ -56,9 +58,9 @@ def check_logs(ip, ssh_client, worker_id): stdin, stdout, stderr = ssh_client.exec_command(f"cat {log_file}") log_content = stdout.read().decode('utf-8', errors='ignore') print(f"Content of {log_file}: {log_content[:500]}") # Debug statement to show part of the log content - for keyword in error_keywords: + for keyword, error_type in error_keywords.items(): if keyword in log_content: - logs.append((log_file, keyword)) + logs.append((log_file, error_type, keyword)) # Check for ASIC chip errors and power-off messages for match in asic_pattern.finditer(log_content): @@ -73,7 +75,19 @@ def check_logs(ip, ssh_client, worker_id): found_asic_count = int(found_asic_count) chain = int(chain) print(f"Power-off message found: Chain {chain}, ASIC count: {found_asic_count}, Board: {board}") # Debug statement - results.append([worker_id, ip, "ASIC Error", f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}"]) + results.add((worker_id, ip, log_file, "ASIC Error", f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}")) + + # Check for EEPROM errors + for match in eeprom_error_pattern.finditer(log_content): + chain = match.group(1) + print(f"EEPROM error found: Chain {chain}") # Debug statement + results.add((worker_id, ip, log_file, "EEPROM Error", f"Data load fail for chain {chain}")) + + # Check for chip bin errors + for match in chip_bin_pattern.finditer(log_content): + chain = match.group(1) + print(f"Chip bin error found: Chain {chain}") # Debug statement + results.add((worker_id, ip, log_file, "Chip Bin Error", f"No chip bin for chain {chain}")) except Exception as e: print(f"Error checking logs on {ip}: {e}") @@ -99,7 +113,7 @@ def get_worker_id(ssh_client): # Main function to iterate over IPs and check for errors def main(): ips = read_ips('ips.txt') - results = [] + results = set() # Using set to avoid duplicate entries for ip in ips: print(f"Processing IP: {ip}") @@ -116,17 +130,16 @@ def main(): connected = True worker_id = get_worker_id(ssh_client) logs, asic_errors, asic_results = check_logs(ip, ssh_client, worker_id) - results.extend(asic_results) + results.update(asic_results) for log in logs: - results.append([worker_id, ip, log[0], log[1]]) + results.add((worker_id, ip, log[0], log[1], log[2])) unique_asic_errors = {} # Using a dictionary to store chain and failed check count. for chain, asic_count in asic_errors: - for chain, asic_count in asic_errors: - failed_checks = unique_asic_errors.get(chain, 0) + 1 # array - unique_asic_errors[chain] = failed_checks - if asic_count == 0 and failed_checks == 3: - results.append([worker_id, ip, "ASIC Error", f"Chain {chain} has 3 failed checks with {asic_count} ASICs found"]) + failed_checks = unique_asic_errors.get(chain, 0) + 1 + unique_asic_errors[chain] = failed_checks + if asic_count == 0 and failed_checks == 3: + results.add((worker_id, ip, log_file, "ASIC Error", f"Chain {chain} has 3 failed checks with {asic_count} ASICs found")) ssh_client.close() break @@ -138,9 +151,8 @@ def main(): print("Writing results to CSV") with open('results.csv', 'w', newline='') as file: writer = csv.writer(file) - writer.writerow(["Worker ID", "IP Address", "Log File", "Error"]) + writer.writerow(["Worker ID", "IP Address", "Log File", "Error Type", "Error Message"]) writer.writerows(results) - print("Writing: ", results) print("Done") if __name__ == "__main__": diff --git a/warnings.json b/warnings.json new file mode 100644 index 0000000..b945bf4 --- /dev/null +++ b/warnings.json @@ -0,0 +1,5 @@ +{ + "warning_keywords": { + "the ECC used on your system is too weak compared to the one required by the NAND chip": "warning" + } +} \ No newline at end of file