diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d9f918c --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +2024/05/30.csv +2024/05/30-2.txt +results.csv +miner_logs.log diff --git a/finder.py b/finder.py index 5464f73..f50e558 100644 --- a/finder.py +++ b/finder.py @@ -2,8 +2,17 @@ import paramiko import re import json import csv +import logging from datetime import datetime +# Constants for error types +ASIC_ERROR = "ASIC Error" +EEPROM_ERROR = "EEPROM Error" +CHIP_BIN_ERROR = "Chip Bin Error" + +# Logging configuration +logging.basicConfig(filename='miner_logs.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') + # Load credentials from a JSON file def load_credentials(file_path): with open(file_path, 'r') as file: @@ -26,83 +35,77 @@ def read_ips(file_path): ips = file.readlines() return [ip.strip() for ip in ips] -# Function to check log files for keywords and ASIC errors -def check_logs(ip, ssh_client, worker_id, current_date): - logs = [] - asic_errors = set() # Using set to avoid duplicate errors - results = [] # Using list to avoid duplicate entries +# Function to establish an SSH connection +def establish_ssh_connection(ip, username, password): + ssh_client = paramiko.SSHClient() + ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) try: - print(f"Checking logs on {ip}") - stdin, stdout, stderr = ssh_client.exec_command("find /var/log/ -type f") - log_files = stdout.readlines() - for log_file in log_files: - log_file = log_file.strip() - print(f"Checking file: {log_file}") # Debug statement - - # Read the log file content directly - stdin, stdout, stderr = ssh_client.exec_command(f"cat {log_file}") - log_content = stdout.read().decode('utf-8', errors='ignore') - print(f"Content of {log_file}: {log_content[:500]}") # Debug statement to show part of the log content - - # Track unique errors within this log file - seen_errors = set() - for keyword, error_type in error_keywords.items(): - if keyword in log_content and (log_file, error_type, keyword) not in seen_errors: - print(f"Found keyword '{keyword}' in {log_file}") # Debug statement - logs.append((log_file, error_type, keyword)) - seen_errors.add((log_file, error_type, keyword)) - - # Check for ASIC chip errors and power-off messages - for match in asic_pattern.finditer(log_content): - chain, asic_count = match.groups() - asic_count = int(asic_count) - asic_errors.add((chain, asic_count)) - print(f"Chain {chain} has {asic_count} chips.") # Debug statement - - # Check for power-off messages - for match in power_off_pattern.finditer(log_content): - chain, found_asic_count, board = match.groups() - found_asic_count = int(found_asic_count) - chain = int(chain) - if (log_file, "ASIC Error", f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}") not in seen_errors: - results.append((current_date, worker_id, ip, log_file, "ASIC Error", f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}")) - seen_errors.add((log_file, "ASIC Error", f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}")) - - # Check for EEPROM errors - for match in eeprom_error_pattern.finditer(log_content): - chain = match.group(1) - if (log_file, "EEPROM Error", f"Data load fail for chain {chain}") not in seen_errors: - results.append((current_date, worker_id, ip, log_file, "EEPROM Error", f"Data load fail for chain {chain}")) - seen_errors.add((log_file, "EEPROM Error", f"Data load fail for chain {chain}")) - - # Check for chip bin errors - for match in chip_bin_pattern.finditer(log_content): - chain = match.group(1) - if (log_file, "Chip Bin Error", f"No chip bin for chain {chain}") not in seen_errors: - results.append((current_date, worker_id, ip, log_file, "Chip Bin Error", f"No chip bin for chain {chain}")) - seen_errors.add((log_file, "Chip Bin Error", f"No chip bin for chain {chain}")) - + ssh_client.connect(ip, username=username, password=password, timeout=5) + logging.info(f"Connected to {ip} with {username}") + return ssh_client except Exception as e: - print(f"Error checking logs on {ip}: {e}") - return logs, asic_errors, results + logging.error(f"Failed to connect to {ip} with {username}:{password} - {e}") + return None + +# Function to execute a command via SSH and return the output +def execute_ssh_command(ssh_client, command): + try: + stdin, stdout, stderr = ssh_client.exec_command(command) + return stdout.read().decode('utf-8') + except Exception as e: + logging.error(f"Error executing command '{command}': {e}") + return None # Function to get worker ID def get_worker_id(ssh_client): - try: - print("Getting worker ID") - stdin, stdout, stderr = ssh_client.exec_command("cat /config/cgminer.conf") - config_content = stdout.read().decode('utf-8') - # Extract the worker ID from the user field + config_content = execute_ssh_command(ssh_client, "cat /config/cgminer.conf") + if config_content: match = re.search(r'"user" *: *"[^.]*\.(\w+)"', config_content) if match: - worker_id = match.group(1) - print(f"Got Worker ID: {worker_id}") - else: - worker_id = "Unknown" - except Exception as e: - print(f"Error getting worker ID: {e}") - worker_id = "Unknown" - return worker_id + return match.group(1) + return "Unknown" + +# Function to check log files for keywords and ASIC errors +def check_logs(ip, ssh_client, worker_id, current_date, error_keywords): + logs = [] + asic_errors = set() # Using set to avoid duplicate errors + results = [] # Using list to avoid duplicate entries + log_files_content = execute_ssh_command(ssh_client, "find /var/log/ -type f") + if log_files_content: + log_files = log_files_content.splitlines() + for log_file in log_files: + log_content = execute_ssh_command(ssh_client, f"cat {log_file}") + if log_content: + seen_errors = set() + for keyword, error_type in error_keywords.items(): + if keyword in log_content and (log_file, error_type, keyword) not in seen_errors: + logs.append((log_file, error_type, keyword)) + seen_errors.add((log_file, error_type, keyword)) + + for match in asic_pattern.finditer(log_content): + chain, asic_count = match.groups() + asic_errors.add((chain, int(asic_count))) + + for match in power_off_pattern.finditer(log_content): + chain, found_asic_count, board = match.groups() + chain = int(chain) + found_asic_count = int(found_asic_count) + if (log_file, ASIC_ERROR, f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}") not in seen_errors: + results.append((current_date, worker_id, ip, log_file, ASIC_ERROR, f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}")) + seen_errors.add((log_file, ASIC_ERROR, f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}")) + + for match in eeprom_error_pattern.finditer(log_content): + chain = match.group(1) + if (log_file, EEPROM_ERROR, f"Data load fail for chain {chain}") not in seen_errors: + results.append((current_date, worker_id, ip, log_file, EEPROM_ERROR, f"Data load fail for chain {chain}")) + seen_errors.add((log_file, EEPROM_ERROR, f"Data load fail for chain {chain}")) + + for match in chip_bin_pattern.finditer(log_content): + chain = match.group(1) + if (log_file, CHIP_BIN_ERROR, f"No chip bin for chain {chain}") not in seen_errors: + results.append((current_date, worker_id, ip, log_file, CHIP_BIN_ERROR, f"No chip bin for chain {chain}")) + seen_errors.add((log_file, CHIP_BIN_ERROR, f"No chip bin for chain {chain}")) + return logs, asic_errors, results # Main function to iterate over IPs and check for errors def main(): @@ -111,46 +114,40 @@ def main(): current_date = datetime.now().strftime('%Y-%m-%d') for ip in ips: - print(f"Processing IP: {ip}") + logging.info(f"Processing IP: {ip}") connected = False for os_type, creds in credentials.items(): if connected: break for username, password in creds: - ssh_client = paramiko.SSHClient() - ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - try: - print(f"Trying {username}:{password} on {ip}") - ssh_client.connect(ip, username=username, password=password) + ssh_client = establish_ssh_connection(ip, username, password) + if ssh_client: connected = True worker_id = get_worker_id(ssh_client) - logs, asic_errors, asic_results = check_logs(ip, ssh_client, worker_id, current_date) + logs, asic_errors, asic_results = check_logs(ip, ssh_client, worker_id, current_date, error_keywords) results.extend(asic_results) for log in logs: results.append((current_date, worker_id, ip, log[0], log[1], log[2])) - + unique_asic_errors = {} # Using a dictionary to store chain and failed check count. for chain, asic_count in asic_errors: failed_checks = unique_asic_errors.get(chain, 0) + 1 unique_asic_errors[chain] = failed_checks if asic_count == 0 and failed_checks == 3: - results.append((current_date, worker_id, ip, log[0], "ASIC Error", f"Chain {chain} has 3 failed checks with {asic_count} ASICs found")) - + results.append((current_date, worker_id, ip, "N/A", ASIC_ERROR, f"Chain {chain} has 3 failed checks with {asic_count} ASICs found")) + ssh_client.close() break - except Exception as e: - print(f"Connection failed for {ip} with {username}:{password} - {e}") - ssh_client.close() # Write results to CSV csv_file = 'results.csv' - print(f"Writing results to {csv_file}") + logging.info(f"Writing results to {csv_file}") with open(csv_file, 'w', newline='') as file: writer = csv.writer(file) writer.writerow(["Date", "Worker ID", "IP Address", "Log File", "Error Type", "Error Message"]) for result in results: writer.writerow(result) - print("Done") + logging.info("Done") if __name__ == "__main__": # Load credentials and error keywords diff --git a/ips.txt b/ips.txt index 7a959db..6fadc16 100644 --- a/ips.txt +++ b/ips.txt @@ -1,20 +1 @@ -10.0.90.105 -10.0.80.243 -10.0.60.194 -10.0.60.189 -10.0.50.164 -10.0.50.28 -10.0.50.156 -10.0.40.191 -10.0.40.118 -10.0.40.189 -10.0.40.155 -10.0.40.244 -10.0.40.203 -10.0.30.178 -10.0.20.163 -10.0.20.59 -10.0.20.210 -10.0.20.131 -10.0.10.169 -10.0.100.54 +192.168.1.171