so far so good
This commit is contained in:
parent
46bc73185d
commit
b33b2fbd6e
3 changed files with 87 additions and 105 deletions
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
2024/05/30.csv
|
||||
2024/05/30-2.txt
|
||||
results.csv
|
||||
miner_logs.log
|
167
finder.py
167
finder.py
|
@ -2,8 +2,17 @@ import paramiko
|
|||
import re
|
||||
import json
|
||||
import csv
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
# Constants for error types
|
||||
ASIC_ERROR = "ASIC Error"
|
||||
EEPROM_ERROR = "EEPROM Error"
|
||||
CHIP_BIN_ERROR = "Chip Bin Error"
|
||||
|
||||
# Logging configuration
|
||||
logging.basicConfig(filename='miner_logs.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
|
||||
# Load credentials from a JSON file
|
||||
def load_credentials(file_path):
|
||||
with open(file_path, 'r') as file:
|
||||
|
@ -26,83 +35,77 @@ def read_ips(file_path):
|
|||
ips = file.readlines()
|
||||
return [ip.strip() for ip in ips]
|
||||
|
||||
# Function to check log files for keywords and ASIC errors
|
||||
def check_logs(ip, ssh_client, worker_id, current_date):
|
||||
logs = []
|
||||
asic_errors = set() # Using set to avoid duplicate errors
|
||||
results = [] # Using list to avoid duplicate entries
|
||||
# Function to establish an SSH connection
|
||||
def establish_ssh_connection(ip, username, password):
|
||||
ssh_client = paramiko.SSHClient()
|
||||
ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||
try:
|
||||
print(f"Checking logs on {ip}")
|
||||
stdin, stdout, stderr = ssh_client.exec_command("find /var/log/ -type f")
|
||||
log_files = stdout.readlines()
|
||||
for log_file in log_files:
|
||||
log_file = log_file.strip()
|
||||
print(f"Checking file: {log_file}") # Debug statement
|
||||
|
||||
# Read the log file content directly
|
||||
stdin, stdout, stderr = ssh_client.exec_command(f"cat {log_file}")
|
||||
log_content = stdout.read().decode('utf-8', errors='ignore')
|
||||
print(f"Content of {log_file}: {log_content[:500]}") # Debug statement to show part of the log content
|
||||
|
||||
# Track unique errors within this log file
|
||||
seen_errors = set()
|
||||
for keyword, error_type in error_keywords.items():
|
||||
if keyword in log_content and (log_file, error_type, keyword) not in seen_errors:
|
||||
print(f"Found keyword '{keyword}' in {log_file}") # Debug statement
|
||||
logs.append((log_file, error_type, keyword))
|
||||
seen_errors.add((log_file, error_type, keyword))
|
||||
|
||||
# Check for ASIC chip errors and power-off messages
|
||||
for match in asic_pattern.finditer(log_content):
|
||||
chain, asic_count = match.groups()
|
||||
asic_count = int(asic_count)
|
||||
asic_errors.add((chain, asic_count))
|
||||
print(f"Chain {chain} has {asic_count} chips.") # Debug statement
|
||||
|
||||
# Check for power-off messages
|
||||
for match in power_off_pattern.finditer(log_content):
|
||||
chain, found_asic_count, board = match.groups()
|
||||
found_asic_count = int(found_asic_count)
|
||||
chain = int(chain)
|
||||
if (log_file, "ASIC Error", f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}") not in seen_errors:
|
||||
results.append((current_date, worker_id, ip, log_file, "ASIC Error", f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}"))
|
||||
seen_errors.add((log_file, "ASIC Error", f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}"))
|
||||
|
||||
# Check for EEPROM errors
|
||||
for match in eeprom_error_pattern.finditer(log_content):
|
||||
chain = match.group(1)
|
||||
if (log_file, "EEPROM Error", f"Data load fail for chain {chain}") not in seen_errors:
|
||||
results.append((current_date, worker_id, ip, log_file, "EEPROM Error", f"Data load fail for chain {chain}"))
|
||||
seen_errors.add((log_file, "EEPROM Error", f"Data load fail for chain {chain}"))
|
||||
|
||||
# Check for chip bin errors
|
||||
for match in chip_bin_pattern.finditer(log_content):
|
||||
chain = match.group(1)
|
||||
if (log_file, "Chip Bin Error", f"No chip bin for chain {chain}") not in seen_errors:
|
||||
results.append((current_date, worker_id, ip, log_file, "Chip Bin Error", f"No chip bin for chain {chain}"))
|
||||
seen_errors.add((log_file, "Chip Bin Error", f"No chip bin for chain {chain}"))
|
||||
|
||||
ssh_client.connect(ip, username=username, password=password, timeout=5)
|
||||
logging.info(f"Connected to {ip} with {username}")
|
||||
return ssh_client
|
||||
except Exception as e:
|
||||
print(f"Error checking logs on {ip}: {e}")
|
||||
return logs, asic_errors, results
|
||||
logging.error(f"Failed to connect to {ip} with {username}:{password} - {e}")
|
||||
return None
|
||||
|
||||
# Function to execute a command via SSH and return the output
|
||||
def execute_ssh_command(ssh_client, command):
    """Run ``command`` over an SSH session and return its stdout as text.

    Args:
        ssh_client: Client object exposing ``exec_command(command)`` that
            returns a ``(stdin, stdout, stderr)`` triple.
        command: Command line to execute on the remote host.

    Returns:
        The decoded standard output of the command, or ``None`` when running
        the command or reading its output raised an exception (the error is
        written to the log).
    """
    try:
        _stdin, stdout, _stderr = ssh_client.exec_command(command)
        # Log files can contain bytes that are not valid UTF-8. A strict
        # decode would raise, be swallowed below, and make the caller skip
        # the whole file, so undecodable bytes are dropped instead.
        return stdout.read().decode('utf-8', errors='ignore')
    except Exception as e:
        logging.error(f"Error executing command '{command}': {e}")
        return None
|
||||
|
||||
# Function to get worker ID
|
||||
def get_worker_id(ssh_client):
|
||||
try:
|
||||
print("Getting worker ID")
|
||||
stdin, stdout, stderr = ssh_client.exec_command("cat /config/cgminer.conf")
|
||||
config_content = stdout.read().decode('utf-8')
|
||||
# Extract the worker ID from the user field
|
||||
config_content = execute_ssh_command(ssh_client, "cat /config/cgminer.conf")
|
||||
if config_content:
|
||||
match = re.search(r'"user" *: *"[^.]*\.(\w+)"', config_content)
|
||||
if match:
|
||||
worker_id = match.group(1)
|
||||
print(f"Got Worker ID: {worker_id}")
|
||||
else:
|
||||
worker_id = "Unknown"
|
||||
except Exception as e:
|
||||
print(f"Error getting worker ID: {e}")
|
||||
worker_id = "Unknown"
|
||||
return worker_id
|
||||
return match.group(1)
|
||||
return "Unknown"
|
||||
|
||||
# Function to check log files for keywords and ASIC errors
|
||||
def _append_unique(results, seen, row_prefix, error_type, message):
    """Append one result row unless this (error_type, message) was already seen."""
    key = (error_type, message)
    if key in seen:
        return
    seen.add(key)
    results.append((*row_prefix, error_type, message))


def check_logs(ip, ssh_client, worker_id, current_date, error_keywords):
    """Scan every file under /var/log/ on a host for known error signatures.

    Relies on ``execute_ssh_command`` and on the module-level names
    ``asic_pattern``, ``power_off_pattern``, ``eeprom_error_pattern`` and
    ``chip_bin_pattern`` (used via ``.finditer``; presumably compiled regexes
    defined elsewhere in this file -- not visible from this block).

    Args:
        ip: Host address; copied into every result row.
        ssh_client: SSH client handed to ``execute_ssh_command``.
        worker_id: Worker identifier; copied into every result row.
        current_date: Date value; copied into every result row.
        error_keywords: Mapping of literal keyword -> error type label.

    Returns:
        A ``(logs, asic_errors, results)`` tuple where

        * ``logs`` is a list of ``(log_file, error_type, keyword)`` for each
          keyword found in a file,
        * ``asic_errors`` is a set of ``(chain, asic_count)`` pairs from
          ``asic_pattern`` matches (chain as captured, count as ``int``),
        * ``results`` is a list of
          ``(current_date, worker_id, ip, log_file, error_type, message)``
          rows for power-off, EEPROM and chip-bin matches, de-duplicated per
          log file.
    """
    import shlex  # local import: only needed to quote remote paths

    logs = []
    asic_errors = set()  # set collapses repeated (chain, count) pairs
    results = []  # de-duplication is done per file via seen_errors below

    log_files_content = execute_ssh_command(ssh_client, "find /var/log/ -type f")
    if not log_files_content:
        return logs, asic_errors, results

    for log_file in log_files_content.splitlines():
        if not log_file:
            # A blank line would turn into a bare `cat` with no operand.
            continue

        # Quote the path so names containing spaces or shell metacharacters
        # are read as a single argument instead of being interpreted.
        log_content = execute_ssh_command(ssh_client, f"cat {shlex.quote(log_file)}")
        if not log_content:
            continue

        # Tracks (error_type, message) pairs already reported for this file.
        seen_errors = set()
        row_prefix = (current_date, worker_id, ip, log_file)

        for keyword, error_type in error_keywords.items():
            if keyword in log_content:
                logs.append((log_file, error_type, keyword))
                seen_errors.add((error_type, keyword))

        for match in asic_pattern.finditer(log_content):
            chain, asic_count = match.groups()
            asic_errors.add((chain, int(asic_count)))

        for match in power_off_pattern.finditer(log_content):
            chain, found_asic_count, board = match.groups()
            # int() normalises the captured digits before they go into the message.
            chain = int(chain)
            found_asic_count = int(found_asic_count)
            _append_unique(
                results,
                seen_errors,
                row_prefix,
                ASIC_ERROR,
                f"Chain {chain} has failed with {found_asic_count} ASICs found and will power off hash board {board}",
            )

        for match in eeprom_error_pattern.finditer(log_content):
            chain = match.group(1)
            _append_unique(
                results,
                seen_errors,
                row_prefix,
                EEPROM_ERROR,
                f"Data load fail for chain {chain}",
            )

        for match in chip_bin_pattern.finditer(log_content):
            chain = match.group(1)
            _append_unique(
                results,
                seen_errors,
                row_prefix,
                CHIP_BIN_ERROR,
                f"No chip bin for chain {chain}",
            )

    return logs, asic_errors, results
|
||||
|
||||
# Main function to iterate over IPs and check for errors
|
||||
def main():
|
||||
|
@ -111,46 +114,40 @@ def main():
|
|||
current_date = datetime.now().strftime('%Y-%m-%d')
|
||||
|
||||
for ip in ips:
|
||||
print(f"Processing IP: {ip}")
|
||||
logging.info(f"Processing IP: {ip}")
|
||||
connected = False
|
||||
for os_type, creds in credentials.items():
|
||||
if connected:
|
||||
break
|
||||
for username, password in creds:
|
||||
ssh_client = paramiko.SSHClient()
|
||||
ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||
try:
|
||||
print(f"Trying {username}:{password} on {ip}")
|
||||
ssh_client.connect(ip, username=username, password=password)
|
||||
ssh_client = establish_ssh_connection(ip, username, password)
|
||||
if ssh_client:
|
||||
connected = True
|
||||
worker_id = get_worker_id(ssh_client)
|
||||
logs, asic_errors, asic_results = check_logs(ip, ssh_client, worker_id, current_date)
|
||||
logs, asic_errors, asic_results = check_logs(ip, ssh_client, worker_id, current_date, error_keywords)
|
||||
results.extend(asic_results)
|
||||
for log in logs:
|
||||
results.append((current_date, worker_id, ip, log[0], log[1], log[2]))
|
||||
|
||||
|
||||
unique_asic_errors = {} # Using a dictionary to store chain and failed check count.
|
||||
for chain, asic_count in asic_errors:
|
||||
failed_checks = unique_asic_errors.get(chain, 0) + 1
|
||||
unique_asic_errors[chain] = failed_checks
|
||||
if asic_count == 0 and failed_checks == 3:
|
||||
results.append((current_date, worker_id, ip, log[0], "ASIC Error", f"Chain {chain} has 3 failed checks with {asic_count} ASICs found"))
|
||||
|
||||
results.append((current_date, worker_id, ip, "N/A", ASIC_ERROR, f"Chain {chain} has 3 failed checks with {asic_count} ASICs found"))
|
||||
|
||||
ssh_client.close()
|
||||
break
|
||||
except Exception as e:
|
||||
print(f"Connection failed for {ip} with {username}:{password} - {e}")
|
||||
ssh_client.close()
|
||||
|
||||
# Write results to CSV
|
||||
csv_file = 'results.csv'
|
||||
print(f"Writing results to {csv_file}")
|
||||
logging.info(f"Writing results to {csv_file}")
|
||||
with open(csv_file, 'w', newline='') as file:
|
||||
writer = csv.writer(file)
|
||||
writer.writerow(["Date", "Worker ID", "IP Address", "Log File", "Error Type", "Error Message"])
|
||||
for result in results:
|
||||
writer.writerow(result)
|
||||
print("Done")
|
||||
logging.info("Done")
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Load credentials and error keywords
|
||||
|
|
21
ips.txt
21
ips.txt
|
@ -1,20 +1 @@
|
|||
10.0.90.105
|
||||
10.0.80.243
|
||||
10.0.60.194
|
||||
10.0.60.189
|
||||
10.0.50.164
|
||||
10.0.50.28
|
||||
10.0.50.156
|
||||
10.0.40.191
|
||||
10.0.40.118
|
||||
10.0.40.189
|
||||
10.0.40.155
|
||||
10.0.40.244
|
||||
10.0.40.203
|
||||
10.0.30.178
|
||||
10.0.20.163
|
||||
10.0.20.59
|
||||
10.0.20.210
|
||||
10.0.20.131
|
||||
10.0.10.169
|
||||
10.0.100.54
|
||||
192.168.1.171
|
||||
|
|
Loading…
Reference in a new issue