2024-05-25 07:45:09 +01:00
import paramiko
import csv
import re
2024-05-25 21:38:38 +01:00
import json
2024-05-25 07:45:09 +01:00
2024-05-25 21:38:38 +01:00
# Load credentials from a JSON file
def load_credentials(file_path):
    """Load SSH credentials from a JSON file.

    Args:
        file_path: Path to the JSON credentials file.

    Returns:
        The parsed JSON document. ``main`` iterates it as a mapping whose
        values are sequences of ``(username, password)`` pairs.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)
2024-05-25 23:37:55 +01:00
# Load error keywords and types from a JSON file
2024-05-25 21:38:38 +01:00
def load_error_keywords(file_path):
    """Load the keyword-to-error-type table from a JSON file.

    Args:
        file_path: Path to a JSON file whose top-level object has an
            ``"error_keywords"`` entry.

    Returns:
        The value stored under ``"error_keywords"``. ``check_logs`` calls
        ``.items()`` on it, treating it as a mapping of keyword to error type.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the ``"error_keywords"`` entry is missing.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)["error_keywords"]
2024-05-25 07:45:09 +01:00
2024-05-25 21:38:38 +01:00
# Define paths to the configuration files
CREDENTIALS_FILE = "credentials.json"
ERRORS_FILE = "errors.json"

# Load credentials and error keywords once, at import time. Both files are
# resolved relative to the current working directory, and a missing or
# malformed file raises here, before any host is contacted.
credentials = load_credentials(CREDENTIALS_FILE)
error_keywords = load_error_keywords(ERRORS_FILE)
# Regex patterns for ASIC chip errors and power-off messages
2024-05-25 07:45:09 +01:00
# "Chain[<chain>]: find <n> asic, times <attempt>" -> groups: chain, asic count.
asic_pattern = re.compile(r"Chain\[(\d+)\]: find (\d+) asic, times \d+")
# Power-off notice -> groups: chain, asic count found, hash board number.
power_off_pattern = re.compile(
    r"Chain (\d+) only find (\d+) asic, will power off hash board (\d+)"
)
# EEPROM data load failure -> group: chain.
eeprom_error_pattern = re.compile(r"Data load fail for chain (\d+)\.")
# Missing chip bin -> group: chain.
chip_bin_pattern = re.compile(r"No chip bin, chain = (\d+)")
2024-05-25 07:45:09 +01:00
# Function to read IP addresses from a file
def read_ips(file_path):
    """Read host addresses from a text file, one per line.

    Surrounding whitespace is stripped from every line, and lines that are
    empty after stripping are skipped so that a trailing newline or a spacer
    line never yields an empty address.

    Args:
        file_path: Path to the text file listing the addresses.

    Returns:
        A list of non-empty, stripped address strings in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        stripped_lines = (line.strip() for line in file)
        return [ip for ip in stripped_lines if ip]
# Function to check log files for keywords and ASIC errors
2024-05-25 21:38:38 +01:00
def check_logs(ip, ssh_client, worker_id):
    """Scan the text files under /var/log on a remote host for known errors.

    Every regular file below ``/var/log/`` is listed with ``find``. Files
    whose name ends in ``tmp`` and files that the remote ``file`` command does
    not describe as text are skipped. The remaining files are read with
    ``cat`` and checked against the module-level ``error_keywords`` table and
    the compiled ASIC / power-off / EEPROM / chip-bin patterns.

    Any exception raised while scanning is printed and ends the scan early;
    whatever was collected up to that point is still returned.

    Args:
        ip: Address of the host; used in progress messages and result rows.
        ssh_client: Connected client whose ``exec_command(cmd)`` returns a
            ``(stdin, stdout, stderr)`` triple (``main`` passes a
            ``paramiko.SSHClient``).
        worker_id: Worker identifier copied into every result row.

    Returns:
        A tuple ``(logs, asic_errors, results)`` where

        * ``logs`` is a list of ``(log_file, error_type, keyword)`` tuples,
          one per configured keyword found in a file;
        * ``asic_errors`` is a set of ``(chain, asic_count)`` tuples
          (``chain`` as the matched string, ``asic_count`` as ``int``);
        * ``results`` is a set of
          ``(worker_id, ip, log_file, error_type, message)`` rows.
    """
    # Function-scope import keeps this block self-contained; it is needed to
    # quote remote paths before they are placed on a shell command line.
    import shlex

    logs = []
    asic_errors = set()  # Using set to avoid duplicate errors
    results = set()  # Using set to avoid duplicate entries
    try:
        print(f"Checking logs on {ip}")
        _, stdout, _ = ssh_client.exec_command("find /var/log/ -type f")
        log_files = stdout.readlines()
        for log_file in log_files:
            log_file = log_file.strip()
            if not log_file:
                # A blank line in the find output is not a path.
                continue
            print(f"Checking file: {log_file}")  # Debug statement

            # Check if file should be ignored (login-record and temp files).
            if log_file.endswith(("tmp", "utmp", "btmp", "wtmp")):
                continue

            # Paths may contain spaces or shell metacharacters; quote them so
            # the remote shell sees exactly one argument.
            quoted_path = shlex.quote(log_file)

            # Check if file is a binary file
            _, stdout, _ = ssh_client.exec_command(f"file {quoted_path}")
            file_type = stdout.read().decode("utf-8")
            if "text" not in file_type:
                continue

            _, stdout, _ = ssh_client.exec_command(f"cat {quoted_path}")
            log_content = stdout.read().decode("utf-8", errors="ignore")
            # Debug statement to show part of the log content
            print(f"Content of {log_file}: {log_content[:500]}")

            for keyword, error_type in error_keywords.items():
                if keyword in log_content:
                    logs.append((log_file, error_type, keyword))

            # Check for ASIC chip errors
            for match in asic_pattern.finditer(log_content):
                chain, asic_count = match.groups()
                asic_count = int(asic_count)
                asic_errors.add((chain, asic_count))
                print(f"Chain {chain} has {asic_count} chips.")  # Debug statement

            # Check for power-off messages
            for match in power_off_pattern.finditer(log_content):
                chain, found_asic_count, board = match.groups()
                found_asic_count = int(found_asic_count)
                chain = int(chain)
                print(
                    f"Power-off message found: Chain {chain}, "
                    f"ASIC count: {found_asic_count}, Board: {board}"
                )  # Debug statement
                results.add((
                    worker_id,
                    ip,
                    log_file,
                    "ASIC Error",
                    f"Chain {chain} has failed with {found_asic_count} ASICs "
                    f"found and will power off hash board {board}",
                ))

            # Check for EEPROM errors
            for match in eeprom_error_pattern.finditer(log_content):
                chain = match.group(1)
                print(f"EEPROM error found: Chain {chain}")  # Debug statement
                results.add((
                    worker_id,
                    ip,
                    log_file,
                    "EEPROM Error",
                    f"Data load fail for chain {chain}",
                ))

            # Check for chip bin errors
            for match in chip_bin_pattern.finditer(log_content):
                chain = match.group(1)
                print(f"Chip bin error found: Chain {chain}")  # Debug statement
                results.add((
                    worker_id,
                    ip,
                    log_file,
                    "Chip Bin Error",
                    f"No chip bin for chain {chain}",
                ))
    except Exception as e:
        # Deliberate best-effort boundary: report and return partial findings.
        print(f"Error checking logs on {ip}: {e}")
    return logs, asic_errors, results
2024-05-25 07:45:09 +01:00
# Function to get worker ID
def get_worker_id(ssh_client):
    """Read the worker ID from the remote ``/config/cgminer.conf``.

    The worker ID is the run of word characters after the first dot in the
    value of the first ``"user"`` entry found in the file (for example
    ``"user": "account.rig01"`` yields ``"rig01"``).

    Args:
        ssh_client: Connected client whose ``exec_command(cmd)`` returns a
            ``(stdin, stdout, stderr)`` triple.

    Returns:
        The worker ID string, or ``"Unknown"`` when no matching ``"user"``
        entry exists or when any error occurs while reading the file.
    """
    try:
        print("Getting worker ID")
        _, stdout, _ = ssh_client.exec_command("cat /config/cgminer.conf")
        config_content = stdout.read().decode("utf-8")
        # Extract the worker ID from the user field
        match = re.search(r'"user" *: *"[^.]*\.(\w+)"', config_content)
        if match:
            worker_id = match.group(1)
            print(f"Got Worker ID: {worker_id}")
        else:
            worker_id = "Unknown"
    except Exception as e:
        # Best effort: an unreadable config must not stop the log scan.
        print(f"Error getting worker ID: {e}")
        worker_id = "Unknown"
    return worker_id
# Main function to iterate over IPs and check for errors
def main():
    """Scan every host listed in ``ips.txt`` and write findings to ``results.csv``.

    For each address, the credential pairs in the module-level ``credentials``
    mapping are tried in order until one SSH login succeeds. On success the
    worker ID is read, the remote logs are scanned, and every finding is
    collected as a ``(worker_id, ip, log_file, error_type, message)`` row.
    Rows are de-duplicated and written in a stable, sorted order.

    Passwords are intentionally never printed.
    """
    ips = read_ips("ips.txt")
    results = set()  # Using set to avoid duplicate entries
    for ip in ips:
        print(f"Processing IP: {ip}")
        connected = False
        for creds in credentials.values():
            if connected:
                break
            for username, password in creds:
                ssh_client = paramiko.SSHClient()
                # NOTE(review): AutoAddPolicy accepts unknown host keys without
                # verification; acceptable only on a trusted network.
                ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                try:
                    print(f"Trying {username} on {ip}")
                    ssh_client.connect(ip, username=username, password=password)
                    connected = True
                    worker_id = get_worker_id(ssh_client)
                    logs, asic_errors, asic_results = check_logs(
                        ip, ssh_client, worker_id
                    )
                    results.update(asic_results)
                    for log_file, error_type, keyword in logs:
                        results.add((worker_id, ip, log_file, error_type, keyword))

                    # asic_errors does not record which file a match came
                    # from. Use the last keyword-hit file when there is one
                    # and an explicit placeholder otherwise, rather than
                    # relying on a loop variable that may be undefined or
                    # left over from a previous host.
                    asic_log_file = logs[-1][0] if logs else "Unknown"

                    # Chain -> number of (chain, asic_count) entries seen.
                    # NOTE(review): check_logs returns asic_errors as a set,
                    # so identical (chain, 0) readings collapse into one
                    # entry and this counter sees distinct counts only --
                    # confirm whether repeated zero readings should be kept.
                    unique_asic_errors = {}
                    for chain, asic_count in asic_errors:
                        failed_checks = unique_asic_errors.get(chain, 0) + 1
                        unique_asic_errors[chain] = failed_checks
                        if asic_count == 0 and failed_checks == 3:
                            results.add((
                                worker_id,
                                ip,
                                asic_log_file,
                                "ASIC Error",
                                f"Chain {chain} has 3 failed checks with "
                                f"{asic_count} ASICs found",
                            ))
                    break
                except Exception as e:
                    print(f"Connection failed for {ip} with {username} - {e}")
                finally:
                    # Runs on success (before the break takes effect) and on
                    # failure, so the client is always released exactly once.
                    ssh_client.close()

    # Write results to CSV
    print("Writing results to CSV")
    # newline="" lets the csv module control line endings itself.
    with open("results.csv", "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(
            ["Worker ID", "IP Address", "Log File", "Error Type", "Error Message"]
        )
        # Sets iterate in arbitrary order; sort on the string form of each
        # field so the report is reproducible between runs.
        writer.writerows(
            sorted(results, key=lambda row: tuple(str(field) for field in row))
        )
    print("Done")
# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()