# Recovered from a `git format-patch` export whose line breaks had been collapsed:
#   commit 77fc6c21f64417d9a516ec7fe5d8711734bbf20f
#   From: Tristan Smith, Thu, 6 Jun 2024 00:15:29 -0400
#   Subject: [PATCH] added qt branch and version to start testing
# That commit creates qt.py (this module) and deletes the generated
# results.csv / results.txt, which this program rewrites on every run.
"""PyQt5 tool that scans miners over SSH and reports known log error signatures."""

import csv
import json
import logging
import os
import re
import shlex
import sys
from collections import Counter, defaultdict
from datetime import datetime

import paramiko
from PyQt5.QtCore import Qt
from PyQt5.QtWidgets import (
    QApplication,
    QFileDialog,
    QHBoxLayout,
    QLabel,
    QPlainTextEdit,
    QPushButton,
    QSplitter,
    QTextEdit,
    QTreeWidget,
    QTreeWidgetItem,
    QVBoxLayout,
    QWidget,
)

# Constants for error types
ASIC_ERROR = "ASIC Error"
EEPROM_ERROR = "EEPROM Error"
CHIP_BIN_ERROR = "Chip Bin Error"

# Number of distinct zero-ASIC detection attempts that marks a chain as failed
ASIC_FAILED_CHECKS = 3

# Logging configuration
logging.basicConfig(
    filename='miner_logs.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)


def load_credentials(file_path):
    """Load the credentials JSON file and return the parsed object.

    start_process() iterates the result as a mapping whose values are
    sequences of (username, password) pairs.

    Raises:
        OSError: the file cannot be opened.
        ValueError: the file is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


def load_error_keywords(file_path):
    """Load the keyword -> error type mapping stored under 'error_keywords'.

    Raises:
        OSError: the file cannot be opened.
        ValueError: the file is not valid JSON.
        KeyError: the top-level 'error_keywords' key is missing.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)['error_keywords']


# Regex patterns for ASIC chip errors and power-off messages
asic_pattern = re.compile(r"Chain\[(\d+)\]: find (\d+) asic, times \d+")
power_off_pattern = re.compile(r"Chain (\d+) only find (\d+) asic, will power off hash board (\d+)")
eeprom_error_pattern = re.compile(r"Data load fail for chain (\d+)\.")
chip_bin_pattern = re.compile(r"No chip bin, chain = (\d+)")


def read_ips(file_path):
    """Return the non-blank, whitespace-stripped lines of *file_path*.

    Blank lines are skipped so that a trailing newline or spacer line does
    not turn into a connection attempt against an empty host name.
    """
    with open(file_path, 'r') as file:
        stripped = (line.strip() for line in file)
        return [ip for ip in stripped if ip]


def establish_ssh_connection(ip, username, password):
    """Open an SSH connection to *ip*; return the client, or None on failure.

    The password is deliberately never written to the log file.
    """
    ssh_client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without
    # verification; confirm that is acceptable on the network being scanned.
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh_client.connect(ip, username=username, password=password, timeout=5)
    except Exception as e:  # boundary: any connection/auth failure means "try the next credential"
        logging.error(f"Failed to connect to {ip} with {username} - {e}")
        ssh_client.close()
        return None
    logging.info(f"Connected to {ip} with {username}")
    return ssh_client


def connect_with_credentials(ip, credentials):
    """Try every (username, password) pair in *credentials* against *ip*.

    Returns the first client that connects, or None when none of them do.
    """
    for creds in credentials.values():
        for username, password in creds:
            ssh_client = establish_ssh_connection(ip, username, password)
            if ssh_client:
                return ssh_client
    return None


def execute_ssh_command(ssh_client, command):
    """Run *command* over SSH and return its stdout as text, or None on error.

    Undecodable bytes are replaced rather than raising, so a single bad byte
    in a log file does not discard the whole file.
    """
    try:
        _stdin, stdout, _stderr = ssh_client.exec_command(command)
        return stdout.read().decode('utf-8', errors='replace')
    except Exception as e:  # boundary: callers treat None as "no output"
        logging.error(f"Error executing command '{command}': {e}")
        return None


def get_worker_id(ssh_client):
    """Return the worker name from /config/cgminer.conf, or "Unknown".

    The worker name is the part after the first '.' in the "user" value.
    """
    config_content = execute_ssh_command(ssh_client, "cat /config/cgminer.conf")
    if config_content:
        match = re.search(r'"user" *: *"[^.]*\.(\w+)"', config_content)
        if match:
            return match.group(1)
    return "Unknown"


def _iter_chain_errors(log_content):
    """Yield (error_type, message) for every chain failure line in one log."""
    for match in power_off_pattern.finditer(log_content):
        chain, found_asic_count, board = match.groups()
        yield ASIC_ERROR, (
            f"Chain {int(chain)} has failed with {int(found_asic_count)} ASICs found "
            f"and will power off hash board {board}"
        )
    for match in eeprom_error_pattern.finditer(log_content):
        yield EEPROM_ERROR, f"Data load fail for chain {match.group(1)}"
    for match in chip_bin_pattern.finditer(log_content):
        yield CHIP_BIN_ERROR, f"No chip bin for chain {match.group(1)}"


def check_logs(ip, ssh_client, worker_id, current_date, error_keywords):
    """Scan every file under /var/log/ on the remote host for known errors.

    Returns:
        A tuple (logs, asic_errors, results):
        logs -- list of (log_file, error_type, keyword) for each configured
            keyword found in a log file.
        asic_errors -- Counter keyed by (chain, asic_count); the value is the
            number of distinct retry indices ("times N") seen for that key
            across all log files. Iterating it yields (chain, asic_count).
        results -- list of (current_date, worker_id, ip, log_file,
            error_type, message) rows for power-off, EEPROM and chip-bin
            lines, de-duplicated per log file.
    """
    logs = []
    results = []
    attempts_seen = defaultdict(set)

    log_files_content = execute_ssh_command(ssh_client, "find /var/log/ -type f")
    for log_file in (log_files_content or "").splitlines():
        # Quote the path: names with spaces or shell metacharacters must not
        # be interpreted by the remote shell.
        log_content = execute_ssh_command(ssh_client, f"cat {shlex.quote(log_file)}")
        if not log_content:
            continue

        logs.extend(
            (log_file, error_type, keyword)
            for keyword, error_type in error_keywords.items()
            if keyword in log_content
        )

        for match in asic_pattern.finditer(log_content):
            chain, asic_count = match.groups()
            # The pattern ends in "times <n>"; <n> is the retry index. Keeping
            # it lets repeated copies of the same attempt (e.g. the same boot
            # logged to several files) collapse while distinct retries count.
            attempt = int(match.group(0).rpartition(" ")[2])
            attempts_seen[(chain, int(asic_count))].add(attempt)

        # dict.fromkeys de-duplicates while preserving first-seen order.
        for error_type, message in dict.fromkeys(_iter_chain_errors(log_content)):
            results.append((current_date, worker_id, ip, log_file, error_type, message))

    asic_errors = Counter({key: len(found) for key, found in attempts_seen.items()})
    return logs, asic_errors, results


def summarize_asic_failures(asic_errors, current_date, worker_id, ip):
    """Return result rows for chains that found 0 ASICs on every retry.

    *asic_errors* is the Counter returned by check_logs(). A chain is
    reported once when at least ASIC_FAILED_CHECKS distinct attempts found
    zero ASICs.
    """
    rows = []
    ordered = sorted(asic_errors.items(), key=lambda item: (int(item[0][0]), item[0][1]))
    for (chain, asic_count), attempts in ordered:
        if asic_count == 0 and attempts >= ASIC_FAILED_CHECKS:
            rows.append((
                current_date, worker_id, ip, "N/A", ASIC_ERROR,
                f"Chain {chain} has {ASIC_FAILED_CHECKS} failed checks with {asic_count} ASICs found",
            ))
    return rows


def scan_machine(ip, ssh_client, current_date, error_keywords):
    """Collect every result row for one connected machine.

    Returns:
        (worker_id, rows) where rows are (date, worker_id, ip, log_file,
        error_type, message) tuples: chain errors first, then keyword hits,
        then the failed-check summary.
    """
    worker_id = get_worker_id(ssh_client)
    logs, asic_errors, rows = check_logs(ip, ssh_client, worker_id, current_date, error_keywords)
    rows = list(rows)
    rows.extend(
        (current_date, worker_id, ip, log_file, error_type, keyword)
        for log_file, error_type, keyword in logs
    )
    rows.extend(summarize_asic_failures(asic_errors, current_date, worker_id, ip))
    return worker_id, rows


def write_csv_file(file_path, results):
    """Write *results* to *file_path* as CSV with a header row."""
    with open(file_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Date", "Worker ID", "IP Address", "Log File", "Error Type", "Error Message"])
        writer.writerows(results)


def write_text_file(file_path, results):
    """Write *results* to *file_path* as a per-worker plain-text report.

    A worker heading is emitted whenever the worker id changes between
    consecutive rows, with a blank line separating workers.
    """
    with open(file_path, 'w', encoding='utf-8') as file:
        current_worker = None
        for _date, worker_id, _ip, _log_file, error_type, error_message in results:
            if worker_id != current_worker:
                if current_worker is not None:
                    file.write("\n")  # Add a blank line between different workers
                file.write(f"{worker_id}\n")
                current_worker = worker_id
            file.write(f"- {error_type}\n")
            file.write(f"--- {error_message}\n")
            file.write("-" * 80 + "\n")


def browse_file(label):
    """Show an open-file dialog and store the chosen path in *label*."""
    options = QFileDialog.Options()
    options |= QFileDialog.ReadOnly
    file_path, _ = QFileDialog.getOpenFileName(
        None, "Select File", "", "All Files (*);;Text Files (*.txt)", options=options
    )
    if file_path:
        label.setText(file_path)


class MinerApp(QWidget):
    """Main window: pick the input files, run the scan, browse the findings."""

    def __init__(self):
        super().__init__()
        self.initUI()

    def initUI(self):
        """Build the widgets and layout, then show the window."""
        main_layout = QVBoxLayout()

        # File path labels
        self.ips_label = QLabel("No IPs file selected")
        self.credentials_label = QLabel("No credentials file selected")
        self.errors_label = QLabel("No errors file selected")

        # Set default paths if files exist
        self.set_default_paths()

        # Text edit for logs
        self.logs_text = QPlainTextEdit()
        self.logs_text.setReadOnly(True)

        # Tree views for machines and errors
        self.machines_tree = QTreeWidget()
        self.machines_tree.setHeaderLabel("Machines")
        self.machines_tree.itemClicked.connect(self.display_errors)

        self.errors_tree = QTreeWidget()
        self.errors_tree.setHeaderLabel("Errors")

        # Buttons
        browse_ips_btn = QPushButton('Browse IPs File')
        browse_ips_btn.clicked.connect(lambda: browse_file(self.ips_label))

        browse_credentials_btn = QPushButton('Browse Credentials File')
        browse_credentials_btn.clicked.connect(lambda: browse_file(self.credentials_label))

        browse_errors_btn = QPushButton('Browse Errors File')
        browse_errors_btn.clicked.connect(lambda: browse_file(self.errors_label))

        start_btn = QPushButton('Start')
        start_btn.clicked.connect(self.start_process)

        # Splitter for tree views
        tree_splitter = QSplitter()
        tree_splitter.addWidget(self.machines_tree)
        tree_splitter.addWidget(self.errors_tree)

        # Splitter for the main layout and logs
        main_splitter = QSplitter(Qt.Vertical)
        main_splitter.addWidget(tree_splitter)
        main_splitter.addWidget(self.logs_text)
        main_splitter.setSizes([500, 100])  # Initial sizes: larger for the tree views, smaller for the logs

        # Add widgets to layout
        main_layout.addWidget(browse_ips_btn)
        main_layout.addWidget(self.ips_label)
        main_layout.addWidget(browse_credentials_btn)
        main_layout.addWidget(self.credentials_label)
        main_layout.addWidget(browse_errors_btn)
        main_layout.addWidget(self.errors_label)
        main_layout.addWidget(start_btn)
        main_layout.addWidget(main_splitter)

        self.setLayout(main_layout)
        self.setWindowTitle('Miner Error Checker')
        self.show()

    def set_default_paths(self):
        """Pre-fill each path label when its default file exists in the CWD."""
        defaults = (
            (self.ips_label, "ips.txt"),
            (self.credentials_label, "credentials.json"),
            (self.errors_label, "errors.json"),
        )
        for label, path in defaults:
            if os.path.exists(path):
                label.setText(path)

    def start_process(self):
        """Scan every listed machine, fill the tree, and write the reports.

        Failures to read the inputs or write the outputs are reported in the
        log pane instead of propagating out of the Qt slot.
        NOTE(review): the scan runs on the GUI thread, so the window does not
        repaint until it finishes.
        """
        ips_path = self.ips_label.text()
        credentials_path = self.credentials_label.text()
        errors_path = self.errors_label.text()

        if ips_path == "No IPs file selected" or credentials_path == "No credentials file selected" or errors_path == "No errors file selected":
            self.logs_text.appendPlainText("Please select all required files.")
            return

        try:
            credentials = load_credentials(credentials_path)
            error_keywords = load_error_keywords(errors_path)
            ips = read_ips(ips_path)
        except (OSError, ValueError, KeyError) as e:
            logging.error(f"Failed to load input files: {e}")
            self.logs_text.appendPlainText(f"Failed to load input files: {e}")
            return

        results = []
        current_date = datetime.now().strftime('%Y-%m-%d')

        self.machines_tree.clear()
        self.errors_tree.clear()

        for ip in ips:
            self.logs_text.appendPlainText(f"Processing IP: {ip}")
            logging.info(f"Processing IP: {ip}")

            ssh_client = connect_with_credentials(ip, credentials)
            if ssh_client is None:
                self.logs_text.appendPlainText(f"Could not connect to {ip} with any known credentials.")
                continue

            try:
                worker_id, machine_results = scan_machine(ip, ssh_client, current_date, error_keywords)
            finally:
                ssh_client.close()

            results.extend(machine_results)
            # Only this machine's rows go on its tree item.
            self.add_machine_item(ip, worker_id, machine_results)

        csv_file = 'results.csv'
        text_file = 'results.txt'
        try:
            # Write results to CSV
            logging.info(f"Writing results to {csv_file}")
            write_csv_file(csv_file, results)

            # Write results to text file
            logging.info(f"Writing results to {text_file}")
            write_text_file(text_file, results)
        except OSError as e:
            logging.error(f"Failed to write results: {e}")
            self.logs_text.appendPlainText(f"Failed to write results: {e}")
            return

        self.logs_text.appendPlainText("Process completed. Results saved to results.csv and results.txt.")

    def add_machine_item(self, ip, worker_id, results):
        """Add a machine node carrying *results* for display_errors()."""
        machine_item = QTreeWidgetItem([f"{worker_id} ({ip})"])
        # Qt.UserRole is the first role reserved for application data; the
        # bare role 1 used previously is Qt.DecorationRole.
        machine_item.setData(0, Qt.UserRole, list(results))
        self.machines_tree.addTopLevelItem(machine_item)

    def display_errors(self, item, column):
        """Show the errors stored on the clicked machine item."""
        self.errors_tree.clear()
        results = item.data(0, Qt.UserRole)  # Retrieve stored results from the item
        if results:
            for _date, _worker_id, _ip, _log_file, error_type, error_message in results:
                error_item = QTreeWidgetItem([f"{error_type}: {error_message}"])
                self.errors_tree.addTopLevelItem(error_item)


def main():
    """Run the PyQt5 application."""
    app = QApplication(sys.argv)
    ex = MinerApp()  # keep a reference so the window is not garbage-collected
    sys.exit(app.exec_())


if __name__ == "__main__":
    main()