"""Deduplicate and aggregate the error rows stored in ``results.csv``.

Run as a script, this file reads ``results.csv`` from the current working
directory, collapses it to one row per (Worker ID, IP Address, Log File,
Error Type) combination, writes that summary to
``cleaned_results_grouped.csv`` and prints it.
"""

# NOTE: Not yet verified against results collected from a large number of
# machines.

import pandas as pd

INPUT_PATH = 'results.csv'
OUTPUT_PATH = 'cleaned_results_grouped.csv'

# Columns that together identify one group in the summary.
GROUP_COLUMNS = ['Worker ID', 'IP Address', 'Log File', 'Error Type']
SORT_COLUMNS = ['Worker ID', 'IP Address']
MESSAGE_COLUMN = 'Error Message'
COUNT_COLUMN = 'Error Count'


def group_errors(df: pd.DataFrame) -> pd.DataFrame:
    """Return one summary row per distinct combination of ``GROUP_COLUMNS``.

    Exact duplicate rows are dropped first. Each summary row then carries the
    first non-null ``Error Message`` of its group and an ``Error Count`` equal
    to the number of de-duplicated rows in that group.

    Args:
        df: Frame containing at least ``GROUP_COLUMNS`` and ``Error Message``.
            It is not modified.

    Returns:
        A new frame with columns ``GROUP_COLUMNS`` + ``Error Message`` +
        ``Error Count``.

    Raises:
        KeyError: If any required column is absent from ``df``.
    """
    required = [*GROUP_COLUMNS, MESSAGE_COLUMN]
    missing = [name for name in required if name not in df.columns]
    if missing:
        raise KeyError(f"input is missing required column(s): {missing}")

    # drop_duplicates/sort_values/assign each return a new frame, so the
    # caller's frame never gains the helper count column.
    rows = (
        df.drop_duplicates()
        .sort_values(by=SORT_COLUMNS)
        .assign(**{COUNT_COLUMN: 1})
    )

    # dropna=False: groupby discards rows whose key contains NaN by default,
    # which would silently lose errors that have e.g. no IP address recorded.
    return rows.groupby(GROUP_COLUMNS, as_index=False, dropna=False).agg({
        MESSAGE_COLUMN: 'first',
        COUNT_COLUMN: 'sum',
    })


def main(input_path: str = INPUT_PATH, output_path: str = OUTPUT_PATH) -> pd.DataFrame:
    """Read ``input_path``, write the grouped summary to ``output_path``, print it.

    Returns:
        The grouped summary frame that was written.
    """
    grouped = group_errors(pd.read_csv(input_path))
    grouped.to_csv(output_path, index=False)
    print(grouped)
    return grouped


if __name__ == "__main__":
    main()