In [4]:
import numpy as np
import pandas as pd
import os
import re
from typing import TextIO

In [66]:
def _match_log_line(pattern: str, line: str, what: str) -> tuple:
    """Return the regex groups of ``line`` matched in full against ``pattern``.

    Raises:
        ValueError: if ``line`` does not match; ``what`` names the kind of
            line that was expected so the message points at the culprit.
    """
    matched = re.fullmatch(pattern, line)
    if matched is None:
        raise ValueError(f'Malformed {what} line in training log: {line!r}')
    return matched.groups()


def read_training_log(filename: str) -> dict:
    """Parse a training log file into a dictionary of settings and metrics.

    Expected layout, one entry per line:
        Model: <name>, Batch_size: <int>, Epochs: <int>, Learning Rate: <float>
        Current device is <device>.
        Epoch [<i>/<Epochs>], Loss: <float>, Accuracy: <float>%
        ... (one line per logged epoch)

    Trailing blank lines are ignored, so the file may or may not end with a
    newline.

    Args:
        filename: Path of the UTF-8 encoded log file.

    Returns:
        A dict with the keys 'model', 'batch_size', 'epochs', 'learning_rate',
        'device', 'losses' (list of float), 'accuracy' (list of float),
        'best_epoch' (1-based index of the first epoch reaching the highest
        accuracy), 'best_accuracy' and 'final_accuracy'.

    Raises:
        ValueError: if a header line is missing, no epoch line is present, a
            line does not follow the expected layout, or a numeric field
            cannot be converted.
    """
    with open(filename, mode='r', encoding='utf-8') as log_file:
        log_lines = log_file.read().split('\n')

    # Strip trailing blank lines instead of blindly dropping the last element:
    # a file without a final newline would otherwise lose its last epoch.
    while log_lines and not log_lines[-1].strip():
        log_lines.pop()

    if len(log_lines) < 2:
        raise ValueError(f'Training log {filename!r} is missing its two header lines')
    description, device_info, epochs = log_lines[0], log_lines[1], log_lines[2:]
    if not epochs:
        raise ValueError(f'Training log {filename!r} contains no epoch lines')

    model, batch_size, epoch_count, learning_rate = _match_log_line(
        r'^Model: (.*?), Batch_size: (.*?), Epochs: (.*?), Learning Rate: (.*?)$',
        description,
        'description',
    )
    batch_size, epoch_count = int(batch_size), int(epoch_count)
    learning_rate = float(learning_rate)
    device, = _match_log_line(r'^Current device is (.*?)\.', device_info, 'device')

    losses, accuracies = [], []
    for epoch in epochs:
        # The epoch total in each line must equal the count announced in the header.
        _, loss, accuracy = _match_log_line(
            rf'^Epoch \[(.*?)/{epoch_count}\], Loss: (.*?), Accuracy: (.*?)%$',
            epoch,
            'epoch',
        )
        losses.append(float(loss))
        accuracies.append(float(accuracy))

    # Enumerate from 1 so the reported epoch is 1-based; max() keeps the
    # earliest epoch when several share the top accuracy.
    best_epoch, best_accuracy = max(enumerate(accuracies, start=1), key=lambda pair: pair[1])
    final_accuracy = accuracies[-1]

    return {
        'model': model,
        'batch_size': batch_size,
        'epochs': epoch_count,
        'learning_rate': learning_rate,
        'device': device,
        'losses': losses,
        'accuracy': accuracies,
        'best_epoch': best_epoch,
        'best_accuracy': best_accuracy,
        'final_accuracy': final_accuracy,
    }

In [53]:
# Parse a single log file and display the resulting dictionary.
read_training_log(
    filename='C:/PycharmProjects/revamped/ai-programming/log2/250411-101655.log'
)

{'model': 'cnn',
 'batch_size': 8,
 'epochs': 40,
 'learning_rate': 3e-05,
 'device': 'cuda',
 'losses': [1.7643,
  1.5092,
  1.4144,
  1.3491,
  1.2937,
  1.2415,
  1.2344,
  1.2278,
  1.2248,
  1.2177,
  1.2106,
  1.2108,
  1.2098,
  1.211,
  1.214,
  1.2125,
  1.2084,
  1.2102,
  1.2108,
  1.209,
  1.2065,
  1.2073,
  1.2122,
  1.2061,
  1.2102,
  1.2059,
  1.2063,
  1.2123,
  1.2071,
  1.2116,
  1.2096,
  1.2091,
  1.2105,
  1.2098,
  1.2116,
  1.2071,
  1.2088,
  1.2101,
  1.209,
  1.2122],
 'accuracy': [46.05,
  50.42,
  53.93,
  57.21,
  57.84,
  59.72,
  59.7,
  60.13,
  60.45,
  60.24,
  60.49,
  60.6,
  60.72,
  60.63,
  60.69,
  60.68,
  60.73,
  60.75,
  60.74,
  60.76,
  60.75,
  60.75,
  60.75,
  60.74,
  60.73,
  60.73,
  60.73,
  60.73,
  60.73,
  60.73,
  60.73,
  60.73,
  60.73,
  60.73,
  60.73,
  60.73,
  60.73,
  60.73,
  60.73,
  60.73],
 'best_epoch': 20,
 'best_accuracy': 60.76}

Experiment 1

In [72]:
log_path = 'C:/PycharmProjects/revamped/ai-programming/log'

# Parse every file in the log directory first and build the frame in a single
# step, rather than growing it one row at a time. The file names are sorted
# because os.listdir returns entries in arbitrary order, which would make the
# row indices differ between runs.
log_records = [
    read_training_log(os.path.join(log_path, logfile_name))
    for logfile_name in sorted(os.listdir(log_path))
]

# `columns` fixes the display order of the parsed fields.
logs_df = pd.DataFrame(
    log_records,
    columns=['model', 'batch_size', 'epochs', 'learning_rate', 'device',
             'best_epoch', 'best_accuracy', 'final_accuracy', 'losses', 'accuracy'],
)

# Show only the runs of the 'cnn' model.
logs_df.query("model == 'cnn'")

Unnamed: 0,model,batch_size,epochs,learning_rate,device,best_epoch,best_accuracy,final_accuracy,losses,accuracy
0,cnn,16,5,0.0001,cuda,5,66.22,66.22,"[1.6467, 1.4012, 1.2711, 1.1815, 1.1187]","[49.29, 55.25, 60.53, 63.58, 66.22]"
1,cnn,16,5,0.001,cuda,5,66.48,66.48,"[1.504, 1.1934, 1.0893, 1.0292, 0.9826]","[56.64, 64.84, 65.22, 65.6, 66.48]"
2,cnn,16,5,0.01,cuda,5,23.25,23.25,"[2.3096, 2.3383, 2.3126, 2.315, 2.2814]","[10.0, 10.0, 10.01, 10.0, 23.25]"
3,cnn,16,5,0.1,cuda,1,10.0,10.0,"[6.192, 2.4063, 88.2375, 523.6257, 3329.7394]","[10.0, 10.0, 10.0, 10.0, 10.0]"
4,cnn,16,10,0.0001,cuda,10,67.52,67.52,"[1.6438, 1.3855, 1.2674, 1.1842, 1.1244, 1.056...","[51.06, 56.91, 60.94, 63.8, 65.93, 66.51, 66.7..."
...,...,...,...,...,...,...,...,...,...,...
59,cnn,128,20,0.1,cuda,1,10.0,10.0,"[24.8598, 2.307, 2.3078, 2.3082, 2.5967, 2.303...","[10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10...."
60,cnn,128,40,0.0001,cuda,18,58.07,58.07,"[1.837, 1.565, 1.4684, 1.4025, 1.3544, 1.3077,...","[43.13, 47.65, 52.76, 55.07, 56.37, 57.05, 57...."
61,cnn,128,40,0.001,cuda,11,74.34,74.31,"[1.5255, 1.2007, 1.0684, 0.9906, 0.923, 0.8262...","[59.13, 63.16, 67.96, 69.34, 71.27, 72.96, 73...."
62,cnn,128,40,0.01,cuda,1,10.0,10.0,"[2.3278, 2.3032, 2.3033, 2.3034, 2.3034, 2.302...","[10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10...."


In [74]:
# Mean and standard deviation of the best accuracy for each batch size.
(
    logs_df
    .groupby(by=['batch_size'])
    .agg({'best_accuracy': ['mean', 'std']})
    .reset_index()
)

Unnamed: 0_level_0,batch_size,best_accuracy,best_accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std
0,16,43.31125,22.888237
1,32,40.218437,23.276494
2,64,41.363529,28.14407
3,128,39.499375,28.000216


In [75]:
# Mean and standard deviation of the best accuracy for each epoch budget.
(
    logs_df
    .groupby(by=['epochs'])
    .agg({'best_accuracy': ['mean', 'std']})
    .reset_index()
)

Unnamed: 0_level_0,epochs,best_accuracy,best_accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std
0,5,34.408,24.897286
1,10,40.424167,25.771418
2,20,46.64,22.473082
3,40,44.099167,24.531294


In [76]:
# Mean and standard deviation of the best accuracy for each learning rate.
(
    logs_df
    .groupby(by=['learning_rate'])
    .agg({'best_accuracy': ['mean', 'std']})
    .reset_index()
)

Unnamed: 0_level_0,learning_rate,best_accuracy,best_accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std
0,0.0001,57.6272,7.264783
1,0.001,62.702917,15.361688
2,0.01,27.486667,19.355145
3,0.1,16.787083,15.768022


Experiment 2

In [81]:
log_path = 'C:/PycharmProjects/revamped/ai-programming/log2'

# Parse every file in the log directory first and build the frame in a single
# step, rather than growing it one row at a time. The file names are sorted
# because os.listdir returns entries in arbitrary order, which would make the
# row indices differ between runs.
log_records = [
    read_training_log(os.path.join(log_path, logfile_name))
    for logfile_name in sorted(os.listdir(log_path))
]

# `columns` fixes the display order of the parsed fields.
logs_df = pd.DataFrame(
    log_records,
    columns=['model', 'batch_size', 'epochs', 'learning_rate', 'device',
             'best_epoch', 'best_accuracy', 'final_accuracy', 'losses', 'accuracy'],
)

# Show only the runs trained for 20 epochs.
logs_df.query('epochs == 20')

Unnamed: 0,model,batch_size,epochs,learning_rate,device,best_epoch,best_accuracy,final_accuracy,losses,accuracy
3,cnn,8,20,3e-05,cuda,12,61.35,61.15,"[1.769, 1.5146, 1.4161, 1.3445, 1.2904, 1.2351...","[45.77, 50.37, 53.33, 55.65, 58.44, 59.92, 60...."
4,cnn,8,20,0.0001,cuda,16,69.54,69.45,"[1.6306, 1.3442, 1.2079, 1.1205, 1.0648, 0.986...","[52.98, 60.03, 62.68, 63.9, 66.14, 68.68, 68.6..."
5,cnn,8,20,0.0003,cuda,17,75.77,75.7,"[1.4824, 1.1542, 1.0367, 0.9536, 0.9015, 0.796...","[59.27, 63.56, 66.9, 69.77, 72.49, 74.29, 74.3..."
6,cnn,8,20,0.001,cuda,14,73.44,73.21,"[1.5066, 1.2262, 1.1395, 1.08, 1.0318, 0.897, ...","[55.43, 61.41, 65.07, 65.68, 67.09, 71.22, 71...."
7,cnn,8,20,0.003,cuda,18,67.4,67.28,"[2.2996, 1.9138, 1.7001, 1.6033, 1.5492, 1.403...","[15.33, 31.21, 40.62, 42.97, 47.35, 52.26, 56...."
8,cnn,16,20,3e-05,cuda,17,56.13,56.11,"[1.8283, 1.5868, 1.4852, 1.426, 1.386, 1.3355,...","[40.87, 47.41, 51.03, 53.17, 54.68, 55.05, 55...."
9,cnn,16,20,0.0001,cuda,16,67.7,67.57,"[1.6397, 1.3634, 1.2503, 1.1741, 1.1149, 1.044...","[53.24, 57.36, 60.88, 63.56, 65.7, 66.48, 66.9..."
10,cnn,16,20,0.0003,cuda,12,74.13,74.05,"[1.4999, 1.1756, 1.0529, 0.9801, 0.9261, 0.824...","[58.34, 65.25, 66.17, 69.38, 70.27, 73.02, 73...."
11,cnn,16,20,0.001,cuda,20,75.18,75.18,"[1.4705, 1.1686, 1.0635, 0.9935, 0.9613, 0.833...","[58.65, 64.6, 65.67, 68.56, 70.48, 73.08, 73.7..."
12,cnn,16,20,0.003,cuda,18,67.49,67.49,"[2.0459, 1.7528, 1.5899, 1.4962, 1.4129, 1.251...","[29.07, 42.47, 46.27, 48.66, 53.21, 58.95, 59...."


In [82]:
# Mean and standard deviation of the best accuracy for each model.
(
    logs_df
    .groupby(by=['model'])
    .agg({'best_accuracy': ['mean', 'std']})
    .reset_index()
)

Unnamed: 0_level_0,model,best_accuracy,best_accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std
0,cnn,67.591739,7.039065
1,cnn2,66.751,7.66751
2,fcnn,48.234,2.324554


In [83]:
# Mean and standard deviation of the best accuracy for each batch size.
(
    logs_df
    .groupby(by=['batch_size'])
    .agg({'best_accuracy': ['mean', 'std']})
    .reset_index()
)

Unnamed: 0_level_0,batch_size,best_accuracy,best_accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std
0,8,63.535556,10.63657
1,16,61.286667,10.956401
2,32,60.456,11.025051
3,64,58.968667,11.151845


In [84]:
# Mean and standard deviation of the best accuracy for each learning rate.
(
    logs_df
    .groupby(by=['learning_rate'])
    .agg({'best_accuracy': ['mean', 'std']})
    .reset_index()
)

Unnamed: 0_level_0,learning_rate,best_accuracy,best_accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std
0,3e-05,53.687692,4.73107
1,0.0001,60.421538,7.635616
2,0.0003,65.380769,10.991919
3,0.001,65.1,13.83647
4,0.003,61.645,11.871847
