In [1]:
import re
import os
import itertools
import kaleido
import plotly.graph_objects as go
import plotly.io as pio
import glob
import numpy as np


# Earlier, stricter pattern kept for reference: it only accepts whitespace and "|" between
# the Accuracy group and the Precision group.
#RE_EPOCH = re.compile(r"Epoch\s+(\d+):.*Accuracy:\s+(\d+\.\d+)%\/(\d+\.\d+)%\s+\|\s+Precision:\s+(\d+\.\d+)%\/(\d+\.\d+)%\s+\|\s+Recall:\s+(\d+\.\d+)%\/(\d+\.\d+)%\s+\|\s+F1:\s+(\d+\.\d+)%\/(\d+\.\d+)%")
# Matches one epoch summary: "Epoch <n>:" ... "Accuracy: <a>%/<b>%", then lazily skips any
# characters -- including line breaks, via `(?:.|[\r\n])+?` -- up to "| Precision: <a>%/<b>%",
# followed by "| Recall: <a>%/<b>%" and "| F1: <a>%/<b>%".
# Nine capture groups: the epoch number, then a pair of percentages for each of accuracy,
# precision, recall and F1 (plot_single reads each pair as train first, test second).
RE_EPOCH_GARBAGE = re.compile(r"Epoch\s+(\d+):.*Accuracy:\s+(\d+\.\d+)%\/(\d+\.\d+)%(?:.|[\r\n])+?\|\s+Precision:\s+(\d+\.\d+)%\/(\d+\.\d+)%\s+\|\s+Recall:\s+(\d+\.\d+)%\/(\d+\.\d+)%\s+\|\s+F1:\s+(\d+\.\d+)%\/(\d+\.\d+)%")

def _percent_change(values):
    """Return the percentage change from the first to the last value, or None.

    None is returned when the change is undefined: the sequence is empty, or
    its first value is zero (a relative change from zero has no meaning).
    """
    if not values or values[0] == 0:
        return None
    return 100 * (values[-1] - values[0]) / values[0]


def plot_single(log_file, renderer="vscode"):
    """Parse one training log and plot its per-epoch metrics with plotly.

    The log is read as UTF-8 and scanned with RE_EPOCH_GARBAGE. Each match
    contributes one point to eight traces (train/test accuracy, precision,
    recall and F1). A dropdown menu lets the viewer show all traces or a
    single one; each single-trace entry is labelled with that metric's
    percentage change from the first to the last parsed epoch ("n/a" when the
    log has no matching epochs or the first value is zero).

    Args:
        log_file: Path of the log file to parse.
        renderer: Renderer name passed to ``plotly.io.show``. Defaults to
            "vscode" (the previous hard-coded value); pass None to use
            plotly's default renderer.

    Returns:
        The ``plotly.graph_objects.Figure`` that was displayed.

    Raises:
        OSError: If ``log_file`` cannot be opened.
    """
    # (trace name, line colour), in the same order as the regex capture
    # groups that follow the epoch number.
    trace_styles = [
        ("Train Accuracy", "red"),
        ("Test Accuracy", "blue"),
        ("Train Precision", "green"),
        ("Test Precision", "orange"),
        ("Train Recall", "purple"),
        ("Test Recall", "pink"),
        ("Train F1", "brown"),
        ("Test F1", "gray"),
    ]

    epochs = []
    series = {name: [] for name, _ in trace_styles}

    with open(log_file, "r", encoding='utf-8') as f:
        file_content = f.read()

    for match in RE_EPOCH_GARBAGE.finditer(file_content):
        epoch, *scores = match.groups()
        epochs.append(int(epoch))
        for (name, _), score in zip(trace_styles, scores):
            series[name].append(float(score))

    fig = go.Figure()
    for name, color in trace_styles:
        fig.add_trace(go.Scatter(x=epochs, y=series[name], name=name, visible=True, line={"color": color}))

    # Button args are handed to plotly.js unchanged, so layout keys must be
    # plotly.js attribute paths ("yaxis.title.text"); the Python-only
    # magic-underscore form ("yaxis_title") is not understood there.
    buttons = [{
        "label": "All",
        "method": "update",
        "args": [
            {"visible": [True] * len(trace_styles)},
            {"title.text": "Training Metrics",
             "yaxis.title.text": "Metric Score",
             "showlegend": True},
        ],
    }]

    for name, _ in trace_styles:
        change = _percent_change(series[name])
        change_label = "n/a" if change is None else f"{change:.2f}%"
        buttons.append({
            "label": f"{change_label} {name}",
            "method": "update",
            "args": [
                # Show only the trace this button belongs to.
                {"visible": [other == name for other, _ in trace_styles]},
                {"title.text": f"Training Metrics - {name}",
                 "yaxis.title.text": name,
                 "showlegend": True},
            ],
        })

    fig.update_layout(updatemenus=[{
        "active": 0,
        "buttons": buttons,
        "x": 0.05,
        "y": 1.1,
        "direction": "down",
        "showactive": True,
    }])

    fig.update_layout(
        title=f"Training Metrics - {log_file}",
        xaxis_title="Epoch",
        yaxis_title="Metric Score",
        showlegend=True
    )

    pio.show(fig, renderer=renderer)
    return fig

In [2]:
def process_log_files(log_dir, img_dir):
    """Plot every ``*.log`` file in ``log_dir`` and write a Markdown report.

    For each log (in sorted path order) the figure returned by ``plot_single``
    is exported as a 1024x576 SVG into ``img_dir``, and a section with the
    image and a per-metric summary table is appended to ``plots.md`` in the
    current working directory. ``plots.md`` is overwritten on every call.

    Logs that yield no epoch data get a heading and a short note instead of an
    image and table. Percentage-change cells read "n/a" when the first-epoch
    value is zero, because a relative change from zero is undefined.

    Args:
        log_dir: Directory searched (non-recursively) for ``*.log`` files.
        img_dir: Existing directory that receives the exported SVG images.
    """
    def fmt(value):
        # Same zero-padded, two-decimal cell format for every numeric column.
        return 'n/a' if value is None else f'{value:05.02f}'

    log_files = sorted(glob.glob(os.path.join(log_dir, '*.log')))

    # UTF-8 matches how the logs themselves are read, so non-ASCII file names
    # do not depend on the platform's default encoding.
    with open('plots.md', 'w', encoding='utf-8') as f:
        f.write('# Plots\n')
        for log_file in log_files:
            fname = os.path.splitext(os.path.basename(log_file))[0] + '.svg'
            fpath = os.path.join(img_dir, fname)

            fig = plot_single(log_file=log_file)
            first_trace_y = fig.data[0].y
            epoch_n = 0 if first_trace_y is None else len(first_trace_y)

            if epoch_n == 0:
                # Nothing was parsed from this log: there is nothing to plot or summarise.
                f.write(f'## {fname}\n')
                f.write('_No epoch metrics found in this log._\n')
                continue

            fig.write_image(fpath, format='svg', width=1024, height=576)
            # Markdown links use forward slashes, also on Windows.
            link = fpath.replace(os.sep, '/')
            f.write(f'## {fname}\n')
            f.write(f'![{fname}]({link})\n')
            f.write(f'| Metric | Epoch 1 | Epoch {epoch_n} | &#8710;% last | Max | &#8710;% max | Std Dev |\n')
            f.write('| - | - | - | - | - | - | - |\n')
            for trace in fig.data:
                y = trace.y
                epoch1_y = y[0]
                epochn_y = y[-1]
                max_y = np.max(y)
                std_y = np.std(y)

                if epoch1_y == 0:
                    d100 = dmax = None
                else:
                    d100 = (epochn_y - epoch1_y) / epoch1_y * 100
                    dmax = (max_y - epoch1_y) / epoch1_y * 100

                f.write(f'| {trace.name} | {fmt(epoch1_y)} | {fmt(epochn_y)} | {fmt(d100)} | {fmt(max_y)} | {fmt(dmax)} | {fmt(std_y)} |\n')

# Input logs are read from "logs"; exported SVG images are written to "imgs".
log_dir, img_dir = "logs", "imgs"

# Create both directories up front so a fresh checkout runs without errors.
os.makedirs(log_dir, exist_ok=True)
os.makedirs(img_dir, exist_ok=True)

# Plot every log and (re)generate plots.md.
process_log_files(log_dir=log_dir, img_dir=img_dir)

<font size="10">GitHub doesn't render the interactive plots above; see [the non-interactive plots](plots.md) instead.</font>