In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pyprojroot import here

from nutils import name_mask

In [None]:
# Directory that holds the run logs, resolved against the project root.
path = here().joinpath('logs/logger')

In [None]:
# Log messages that mark the start of each pipeline stage, keyed by the
# result field that stores the corresponding timestamp.
STAGE_MARKERS = {
    'ts_fit_start': 'Fitting the model for the first time',
    'ts_backtest_start': 'Starting backtesting on the test set',
    'ts_ready': 'Results persisted',
}
HPO_MARKER = 'Starting hyperparameter optimisation as requested'


def _read_log(p):
    """Read one log file into a frame with ``timestamp`` and ``message`` columns.

    Each line is expected to look like ``<timestamp> - <message>``. Raises
    whatever pandas raises when the file is empty, a line has no ' - '
    separator, or a timestamp cannot be parsed.
    """
    # The separator regex matches a literal backslash followed by 't', so in
    # practice every line lands in a single column.
    # NOTE(review): presumably intentional (read whole lines) -- confirm.
    raw = pd.read_csv(p, sep=r'\\t', engine='python', names=['column'])
    # n=1: split on the first ' - ' only, so a message that itself contains
    # ' - ' stays in one piece instead of producing extra columns.
    table = raw['column'].str.split(' - ', n=1, expand=True)
    table.columns = ['timestamp', 'message']
    table['timestamp'] = pd.to_datetime(table['timestamp'])
    return table


def _first_timestamp(table, message):
    """Return the timestamp of the first row whose message equals ``message``, or None."""
    hits = table.loc[table.message == message, 'timestamp']
    return None if hits.empty else hits.iloc[0]


results = list()

# sorted() makes the row order of the resulting frame reproducible.
for p in sorted(path.glob('*.log')):

    # names: the file stem is expected to be <target>-<model>-<fs>-<hpo>[-...]
    stem = p.stem.split('-')
    if len(stem) < 4:
        warnings.warn(f'Skipping {p.name}: file name does not have four "-"-separated parts')
        continue
    result = dict(zip(('target', 'model', 'fs', 'hpo'), stem))

    # parse: a log that cannot be read is skipped, but no longer silently
    try:
        table = _read_log(p)
        result['ts_start'] = table.timestamp.iloc[0]
    except Exception as error:
        warnings.warn(f'Skipping {p.name}: could not parse log ({error!r})')
        continue

    # Runs without an HPO marker get the log start as HPO start.
    hpo_start = _first_timestamp(table, HPO_MARKER)
    result['ts_hpo_start'] = result['ts_start'] if hpo_start is None else hpo_start

    # Remaining stages: None when the marker never appears in the log.
    for field, marker in STAGE_MARKERS.items():
        result[field] = _first_timestamp(table, marker)

    results.append(result)

In [None]:
# One row per parsed log, indexed by the reconstructed run name.
df = pd.DataFrame(results)
df['name'] = df['target'] + '-' + df['model'] + '-' + df['fs'] + '-' + df['hpo']
df = df.set_index('name')

# Coerce the timestamp columns so that a stage missing from every log
# (all None, object dtype) still subtracts cleanly and yields NaT.
TIMESTAMP_COLUMNS = ['ts_start', 'ts_hpo_start', 'ts_fit_start', 'ts_backtest_start', 'ts_ready']
df[TIMESTAMP_COLUMNS] = df[TIMESTAMP_COLUMNS].apply(pd.to_datetime)

# Duration column -> (begin timestamp, end timestamp).
STAGE_BOUNDS = {
    'delta_total': ('ts_start', 'ts_ready'),
    'delta_hpo': ('ts_hpo_start', 'ts_fit_start'),
    'delta_fit': ('ts_fit_start', 'ts_backtest_start'),
    'delta_backtest': ('ts_backtest_start', 'ts_ready'),
}
SECONDS_PER_HOUR = 60 * 60

# Durations in hours. total_seconds() is used rather than Timedelta.seconds,
# because .seconds holds only the sub-day component and would drop whole days
# from any stage that runs for 24 h or longer.
for column, (begin, end) in STAGE_BOUNDS.items():
    df[column] = (df[end] - df[begin]).dt.total_seconds() / SECONDS_PER_HOUR

In [None]:
# Per-stage runtimes, ordered by their sum so the shortest run is plotted first.
# Built as a chain on a fresh frame: no assignment into a slice of df (which
# triggers SettingWithCopyWarning) and no inplace mutation.
view = (
    df[['delta_hpo', 'delta_fit', 'delta_backtest']]
    .assign(delta_total=lambda stages: stages.sum(axis=1))
    .sort_values(by='delta_total')
    .drop(columns='delta_total')
)

fig, ax = plt.subplots()

# Stacked horizontal bars: one bar per run, one segment per stage.
view.plot.barh(stacked=True, width=.8, edgecolor='k', lw=.5, ax=ax);
ax.set_xlabel('Runtime (h)');
ax.legend(loc=4);
fig.savefig(here() / 'output/plots/runtime.jpg', dpi=300, bbox_inches='tight')

In [None]:
STAGE_COLUMNS = ['delta_hpo', 'delta_fit', 'delta_backtest']

# Runtime table for the runs with hpo == '1': one row per model/feature-set
# pair, durations in hours rounded to two decimals, sorted by total runtime.
# Written as a single chain so nothing is assigned into a slice of df
# (the original triggered SettingWithCopyWarning).
table = (
    df[['model', 'fs', 'hpo'] + STAGE_COLUMNS]
    # Total is the sum of the three stages, not ts_ready - ts_start.
    .assign(delta_total=lambda runs: runs[STAGE_COLUMNS].sum(axis=1))
    .loc[lambda runs: runs.hpo == '1']
    .reset_index(drop=True)
    .drop(columns='hpo')
    # Map internal model identifiers to display names.
    .assign(model=lambda runs: runs.model.replace(name_mask))
    .rename(columns={
        'delta_fit': 'Fit',
        'delta_backtest': 'Backtest',
        'delta_total': 'Total',
        'delta_hpo': 'HPO',
        'model': 'Model',
        'fs': 'FS',
    })
    .sort_values('Total')
    .assign(FS=lambda runs: runs.FS.str.upper())
    .assign(Name=lambda runs: runs.Model + '-' + runs.FS)
    .set_index('Name')
    .drop(columns=['Model', 'FS'])
    .round(2)
)

In [None]:
# Show entries equal to 0.00 as '<0.01' instead of a bare zero.
table = table.replace(to_replace=0.00, value='<0.01')

In [None]:
# Hardware label for each row of `table`, matched purely by position.
# The list must be kept in step with the table's row order and length.
hw = [
    'CPU', 'CPU', 'CPU', 'CPU',
    'GPU2',
    'CPU',
    'GPU1', 'GPU1', 'GPU1',
    'CPU',
    'GPU2',
]

In [None]:
# Attach the hardware labels as a new column; values are matched to rows by position.
table = table.assign(Accelerator=hw)

In [None]:
# Rows (by index label) left out of the published table.
excluded_rows = ['SN-U', 'HWAM-U', 'HWDM-U', 'ARIMA-U']
table = table.drop(index=excluded_rows)

In [None]:
# Export the runtime table as a LaTeX float.
# Caption fix: the values are hours, so the contradictory trailing
# "in minutes" is removed. Adjacent string literals replace the backslash
# continuations, which embedded runs of indentation spaces in the caption.
table.to_latex(
    buf=here() / 'output/tables/runtime.tex',
    column_format='rccccc',  # index column + five data columns
    label='tab:runtime',
    caption=(
        'Computational cost measured in walltime hours for hyperparameter optimisation (HPO), '
        'parameter estimation and backtesting. CPU=Intel Xeon Cascade Lake 2,1 GHz, '
        'GPU1=Nvidia Volta V100, GPU2=Nvidia Ampere A100'
    ),
    position='H',
    index=True,
    na_rep='-'
)