# Extract data from output files

### Code to extract per-epoch metrics and timing information from the output logs of LBANN training runs
March 9, 2020

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import subprocess as sp
import os
import glob
import itertools

from ipywidgets import interact, interact_manual,fixed, SelectMultiple, RadioButtons

In [5]:
%matplotlib widget

## Extract training and validation metrics and run times

In [6]:
def f_extract_info(fname):
    '''
    Extract metrics and timings from the log file of an Lbann training run.

    For every category ('training', 'validation') and every quantity
    ('objective', 'd_real', 'd_fake', 'gen', 'run time', 'mini-batch'), each
    line of the log that contains both substrings is selected, and the text
    after the last ':' on that line is kept as the value.

    Parameters
    ----------
    fname : str or path-like
        Path of the log file to parse (e.g. out.log or slurm-*.out).

    Returns
    -------
    pandas.DataFrame
        One row per matched line, in file order (presumably one row per
        epoch -- confirm against the log format). Columns, in order:
        train_obj, train_dreal, train_dfake, train_gen, val_obj, val_dreal,
        val_dfake, val_gen (float), train_time, val_time (float, trailing
        's' removed), train_batch_stats, val_batch_stats (raw strings).
        If some quantities matched fewer lines than others, the shorter
        columns are padded with NaN at the end.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If a matched metric or timing value cannot be converted to float.
    '''
    categories=[('train','training'),('val','validation')]
    metrics=[('obj','objective'),('dreal','d_real'),('dfake','d_fake'),('gen','gen')]
    quantities=[name for _,name in metrics]+['run time','mini-batch']

    # Read the log once in Python instead of piping `grep` through a shell:
    # this works for paths containing spaces or shell metacharacters, and a
    # quantity without any match gives an empty column instead of an exception.
    with open(fname,'r',encoding='utf-8',errors='replace') as f:
        lines=f.read().split('\n')

    # raw['<category>_<quantity>'] -> list of value strings, in file order
    raw={}
    for _,category in categories:
        for quantity in quantities:
            raw[category+'_'+quantity]=[line.split(':')[-1] for line in lines
                                        if line and category in line and quantity in line]

    # Columns are built as Series so that columns of unequal length are
    # aligned on the row index and padded with NaN, rather than failing.
    columns={}
    for prefix,category in categories:
        for suffix,quantity in metrics:
            values=raw[category+'_'+quantity]
            columns[prefix+'_'+suffix]=pd.Series([float(v) for v in values],dtype=float)

    ### Need to remove the trailing 's' in the timings
    for prefix,category in categories:
        values=raw[category+'_run time']
        columns[prefix+'_time']=pd.Series([float(v.strip().rstrip('s')) for v in values],dtype=float)

    # Mini-batch statistics are kept as the raw text found after the last ':'
    for prefix,category in categories:
        columns[prefix+'_batch_stats']=pd.Series(raw[category+'_mini-batch'],dtype=object)

    return pd.DataFrame(columns)

In [7]:
# Directory that holds one sub-directory per run
results_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data'

# Log file of each available run, relative to results_dir
run_logs={
    'exagan_20200331':'20200331_131011_exagan/slurm-513349.out',
    'with_mcr_20200406':'20200406_080207_exagan_with_mcr/out.log',
    'with_mcr_20200409':'20200409_083646_exagan_with_mcr/slurm-533243.out',
    'no_mcr_20200409':'20200409_084926_exagan_no_mcr/slurm-533571.out',
}

# Run to analyse. Selecting by key replaces the previous pattern of assigning
# fname twice in a row, where the first assignment was silently overwritten.
run_key='no_mcr_20200409'
fname=os.path.join(results_dir,run_logs[run_key])

### Extract information from log file
df=f_extract_info(fname)

In [8]:
# Preview the first rows of the extracted dataframe.
# The commented lines below are alternative inspections kept for reference.
# df.columns
df.head()
# col_list=['train_obj', 'train_dreal', 'train_dfake', 'train_gen', 'val_obj', 'val_dreal', 'val_dfake', 'val_gen', 'train_time', 'val_time']
# df[col_list]

Unnamed: 0,train_obj,train_dreal,train_dfake,train_gen,val_obj,val_dreal,val_dfake,val_gen,train_time,val_time,train_batch_stats,val_batch_stats
0,5.12535,0.556332,1.00253,3.56649,7.09542,0.169806,6.83975,0.085863,60.7266,6.24941,"0.0408028s mean, 2.21193s max, 0.00929616s mi...","0.0378445s mean, 0.138023s max, 0.0262676s mi..."
1,4.33604,0.423326,0.532036,3.38068,7.98899,1.47126,0.086133,6.4316,58.1091,5.61386,"0.0390275s mean, 0.0708043s max, 0.0215448s m...","0.0339907s mean, 0.0494954s max, 0.00281941s ..."
2,5.10068,0.36275,0.426021,4.3119,4.6704,0.353497,0.094371,4.22254,58.1298,5.8468,"0.0390748s mean, 0.142523s max, 0.0169916s mi...","0.0354038s mean, 0.0501632s max, 0.00889575s ..."
3,5.49655,0.349429,0.409576,4.73755,2.44828,0.06584,0.321069,2.06137,58.1427,5.95744,"0.0390611s mean, 0.0724607s max, 0.0194566s m...","0.0360739s mean, 0.0614507s max, 0.0049039s m..."
4,5.93784,0.328057,0.376206,5.23358,3.40172,0.101533,3.2194,0.080783,58.2104,5.69982,"0.0391176s mean, 0.144861s max, 0.0156823s mi...","0.0345126s mean, 0.0506677s max, 0.00278231s ..."


In [9]:
def f_plot(df,col_list=['train_obj']):
    '''
    Plot multiple columns of the dataframe in a new figure.

    Each requested column is drawn against the dataframe index as unconnected
    markers, with a different marker style per column (the styles repeat after
    six columns).

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe holding the columns to plot.
    col_list : str or iterable of str
        Column name(s) to plot. A single name may be given as a plain string.
        An empty selection produces an empty figure without a legend.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If a requested column is not in the dataframe. This is checked before
        any figure is created, so no half-drawn figure is left behind.
    '''
    # A bare string would otherwise be iterated character by character.
    if isinstance(col_list,str):
        col_list=[col_list]
    # Work on a private copy: accepts tuples (e.g. from SelectMultiple) and
    # never touches the shared default list.
    col_list=list(col_list)

    missing=[col for col in col_list if col not in df.columns]
    if missing:
        raise KeyError('Columns not found in dataframe: {0}'.format(missing))

    plt.figure()

    marker_lst=('o','*','H','D','.','x')
    marker=itertools.cycle(marker_lst)
    for col in col_list:
        plt.plot(df[col],linestyle='',marker=next(marker),label=col)

    # With nothing plotted, plt.legend() only emits a "no artists" warning.
    if col_list:
        plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel('Value')

# Plot the four training-side metric columns of df in a single figure
f_plot(df,col_list=['train_obj','train_dfake','train_dreal','train_gen'])

# Uncomment to save the current figure to disk
# plt.savefig('fig2.png')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [10]:
### Compare different quantities

# Columns offered in the multi-select widget
col_list=['train_obj', 'train_dreal', 'train_dfake', 'train_gen', 'val_obj',
       'val_dreal', 'val_dfake', 'val_gen', 'train_time', 'val_time']
# interact_manual returns the wrapped function; the trailing ';' stops the
# cell from echoing its repr below the widget.
interact_manual(f_plot,col_list=SelectMultiple(options=col_list),df=fixed(df));


interactive(children=(SelectMultiple(description='col_list', options=('train_obj', 'train_dreal', 'train_dfake…

<function __main__.f_plot(df, col_list=['train_obj'])>