# Extract data from output files

### Code to extract timing information from output files of Lbann code
March 9, 2020

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import subprocess as sp
import os
import glob
import itertools

from ipywidgets import *

In [4]:
%matplotlib widget

## Extract training times

In [5]:
def f_extract_info(fname):
    '''
    Module to extract information from out.log files of Lbann training
    Reads in file name
    '''
    strg_lst=['objective','d_real','d_fake','gen','spec_loss','run time','mini-batch']
    keys=['training_'+strg for strg in strg_lst]
    dict1={}
    for category in ['training','validation']:
        for strg in strg_lst:
            try: 
                key=category+'_'+strg
                cmd='grep "{0}" {1} | grep "{2}"'.format(category,fname,strg)
        #         print(cmd)
                op1=sp.check_output(cmd,shell=True).decode('utf-8').split('\n')
                obj=np.array([strg.split(':')[-1] for strg in op1 if strg])
                dict1[key]=obj
            except Exception as e:
                print(e)
                dict1[key]=np.nan
    
    df=pd.DataFrame([])
    key_lst=['training_objective', 'training_d_real', 'training_d_fake', 'training_gen', 'training_spec_loss','validation_objective', 'validation_d_real', 'validation_d_fake', 'validation_gen','validation_spec_loss']
    col_list=['train_obj','train_dreal','train_dfake','train_gen','train_spec','val_obj','val_dreal','val_dfake','val_gen','val_spec']
    for col,key in zip(col_list,key_lst):
        try: 
            df[col]=dict1[key].astype(np.float)
        except: pass

    ### Need to remove the trailing 's' in the timings
    for col,key in zip(['train_time','val_time'],['training_run time','validation_run time']):
        df[col]=np.array([i[:-1] for i in dict1[key]]).astype(np.float)

    for col,key in zip(['train_batch_stats','val_batch_stats'],['training_mini-batch','validation_mini-batch']):
        df[col]=dict1[key]
        
    return df

def f_plot_metrics(df,col_list=['train_obj']):
    '''
    Plot multiple columns of the dataframe
    '''
    plt.figure()
    
    marker_lst=('o','*','H','D','.','x')
    marker=itertools.cycle(marker_lst)
    for col in col_list: plt.plot(df[col],linestyle='',marker=next(marker),label=col)
    plt.legend()
    plt.xlabel('Epoch')

In [1]:
# ### Extract information from log file

# parent_dir='/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/'
# fldr_name='20201029_164804_bsize64_with_spec'
# strg=parent_dir+'{0}/out.log'.format(fldr_name)
# fname=glob.glob(strg)[0]
# print(fname)
# df=f_extract_info(fname)

In [6]:
dict1={'scratch':'/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/',
    'proj':'/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/'}

u=interactive(lambda x: dict1[x], x=Select(options=dict1.keys()))
display(u)


interactive(children=(Select(description='x', options=('scratch', 'proj'), value='scratch'), Output()), _dom_c…

In [7]:
parent_dir=u.result
dir_lst=[i.split('/')[-1] for i in glob.glob(parent_dir+'20*')]
w=interactive(lambda x: x, x=Dropdown(options=dir_lst))
display(w)

interactive(children=(Dropdown(description='x', options=('20200911_083711_bsize64_spec_test_128_nospec', '2020…

In [8]:
result=w.result
main_dir=parent_dir+result
print(main_dir)

strg=main_dir+'/out.log'
df=f_extract_info(glob.glob(strg)[0])

/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20200911_083711_bsize64_spec_test_128_nospec
Command 'grep "training" /global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20200911_083711_bsize64_spec_test_128_nospec/out.log | grep "spec_loss"' returned non-zero exit status 1.
Command 'grep "validation" /global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20200911_083711_bsize64_spec_test_128_nospec/out.log | grep "spec_loss"' returned non-zero exit status 1.


In [9]:
# df.columns
df.head()


Unnamed: 0,train_obj,train_dreal,train_dfake,train_gen,val_obj,val_dreal,val_dfake,val_gen,train_time,val_time,train_batch_stats,val_batch_stats
0,4.19567,0.578883,0.680755,2.93603,1.72152,0.61171,0.223336,0.886478,498.322,50.3682,"0.141527s mean, 4.11059s max, 0.0289866s min,...","0.128774s mean, 3.11338s max, 0.0121465s min,..."
1,2.66707,0.596432,0.591708,1.47893,2.52958,0.153765,2.30633,0.069492,531.914,56.5173,"0.151097s mean, 5.35489s max, 0.0256271s min,...","0.144478s mean, 5.11157s max, 0.0108759s min,..."
2,2.64776,0.536288,0.524679,1.58679,1.91999,0.10596,1.68925,0.124774,539.267,65.887,"0.153193s mean, 5.70697s max, 0.0245995s min,...","0.168431s mean, 5.3597s max, 0.012339s min, 0..."
3,2.7319,0.512756,0.499889,1.71926,2.25605,0.305098,0.276168,1.67478,496.604,45.7232,"0.141057s mean, 5.2066s max, 0.0287161s min, ...","0.116875s mean, 3.44983s max, 0.0436821s min,..."
4,2.81688,0.494211,0.480797,1.84187,1.63642,0.247458,0.425313,0.96365,578.781,69.7616,"0.164427s mean, 6.04391s max, 0.0244762s min,...","0.178374s mean, 5.81138s max, 0.0111102s min,..."


In [10]:


f_plot_metrics(df,col_list=['train_obj','train_dfake','train_dreal','train_gen'])

# plt.savefig('fig2.png')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [11]:
### Compare different quantities
col_list=['train_obj', 'train_dreal', 'train_dfake', 'train_gen', 'val_obj',
       'val_dreal', 'val_dfake', 'val_gen', 'train_time', 'val_time']
interact_manual(f_plot_metrics,col_list=SelectMultiple(options=col_list),df=fixed(df))


interactive(children=(SelectMultiple(description='col_list', options=('train_obj', 'train_dreal', 'train_dfake…

<function __main__.f_plot_metrics(df, col_list=['train_obj'])>