# Extract data from output files

### Code to extract loss values and timing information from the `out.log` files written by LBANN training runs
March 9, 2020

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import subprocess as sp
import os
import glob
import itertools

from ipywidgets import interact, interact_manual,fixed, SelectMultiple, RadioButtons

In [2]:
%matplotlib widget

## Extract training and validation metrics and run times

In [3]:
def f_extract_info(fname):
    '''
    Module to extract information from out.log files of Lbann training

    The file is read once. For each phase ('training', 'validation') and each
    quantity tag ('objective', 'd_real', 'd_fake', 'gen', 'run time', 'mini-batch'),
    every line containing both the phase and the tag is selected and the text
    after its last ':' is kept as the value.

    Arguments:
        fname: path of the log file to parse
    Returns:
        pandas DataFrame with one row per matched log entry and the columns
        train_obj, train_dreal, train_dfake, train_gen, val_obj, val_dreal,
        val_dfake, val_gen (float), train_time, val_time (float, trailing 's'
        removed), train_batch_stats, val_batch_stats (raw text after the ':').
        A file without any matching line gives an empty DataFrame with these columns.
    Raises:
        OSError if the file cannot be opened.
        ValueError if a numeric value cannot be parsed, or if the quantities
        do not all have the same number of entries.
    '''
    categories=['training','validation']
    strg_lst=['objective','d_real','d_fake','gen','run time','mini-batch']

    # Read the log a single time in pure Python: no shell is involved, so file names
    # with spaces or special characters are safe and a missing match is not an error.
    with open(fname,'r',encoding='utf-8',errors='replace') as f:
        lines=[line.rstrip('\n') for line in f]

    dict1={}
    for category in categories:
        cat_lines=[line for line in lines if category in line]
        for strg in strg_lst:
            key=category+'_'+strg
            # Plain substring tests are equivalent to the former grep calls because
            # none of the tags contains a regex metacharacter.
            dict1[key]=[line.split(':')[-1] for line in cat_lines if strg in line]

    data={}
    key_lst=['training_objective', 'training_d_real', 'training_d_fake', 'training_gen', 'validation_objective', 'validation_d_real', 'validation_d_fake', 'validation_gen']
    col_list=['train_obj','train_dreal','train_dfake','train_gen','val_obj','val_dreal','val_dfake','val_gen']
    for col,key in zip(col_list,key_lst):
        # builtin float replaces np.float, which no longer exists in current NumPy
        data[col]=np.array([float(val) for val in dict1[key]],dtype=float)

    ### Need to remove the trailing 's' in the timings
    for col,key in zip(['train_time','val_time'],['training_run time','validation_run time']):
        # strip first so trailing whitespace cannot hide the 's' unit suffix
        data[col]=np.array([float(val.strip().rstrip('s')) for val in dict1[key]],dtype=float)

    for col,key in zip(['train_batch_stats','val_batch_stats'],['training_mini-batch','validation_mini-batch']):
        # kept as the unmodified text that followed the ':'
        data[col]=np.array(dict1[key])

    # Column order follows insertion order; unequal lengths raise ValueError here.
    df=pd.DataFrame(data)

    return df

In [4]:
### Extract information from log file

# Result folders of other runs, kept for reference; only the active `fldr` below is analysed.
# fldr='20200506_121613_exagan_200k_samples'
# fldr='20200513_121910_peters_dataset'
# fldr='20200529_063053_exagan_seed232_80epochs'
fldr='20200601_150741_seed2020_200epochs'

strg='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/{0}/out.log'.format(fldr)
# glob returns an empty list when nothing matches; report that clearly
# instead of failing with an IndexError on the [0] lookup.
matches=glob.glob(strg)
if not matches:
    raise FileNotFoundError('No log file found matching {0}'.format(strg))
fname=matches[0]
print(fname)
df=f_extract_info(fname)

/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200601_150741_seed2020_200epochs/out.log


In [5]:
# Preview the first rows of the table returned by f_extract_info.
df.head()

Unnamed: 0,train_obj,train_dreal,train_dfake,train_gen,val_obj,val_dreal,val_dfake,val_gen,train_time,val_time,train_batch_stats,val_batch_stats
0,5.90908,0.54371,0.832125,4.53324,1.24144,0.228442,0.863401,0.1496,48.5095,3.94297,"0.0391762s mean, 3.86967s max, 0.034704s min,...","0.0127595s mean, 0.279638s max, 0.0113598s mi..."
1,3.3784,0.535433,0.551983,2.29098,1.77139,0.164039,1.55879,0.048555,44.5457,3.61227,"0.0359619s mean, 0.450788s max, 0.0195584s mi...","0.0116864s mean, 0.0343763s max, 0.0107871s m..."
2,2.97232,0.552815,0.555762,1.86375,1.57801,0.446263,0.811268,0.32048,44.0139,3.63674,"0.0355307s mean, 0.265715s max, 0.0193284s mi...","0.0117661s mean, 0.0307579s max, 0.0107432s m..."
3,2.95137,0.533511,0.536263,1.8816,2.08638,0.70098,0.315275,1.07012,43.8381,3.60451,"0.0353882s mean, 0.105867s max, 0.0192078s mi...","0.0116617s mean, 0.0323967s max, 0.010791s mi..."
4,2.95366,0.523189,0.521724,1.90875,2.02548,0.417542,1.06059,0.547345,43.9531,3.58355,"0.0354814s mean, 0.323131s max, 0.0191207s mi...","0.0115932s mean, 0.0357811s max, 0.0107123s m..."


In [6]:
def f_plot(df,col_list=['train_obj']):
    '''
    Draw the requested columns of the dataframe as unconnected markers on a new figure.

    Arguments:
        df: table-like object indexed by column name (df[col])
        col_list: names of the columns to draw
    Each column takes the next symbol of a fixed sequence that repeats once
    exhausted, and is labelled with its own name in the legend.
    The x axis is labelled 'Epoch'. Nothing is returned.
    '''
    plt.figure()

    symbols=itertools.cycle(('o','*','H','D','.','x'))
    # zip stops with col_list, so the endless symbol cycle is consumed one step per column
    for name,symbol in zip(col_list,symbols):
        plt.plot(df[name],linestyle='',marker=symbol,label=name)

    plt.legend()
    plt.xlabel('Epoch')

# Plot the four training columns together on one figure.
f_plot(df,col_list=['train_obj','train_dfake','train_dreal','train_gen'])

# Uncomment to write the current figure to disk:
# plt.savefig('fig2.png')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [7]:
### Compare different quantities
# Numeric columns offered in the multi-select widget
# (the text columns train_batch_stats / val_batch_stats are left out).
col_list=['train_obj', 'train_dreal', 'train_dfake', 'train_gen', 'val_obj',
       'val_dreal', 'val_dfake', 'val_gen', 'train_time', 'val_time']
# f_plot receives the widget selection as col_list; df is passed through unchanged via fixed().
interact_manual(f_plot,col_list=SelectMultiple(options=col_list),df=fixed(df))


interactive(children=(SelectMultiple(description='col_list', options=('train_obj', 'train_dreal', 'train_dfake…

<function __main__.f_plot(df, col_list=['train_obj'])>