# Extract data from output files

### Code to extract loss and timing information from the output logs of LBANN training runs
March 9, 2020

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import subprocess as sp
import os
import glob
import itertools

from ipywidgets import interact, interact_manual,fixed, SelectMultiple, RadioButtons

In [2]:
%matplotlib widget

## Extract training times

In [3]:
def f_extract_info(fname):
    '''
    Extract metric values and timings from an out.log file of Lbann training.

    The log is scanned once. A line is attributed to a (category, quantity)
    pair when it contains both the category substring ('training' or
    'validation') and the quantity substring ('objective', 'd_real', 'd_fake',
    'gen', 'run time' or 'mini-batch'). The value kept for the line is the
    text after its last ':'.

    Parameters
    ----------
    fname : str
        Path of the log file. It is opened directly (no shell involved), so
        paths containing spaces or shell metacharacters are handled.

    Returns
    -------
    pandas.DataFrame
        One row per matched log entry, in file order, with columns
        train_obj, train_dreal, train_dfake, train_gen,
        val_obj, val_dreal, val_dfake, val_gen  (float),
        train_time, val_time                    (float, trailing 's' removed),
        train_batch_stats, val_batch_stats      (raw text after the last ':').
        A log with no matching lines yields an empty frame with these columns.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If a value cannot be parsed as a float, or if the quantities do not
        all have the same number of matching lines.
    '''
    strg_lst=['objective','d_real','d_fake','gen','run time','mini-batch']
    categories=['training','validation']

    ### Single pass over the file, collecting the text after the last ':' of every matching line
    dict1={category+'_'+strg:[] for category in categories for strg in strg_lst}
    with open(fname,'r',encoding='utf-8') as f:
        for line in f:
            line=line.rstrip('\n')
            for category in categories:
                if category not in line: continue
                for strg in strg_lst:
                    if strg in line:
                        dict1[category+'_'+strg].append(line.split(':')[-1])

    def _to_seconds(val):
        ### Need to remove the trailing 's' in the timings
        val=val.strip()
        return float(val[:-1] if val.endswith('s') else val)

    data={}
    key_lst=['training_objective', 'training_d_real', 'training_d_fake', 'training_gen', 'validation_objective', 'validation_d_real', 'validation_d_fake', 'validation_gen']
    col_list=['train_obj','train_dreal','train_dfake','train_gen','val_obj','val_dreal','val_dfake','val_gen']
    for col,key in zip(col_list,key_lst):
        # Builtin float replaces the removed np.float alias; float() ignores surrounding whitespace
        data[col]=np.array([float(val) for val in dict1[key]],dtype=float)

    for col,key in zip(['train_time','val_time'],['training_run time','validation_run time']):
        data[col]=np.array([_to_seconds(val) for val in dict1[key]],dtype=float)

    ### Batch statistics are kept as the raw text
    for col,key in zip(['train_batch_stats','val_batch_stats'],['training_mini-batch','validation_mini-batch']):
        data[col]=dict1[key]

    # Build the frame in one go; columns of unequal length raise ValueError here
    return pd.DataFrame(data)

In [6]:
### Extract information from log file

parent_dir='/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/'
fldr_name='20200811_195351_bsize256_8gpurun_noconvbrelu'
# os.path.join gives the same paths as before and also works if parent_dir loses its trailing '/'
main_dir=os.path.join(parent_dir,fldr_name,'dump_outs','trainer0','model0','')
print(main_dir)
strg=os.path.join(parent_dir,fldr_name,'out.log')
# glob returns an empty list when the log is absent; fail with a message naming the path
fname=next(iter(glob.glob(strg)),None)
if fname is None:
    raise FileNotFoundError('No log file found matching {0}'.format(strg))
print(fname)
df=f_extract_info(fname)

/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20200811_195351_bsize256_8gpurun_noconvbrelu/dump_outs/trainer0/model0/
/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20200811_195351_bsize256_8gpurun_noconvbrelu/out.log
/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20200811_195351_bsize256_8gpurun_noconvbrelu/out.log


In [7]:
# Preview the first rows of the dataframe returned by f_extract_info
df.head()


Unnamed: 0,train_obj,train_dreal,train_dfake,train_gen,val_obj,val_dreal,val_dfake,val_gen,train_time,val_time,train_batch_stats,val_batch_stats
0,2.78891,0.187376,0.415729,2.1858,0.128862,0.065668,0.060969,0.002225,99.2205,4.80108,"0.112764s mean, 5.05943s max, 0.0775405s min,...","0.0489462s mean, 0.354378s max, 0.0408807s mi..."
1,0.161344,0.067305,0.082051,0.011988,0.116865,0.058619,0.057962,0.000283,84.6857,4.44277,"0.0962267s mean, 3.83188s max, 0.0707859s min...","0.0452907s mean, 0.0624387s max, 0.0297546s m..."
2,0.126294,0.060192,0.065515,0.000587,0.113703,0.056622,0.057073,9e-06,82.5098,4.51047,"0.0937491s mean, 2.19639s max, 0.0743914s min...","0.0459824s mean, 0.139467s max, 0.0294821s mi..."
3,0.127909,0.062444,0.065376,8.9e-05,0.123719,0.061889,0.061804,2.6e-05,82.0358,5.2937,"0.0932131s mean, 2.24744s max, 0.0725425s min...","0.0539747s mean, 0.83061s max, 0.0306515s min..."
4,0.128466,0.062504,0.065931,3.1e-05,0.111783,0.056533,0.055248,2e-06,90.4047,4.71571,"0.102733s mean, 4.11479s max, 0.0748229s min,...","0.0480757s mean, 0.0874491s max, 0.0310204s m..."


In [8]:
def f_plot(df,col_list=['train_obj']):
    '''
    Draw the requested columns of the dataframe as marker-only series on a new figure.

    df : object indexable by column name (e.g. a pandas DataFrame)
    col_list : iterable of column names to plot; each one gets the next
        marker from a repeating set of six and a legend entry with its name.

    The x-axis is labelled 'Epoch'. Nothing is returned.
    '''
    # Endless supply of marker symbols, restarting after the sixth
    marker_cycle=itertools.cycle(('o','*','H','D','.','x'))

    plt.figure()
    for col_name,symbol in zip(col_list,marker_cycle):
        plt.plot(df[col_name],linestyle='',marker=symbol,label=col_name)
    plt.legend()
    plt.xlabel('Epoch')

# Plot the four train_* metric columns of df on one figure
f_plot(df,col_list=['train_obj','train_dfake','train_dreal','train_gen'])

# Uncomment to write the current figure to fig2.png
# plt.savefig('fig2.png')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [9]:
### Compare different quantities
# Columns offered in the multi-select widget; the *_batch_stats columns are not listed
col_list=['train_obj', 'train_dreal', 'train_dfake', 'train_gen', 'val_obj',
       'val_dreal', 'val_dfake', 'val_gen', 'train_time', 'val_time']
# f_plot receives the widget selection as col_list; df is passed through as a fixed (non-widget) argument
interact_manual(f_plot,col_list=SelectMultiple(options=col_list),df=fixed(df))


interactive(children=(SelectMultiple(description='col_list', options=('train_obj', 'train_dreal', 'train_dfake…

<function __main__.f_plot(df, col_list=['train_obj'])>