# Extract data from output files

### Code to extract timing information from the output files of LBANN runs
March 9, 2020

In [12]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import subprocess as sp
import os
import glob
import itertools

from ipywidgets import interact, interact_manual,fixed, SelectMultiple, RadioButtons

In [13]:
# Select the interactive widget backend so figures are rendered as notebook widgets.
%matplotlib widget

## Extract training times

In [14]:
def f_extract_info(fname):
    '''
    Extract metric and timing information from an out.log file of Lbann training.

    Every line of the log that contains both a category ('training' or
    'validation') and a metric tag ('objective', 'd_real', 'd_fake', 'gen',
    'run time', 'mini-batch') is selected, and the text after the last ':'
    on that line is taken as the value. The i-th match of each
    (category, tag) pair becomes row i of the result (presumably one row per
    epoch -- confirm against the log format).

    Parameters
    ----------
    fname : str or path-like
        Path of the log file to parse. The file is read as UTF-8.

    Returns
    -------
    pandas.DataFrame
        Columns, in order:
        train_obj, train_dreal, train_dfake, train_gen,
        val_obj, val_dreal, val_dfake, val_gen  -- float values;
        train_time, val_time                    -- float, trailing 's' removed;
        train_batch_stats, val_batch_stats      -- raw text after the last ':'.
        The frame has zero rows when the log contains no matching lines.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If a selected value cannot be parsed as a float, or if the metrics
        were matched a different number of times (columns of unequal length).
    '''
    strg_lst=['objective','d_real','d_fake','gen','run time','mini-batch']

    # Read the log once in Python instead of launching a `grep | grep` shell
    # pipeline per metric: this works for paths containing spaces or shell
    # metacharacters and does not fail just because a metric has no matches.
    with open(fname,'r',encoding='utf-8') as f:
        lines=[line.rstrip('\n') for line in f]

    dict1={}
    for category in ['training','validation']:
        # Plain substring matching, same as the fixed-string grep patterns used before.
        cat_lines=[line for line in lines if category in line]
        for strg in strg_lst:
            key=category+'_'+strg
            dict1[key]=[line.split(':')[-1] for line in cat_lines if strg in line]

    data={}
    key_lst=['training_objective', 'training_d_real', 'training_d_fake', 'training_gen', 'validation_objective', 'validation_d_real', 'validation_d_fake', 'validation_gen']
    col_list=['train_obj','train_dreal','train_dfake','train_gen','val_obj','val_dreal','val_dfake','val_gen']
    for col,key in zip(col_list,key_lst):
        # Builtin float replaces the np.float alias, which no longer exists in current NumPy.
        data[col]=np.array([float(val) for val in dict1[key]],dtype=float)

    ### Need to remove the trailing 's' in the timings
    for col,key in zip(['train_time','val_time'],['training_run time','validation_run time']):
        timings=[val.strip() for val in dict1[key]]
        data[col]=np.array([float(val[:-1] if val.endswith('s') else val) for val in timings],dtype=float)

    # The mini-batch statistics are kept as the unparsed text following the last ':'.
    for col,key in zip(['train_batch_stats','val_batch_stats'],['training_mini-batch','validation_mini-batch']):
        data[col]=dict1[key]

    # Building the frame in one step keeps the column order above and raises
    # ValueError if the columns do not all have the same number of entries.
    return pd.DataFrame(data)

In [17]:
# Choose the log file to analyse. The commented-out paths are other runs that
# can be selected instead by uncommenting one of them.
# fname='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200331_131011_exagan/slurm-513349.out'

# fname='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200406_080207_exagan_with_mcr/out.log'

fname='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200407_093719_exagan_no_mcr/out.log'
### Extract information from log file into a dataframe (used by the cells below)
df=f_extract_info(fname)

In [18]:
# Preview the first rows of the extracted dataframe.
# df.columns
df.head()
# Alternative view restricted to the numeric columns:
# col_list=['train_obj', 'train_dreal', 'train_dfake', 'train_gen', 'val_obj', 'val_dreal', 'val_dfake', 'val_gen', 'train_time', 'val_time']
# df[col_list]

Unnamed: 0,train_obj,train_dreal,train_dfake,train_gen,val_obj,val_dreal,val_dfake,val_gen,train_time,val_time,train_batch_stats,val_batch_stats
0,7.46832,0.849779,2.28201,4.33653,10.9853,1.61141,0.071444,9.30241,58.1469,6.46629,"0.242068s mean, 3.08499s max, 0.0291886s min,...","0.239425s mean, 0.533345s max, 0.21086s min, ..."
1,4.01244,0.659323,0.851929,2.50119,0.841458,0.365116,0.227073,0.249268,56.9083,6.44995,"0.236916s mean, 0.31946s max, 0.0512279s min,...","0.238851s mean, 0.267614s max, 0.107777s min,..."
2,3.88301,0.598949,0.82243,2.46163,6.24042,2.21516,0.080249,3.94502,56.8609,6.19941,"0.236691s mean, 0.328631s max, 0.0524485s min...","0.229572s mean, 0.271209s max, 0.116734s min,..."
3,4.1117,0.516936,0.720451,2.87432,0.589632,0.429318,0.093351,0.066963,55.875,6.14506,"0.232604s mean, 0.299107s max, 0.0515669s min...","0.22756s mean, 0.263859s max, 0.120102s min, ..."
4,4.0525,0.469154,0.614447,2.9689,1.6476,0.299654,1.23263,0.115319,55.7336,6.10921,"0.232031s mean, 0.359077s max, 0.0519128s min...","0.226233s mean, 0.25216s max, 0.127253s min, ..."


In [19]:
def f_plot(df,col_list=['train_obj']):
    '''
    Draw the requested dataframe columns together in a new figure.

    Each column named in col_list is plotted against the dataframe index as
    unconnected markers, labelled with the column name in the legend.
    Marker shapes are taken in turn from a fixed set and repeat once the set
    is exhausted. The x axis is labelled 'Epoch'. Nothing is returned.
    '''
    plt.figure()

    # Endless supply of marker shapes, paired off with the columns one by one.
    symbols=itertools.cycle(('o','*','H','D','.','x'))
    for name,symbol in zip(col_list,symbols):
        plt.plot(df[name],linestyle='',marker=symbol,label=name)

    plt.legend()
    plt.xlabel('Epoch')

# Plot the four training metric columns in a single figure.
f_plot(df,col_list=['train_obj','train_dfake','train_dreal','train_gen'])

# Uncomment to write the current figure to disk.
# plt.savefig('fig2.png')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [20]:
### Compare different quantities
# Interactive version of f_plot: choose one or more columns in the multi-select
# widget, then press the button to draw them. df is passed through unchanged
# via fixed(), so no widget is created for it.

col_list=['train_obj', 'train_dreal', 'train_dfake', 'train_gen', 'val_obj',
       'val_dreal', 'val_dfake', 'val_gen', 'train_time', 'val_time']
interact_manual(f_plot,col_list=SelectMultiple(options=col_list),df=fixed(df))


interactive(children=(SelectMultiple(description='col_list', options=('train_obj', 'train_dreal', 'train_dfake…

<function __main__.f_plot(df, col_list=['train_obj'])>