# Extract data from output files

### Code to extract per-epoch loss values and timing information from the output logs of LBANN training runs
March 9, 2020

In [8]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import subprocess as sp
import os
import glob
import itertools

from ipywidgets import interact, interact_manual,fixed, SelectMultiple, RadioButtons

In [9]:
# Select the interactive widget (ipympl) backend for the figures in this notebook
%matplotlib widget

## Extract training and validation metrics and run times

In [10]:
def f_extract_info(fname):
    '''
    Extract training and validation information from the output log of an Lbann run.

    The file is read once. For every (category, quantity) pair, with category in
    'training'/'validation' and quantity in 'objective', 'd_real', 'd_fake', 'gen',
    'run time', 'mini-batch', the lines containing both substrings are selected and
    the text after the last ':' of each line is kept.

    Parameters
    ----------
    fname : str or path-like
        Path of the log file to parse.

    Returns
    -------
    pandas.DataFrame
        One row per matched log line (in file order) with the columns
        train_obj, train_dreal, train_dfake, train_gen,
        val_obj, val_dreal, val_dfake, val_gen   -> floats
        train_time, val_time                     -> floats (trailing 's' removed)
        train_batch_stats, val_batch_stats       -> raw text after the last ':'

    Raises
    ------
    ValueError
        If a quantity has no matching line in the file, if the quantities do not
        all have the same number of entries, or if a value cannot be parsed as float.
    '''
    strg_lst=['objective','d_real','d_fake','gen','run time','mini-batch']
    categories=['training','validation']

    # Read the log once in Python instead of spawning `grep` through a shell for each
    # quantity: no dependence on external tools and no breakage (or command injection)
    # for paths containing spaces or shell metacharacters.
    with open(fname,'r',encoding='utf-8') as f:
        lines=[line.rstrip('\n') for line in f]

    dict1={}
    for category in categories:
        category_lines=[line for line in lines if category in line]
        for strg in strg_lst:
            key=category+'_'+strg
            values=[line.split(':')[-1] for line in category_lines if strg in line]
            if not values:
                raise ValueError('No "{0}" lines containing "{1}" found in {2}'.format(category,strg,fname))
            dict1[key]=np.array(values)

    # Every quantity becomes a column of the same table, so the counts must agree
    counts={key:len(val) for key,val in dict1.items()}
    if len(set(counts.values()))>1:
        raise ValueError('Unequal number of entries per quantity in {0}: {1}'.format(fname,counts))

    columns={}
    key_lst=['training_objective', 'training_d_real', 'training_d_fake', 'training_gen', 'validation_objective', 'validation_d_real', 'validation_d_fake', 'validation_gen']
    col_list=['train_obj','train_dreal','train_dfake','train_gen','val_obj','val_dreal','val_dfake','val_gen']
    for col,key in zip(col_list,key_lst):
        # builtin float: the np.float alias no longer exists in recent numpy versions
        columns[col]=np.array([float(val) for val in dict1[key]])

    ### Need to remove the trailing 's' in the timings
    for col,key in zip(['train_time','val_time'],['training_run time','validation_run time']):
        columns[col]=np.array([float(val.strip().rstrip('s')) for val in dict1[key]])

    ### Mini-batch statistics are kept as the raw text found after the last ':'
    for col,key in zip(['train_batch_stats','val_batch_stats'],['training_mini-batch','validation_mini-batch']):
        columns[col]=dict1[key]

    df=pd.DataFrame(columns)

    return df

In [11]:
### Extract information from log file

# Run folder to analyse (alternative folders are kept commented out)
# fldr='20200506_121613_exagan_200k_samples'
fldr='20200513_121910_peters_dataset'
# fldr='20200518_200316_new_transform_mcr'
strg='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/{0}/slurm*'.format(fldr)

# glob returns matches in arbitrary order: sort them so that the selected file is
# reproducible, and fail with a clear message when the folder has no slurm output.
slurm_files=sorted(glob.glob(strg))
if not slurm_files:
    raise FileNotFoundError('No slurm output file matching {0}'.format(strg))
fname=slurm_files[0]
print(fname)
df=f_extract_info(fname)

/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200513_121910_peters_dataset/slurm-649887.out


In [12]:
# df.columns
# Preview the first rows of the extracted table
df.head()
# col_list=['train_obj', 'train_dreal', 'train_dfake', 'train_gen', 'val_obj', 'val_dreal', 'val_dfake', 'val_gen', 'train_time', 'val_time']
# df[col_list]

Unnamed: 0,train_obj,train_dreal,train_dfake,train_gen,val_obj,val_dreal,val_dfake,val_gen,train_time,val_time,train_batch_stats,val_batch_stats
0,5.74373,0.548645,0.870959,4.32413,1.86304,0.407753,0.903616,0.551673,33.3848,2.59969,"0.0269089s mean, 2.25991s max, 0.0233279s min...","0.00839889s mean, 0.288749s max, 0.00618833s ..."
1,3.35684,0.507946,0.509923,2.33897,7.57753,1.95865,0.076199,5.54268,31.2513,2.08367,"0.0251765s mean, 0.0833974s max, 0.0182697s m...","0.0067227s mean, 0.0470178s max, 0.00622632s ..."
2,3.09356,0.542803,0.540152,2.01061,1.30469,0.228476,0.513721,0.562493,30.8999,2.10566,"0.0248913s mean, 0.178034s max, 0.0182741s mi...","0.00679497s mean, 0.0508984s max, 0.00621744s..."
3,3.06394,0.514935,0.508154,2.04085,3.5036,1.00023,0.078221,2.42515,30.9352,2.12661,"0.0249125s mean, 0.0794946s max, 0.0184069s m...","0.00686263s mean, 0.0655146s max, 0.00628522s..."
4,3.07937,0.51365,0.503059,2.06266,2.70953,0.119245,2.52507,0.065219,30.761,2.05313,"0.0247788s mean, 0.095029s max, 0.0181971s mi...","0.0066244s mean, 0.0576311s max, 0.00619922s ..."


In [13]:
def f_plot(df,col_list=['train_obj']):
    '''
    Draw the requested columns of the dataframe on a new figure.

    df       : dataframe holding the columns to draw
    col_list : names of the columns to draw; each one is shown with markers only
               (no connecting line) and labelled with its name in the legend
    '''
    plt.figure()

    # Pair each column with a marker; the six markers repeat for longer lists
    marker_cycle=itertools.cycle(('o','*','H','D','.','x'))
    for column,symbol in zip(col_list,marker_cycle):
        plt.plot(df[column],linestyle='',marker=symbol,label=column)

    plt.xlabel('Epoch')
    plt.legend()

# Draw the four training columns together on a single figure
f_plot(df,col_list=['train_obj','train_dfake','train_dreal','train_gen'])

# plt.savefig('fig2.png')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [14]:
### Compare different quantities

# Columns offered in the selection widget: the numeric ones only
# (the *_batch_stats columns hold text and are left out)
col_list=['train_obj', 'train_dreal', 'train_dfake', 'train_gen', 'val_obj',
       'val_dreal', 'val_dfake', 'val_gen', 'train_time', 'val_time']
# Multi-select widget for col_list; df is passed to f_plot as a fixed (non-widget) argument
interact_manual(f_plot,col_list=SelectMultiple(options=col_list),df=fixed(df))


interactive(children=(SelectMultiple(description='col_list', options=('train_obj', 'train_dreal', 'train_dfake…

<function __main__.f_plot(df, col_list=['train_obj'])>