In [30]:
# `display` and `HTML` are imported from the public `IPython.display` module
# (importing `display` from `IPython.core.display` is deprecated), which is
# also where the import cell further down takes `display` from.
from IPython.display import display, HTML
# Inject a CSS rule that sets the notebook's `.container` element to 90% width.
display(HTML("<style>.container { width:90% !important; }</style>"))

In [103]:
import pandas as pd
import os
import re
import codecs
from IPython.display import display
from six.moves import cPickle as pickle
import string
from PIL import Image
import numpy as np
import h5py

In [104]:
# pandas display configuration for this notebook.
# `width` is shared by the column-count limit and the console width; None is
# passed through to pandas unchanged for both options.
width = None
pd.set_option('display.max_rows', 600)
pd.set_option('display.max_columns', width)
pd.set_option('display.max_colwidth', 600)
pd.set_option('display.width', width)
pd.set_option('display.max_seq_items', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.colheader_justify', 'left')

In [105]:
import data_commons as dtc
import dl_commons as dlc

In [125]:
class VisualizeDir(object):
    """Accessor for the '<graph>_<step>.h5' snapshot files stored in one directory.

    Besides the snapshots in `storedir`, the constructor loads 'hyper.pkl' and
    'args.pkl' from the parent of `storedir` and the vocabulary pickles from
    `gen_datadir`, so that arrays of token ids can be decoded into strings.
    """

    def __init__(self, storedir, gen_datadir='../data/generated2'):
        """
        Args:
            storedir:    directory containing the '<graph>_<step>.h5' files.
            gen_datadir: directory containing 'dict_vocab.pkl' and 'dict_id2word.pkl'.
        """
        self._storedir = storedir
        self._logdir = os.path.join(storedir, '..')
        self._hyper = dtc.load(self._logdir, 'hyper.pkl')
        self._args = dtc.load(self._logdir, 'args.pkl')

        # NOTE: these are pickles - only point gen_datadir at files you trust.
        self._word2id = pd.read_pickle(os.path.join(gen_datadir, 'dict_vocab.pkl'))
        i2w = pd.read_pickle(os.path.join(gen_datadir, 'dict_id2word.pkl'))
        # Map the ids -1 .. -10 to their decimal string so they decode to e.g. '-1'.
        for i in range(-1, -11, -1):
            i2w[i] = '%d' % i
        # Append a space after every token that begins with a backslash.
        # startswith() is used instead of w[0] so an empty token cannot raise.
        self._id2word = dict(
            (i, w + " " if w.startswith('\\') else w) for i, w in i2w.items()
        )
        # ... except for the lone backslash token, which is kept as is.
        self._id2word[self._word2id['id']['\\']] = '\\'

    @staticmethod
    def _step_of(filename):
        """Return the integer between the last '_' and the first following '.' of a file name."""
        return int(os.path.basename(filename).split('_')[-1].split('.')[0])

    def _steps(self, prefix=''):
        """Return the steps of all files in the store directory whose name starts with `prefix`."""
        return [self._step_of(f) for f in os.listdir(self._storedir) if f.startswith(prefix)]

    def _h5_path(self, graph, step):
        """Return the path of the snapshot file for `graph` at `step`."""
        return os.path.join(self._storedir, '%s_%d.h5' % (graph, step))

    def _to_words(self, df):
        """Replace every id in `df` by its word; an unknown id raises KeyError."""
        lookup = self._id2word.__getitem__
        # Per-column Series.map is used because it is available in every pandas
        # version (DataFrame.applymap is deprecated in newer releases).
        return df.apply(lambda col: col.map(lookup))

    @staticmethod
    def _joined_rows(df):
        """Concatenate the cells of each row of `df` into one string per row."""
        return ["".join(row) for row in df.itertuples(index=False)]

    @property
    def storedir(self):
        """The directory passed to the constructor."""
        return self._storedir

    @property
    def w2i(self):
        """The 'id' entry of the loaded vocabulary (word -> id lookup)."""
        return self._word2id['id']

    @property
    def i2w(self):
        """The id -> word dict built by the constructor."""
        return self._id2word

    @property
    def max_steps(self):
        """Tuple (highest step of any file, highest step of any 'validation*' file).

        Every file name in the store directory must follow the '<name>_<step>.<ext>'
        pattern, otherwise int() raises ValueError.
        """
        return sorted(self._steps())[-1], sorted(self._steps('validation'))[-1]

    @property
    def args(self):
        """Contents of 'args.pkl' as loaded by the constructor."""
        return self._args

    @property
    def hyper(self):
        """Contents of 'hyper.pkl' as loaded by the constructor."""
        return self._hyper

    def keys(self, graph, step):
        """Return the list of top-level keys stored in the snapshot of `graph` at `step`."""
        # Open read-only: inspecting a snapshot must never create or modify it.
        with h5py.File(self._h5_path(graph, step), 'r') as h5:
            # Materialise the keys before the file is closed; a lazy keys view
            # would refer to a closed file once the `with` block exits.
            return list(h5.keys())

    def np(self, graph, step, key):
        """
        Args:
            graph: 'training' or 'validation'
            step:  step whose output is to be fetched
            key:   key of object to fetch - e.g. 'predicted_ids'
        Returns:
            The full contents of dataset `key`, read into memory.
        """
        with h5py.File(self._h5_path(graph, step), 'r') as h5:
            return h5[key][...]

    def df(self, graph, step, key):
        """Same as np(), wrapped in a DataFrame."""
        return pd.DataFrame(self.np(graph, step, key))

    def words(self, graph, step, key, key2=None):
        """Decode the ids stored under `key` (and optionally `key2`) into words.

        Returns:
            A DataFrame of words with the same shape as the stored array. When
            `key2` is given, the two decoded frames are placed side by side
            under a two-level column index whose top level is key / key2.
        """
        decoded = self._to_words(self.df(graph, step, key))
        if key2 is None:
            return decoded
        decoded2 = self._to_words(self.df(graph, step, key2))
        # pd.concat with `keys` is the supported way to combine whole frames
        # under named column groups; a dict of DataFrames is not accepted by
        # the DataFrame constructor.
        return pd.concat([decoded, decoded2], axis=1, keys=['%s' % key, '%s' % key2])

    def strs(self, graph, step, key, key2=None, mingle=True):
        """Decode the ids stored under `key` (and optionally `key2`) into one string per row.

        Args:
            graph:  'training' or 'validation'
            step:   step whose output is to be fetched
            key:    key of the first id array
            key2:   optional key of a second id array
            mingle: with key2 only - True interleaves the rows of both keys in a
                    single column (key row 0, key2 row 0, key row 1, ...); False
                    puts each key in its own column.
        """
        ## each token's string version - excepting backslash - has a space appended to it,
        ## therefore the string output should compile if the prediction was syntactically correct
        rows = self._joined_rows(self.words(graph, step, key))
        if key2 is None:
            return pd.DataFrame(rows)

        rows2 = self._joined_rows(self.words(graph, step, key2))
        if mingle:
            data = {'%s_%d %s / %s\t\t(%s)'%(graph, step, key, key2, self._storedir): [e for t in zip(rows, rows2) for e in t]}
        else:
            data = {'%s_%d.%s\t\t(%s)'%(graph, step, key, self._storedir): rows,
                    '%s_%d.%s\t\t(%s)'%(graph, step, key2, self._storedir): rows2}
        return pd.DataFrame(data)

    def prune_logs(self, save_epochs=1):
        """Report the training snapshots that precede the latest `save_epochs` epochs.

        An epoch is identified by a 'validation*' file. A 'training*' file is
        reported when its step is lower than the (save_epochs+1)-th newest
        validation step and is not itself a validation step.

        NOTE: this method only prints the candidate files; it does not delete anything.

        Returns:
            False if there are no more than `save_epochs` validation files
            (nothing to prune), otherwise None.
        """
        epoch_steps = self._steps('validation')
        if len(epoch_steps) <= save_epochs:
            # Format the count, not the list itself ('%d' % list raises TypeError).
            print('Only %d full epochs were found. Deleting nothing.' % len(epoch_steps))
            return False

        epoch_steps.sort(reverse=True)
        max_step = epoch_steps[save_epochs]
        # A set can be tested for membership any number of times on both
        # Python 2 and 3 (a Python 3 filter() object is a one-shot iterator).
        epoch_step_set = set(epoch_steps)
        files_to_remove = [
            f for f in os.listdir(self._storedir)
            if f.startswith('training')
            and self._step_of(f) < max_step
            and self._step_of(f) not in epoch_step_set
        ]
        # Single pre-formatted argument: prints identically under Python 2 and 3.
        print('The following files will be removed %s' % (files_to_remove,))

class VisualizeStep(object):
    """A visualizer bound to one step.

    Every method forwards to the method of the same name on `visualizer`,
    supplying the step given to the constructor.
    """

    def __init__(self, visualizer, step):
        """
        Args:
            visualizer: object providing keys/np/df/words/strs that take a step
                        (e.g. a VisualizeDir).
            step:       the step that all calls are bound to.
        """
        self._step = step
        self._visualizer = visualizer

    def keys(self, graph):
        """Forward to visualizer.keys(graph, step)."""
        return self._visualizer.keys(graph, self._step)

    def np(self, graph, key):
        """Forward to visualizer.np(graph, step, key)."""
        return self._visualizer.np(graph, self._step, key)

    def df(self, graph, step, key=None):
        """Forward to visualizer.df.

        May be called in two ways:
            df(graph, key)        - like the sibling methods; uses the bound step.
            df(graph, step, key)  - the original parameter list; the explicit
                                    step is used instead of the bound one.

        (The previous body called the non-existent `pd.DataFrame.df` and passed
        three arguments to the two-argument `np`, so it could never succeed.)
        """
        if key is None:
            # Two-argument form: the second positional argument is the key.
            step, key = self._step, step
        return self._visualizer.df(graph, step, key)

    def words(self, graph, key, key2=None):
        """Forward to visualizer.words(graph, step, key, key2)."""
        return self._visualizer.words(graph, self._step, key, key2)

    def strs(self, graph, key, key2=None, mingle=True):
        """Forward to visualizer.strs(graph, step, key, key2, mingle)."""
        return self._visualizer.strs(graph, self._step, key, key2, mingle)

class DiffParams(object):
    """Compare the pickled parameter files of two directories.

    The same file name is loaded from both directories with `dtc.load`; the
    pair is then returned, pretty-printed as a dict diff, or displayed as the
    two tables produced by `dlc.diff_table`.
    """

    def __init__(self, dir1, dir2):
        self._dir1, self._dir2 = dir1, dir2

    def get(self, filename, to_str):
        """Load `filename` from both directories.

        Args:
            filename: name of the file to load from each directory.
            to_str:   if truthy, pass each loaded object through `dlc.to_dict`.
        Returns:
            Tuple (object from dir1, object from dir2).
        """
        loaded = [dtc.load(folder, filename) for folder in (self._dir1, self._dir2)]
        if to_str:
            loaded = [dlc.to_dict(obj) for obj in loaded]
        return loaded[0], loaded[1]

    def print_dict(self, filename, to_str):
        """Pretty-print `dlc.diff_dict` of the two loaded objects."""
        left, right = self.get(filename, to_str)
        difference = dlc.diff_dict(left, right)
        dtc.pprint(difference)

    def _table(self, filename):
        """Display the two parts returned by `dlc.diff_table`, each as a DataFrame."""
        left, right = self.get(filename, False)
        first, second = dlc.diff_table(left, right)
        for part in (first, second):
            display(pd.DataFrame(part))

    def args(self, to_str=True):
        """Display the diff tables for 'args.pkl' (`to_str` is accepted but not used)."""
        self._table('args.pkl')

    def hyper(self, to_str=True):
        """Display the diff tables for 'hyper.pkl' (`to_str` is accepted but not used)."""
        self._table('hyper.pkl')

    def get_args(self):
        """Return the 'args.pkl' pair, converted with `dlc.to_dict`."""
        return self.get('args.pkl', to_str=True)

    def get_hyper(self):
        """Return the 'hyper.pkl' pair, converted with `dlc.to_dict`."""
        return self.get('hyper.pkl', to_str=True)

In [163]:
# Select the run whose stored snapshots are inspected in the cells below.
# The commented-out lines are other runs kept for quick switching; only the
# final, uncommented assignment to `vd` takes effect.
# v = Visualize('./tb_metrics_dev/2017-10-06 17-56-47 PDT/store', '../data/generated2')
# v = VisualizeDir('./tb_metrics/2017-10-08 12-26-45 PDT/store')
# v = VisualizeDir('./tb_metrics_dev/2017-10-09 12-45-15 PDT/store')
# vd = VisualizeDir('./tb_metrics/2017-10-09 17-43-49 PDT/store')
# vd = VisualizeDir('tb_metrics/2017-10-09 16-01-07 PDT/store')
# vd2 = VisualizeDir('tb_metrics/2017-09-26 22-40-18 PDT/new_code 2017-10-10 15-10-17 PDT/store')
# vd = VisualizeDir('./tb_metrics/2017-10-07 14-33-35 PDT_my_decoder/store')
vd = VisualizeDir('./tb_metrics/2017-10-10 19-14-54 PDT/store')

In [164]:
# Show the pair (highest step of any stored file, highest 'validation' step)
# for the selected run.
display(vd.max_steps)
# display(vd2.max_steps)

(96400, 96348)

In [165]:
# Bind the selected run to step 96348 and show the decoded 'predicted_ids' and
# 'y' arrays of the validation snapshot as strings, one column per key
# (mingle=False). 'y' is presumably the target sequence - TODO confirm.
vs = VisualizeStep(vd, 96348)
vs.strs('validation', 'predicted_ids', 'y', mingle=False)

Unnamed: 0,validation_96348.predicted_ids (./tb_metrics/2017-10-10 19-14-54 PDT/store),validation_96348.y (./tb_metrics/2017-10-10 19-14-54 PDT/store)
0,y^2=(rN)^2-2A^N(dA-R^{(0)}_pus-\Lambda ^{(3)})).\eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1,y^{2}=x^{2n_{c}}-4\Lambda ^{2}x^{n_{f}}=x^{n_{f}}(x+2\Lambda )(x-2\Lambda ).\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos
1,a_{\alpha }(p^\alpha )=\frac {N_c}{2N}\sum _{p=1}^N\left (X^+_n-n^2_n\right )\eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1,\label {eqthree}a_\alpha ({\bf x}_i):={\hbar \over e}{\theta \over \pi }\sum _{\{j;j\ne i\}}\nabla _\alpha ^{(i)}\varphi _{ij}\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos
2,S=-\frac {1}{f}\int [d\phi ]_{D_{2}}(t)du(A)^{a_{3}}(A)\label {a}\eos \eos \eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1,"S=-\;\frac {1}{4}\;\int d^{4}x(G_{\mu \nu }(A),G^{\mu \nu }(A))\label {eq:action}\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"
3,H_t=\int d^2x(\frac {1}{2}\varepsilon ^{ab}F_{ab}E^c_a-\frac {1}{2}\varepsilon ^{ab}B_aS_{ab}]\label {10}\eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1,H_r=\int d^2x~[\frac {1}{2}(\epsilon ^{ij}\partial _iB_j)^2-\frac {1}{2}m^2B_iB^i]\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos
4,"\psi _{R}[H_{b},H_{R}|T^{R}\rangle =H_{R}(T_{B}-H_{R}-H_{0})+H_{1}(H_{C})+H_{1,b}\xi _{r}.\eos \eos \eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1","\gamma _{5}H^{2}=(\gamma _{5}H+H\gamma _{5})H-H(\gamma _{5}H+H\gamma _{5})+H^{2}\gamma _{5}=H^{2}\gamma _{5},\eos \eos \eos \eos"
5,"\lambda '^{\mu }+\bar {\eta }^{\mu }\beta '',~~~~~\gamma ^{\mu }\gamma ^{\mu }Z_{\mu }=\bar {\theta }^{\mu }+\bar {\xi }^{\mu }^{a}.\label {quar$}\eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1","\lambda ^{\prime \prime }=\lambda ^{\prime }\lambda +\overline {\zeta }^{\prime }\xi ,\hspace {0.3in}\xi ^{\prime \prime }=\xi ^{\prime }\lambda +D^{\prime }\xi .\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"
6,"F_{\mu \nu }^{\,\,\,\,\,\,\,\rho }=g_{\mu \nu }^{1\alpha }(h)F_{\mu \nu }^{\,\,\,\,\,\,\mu }(h)-F_{\mu \nu }^{b\rho }(h)\eos \eos \eos \eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1","\Gamma ^{abc,r}_{\mu }(k,p)=\Gamma ^{abc}_{\mu }(k,p)-k_{\mu }\Gamma ^{abc}(b^2)\label {19}\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"
7,"L={1\over \lambda ^2}\,X^{\alpha \beta }fxr[U_{\mu \nu }(x)X^{\mu \nu }(t-X^0)X^{\mu \nu }(x,q)]\eos \eos \eos \eos \eos \eos \eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1",L={1\over 4{\lambda }}[{\dot X^{\mu }}{\dot X^{\nu }}G_{\mu \nu }(X)-c^2{X'^{\mu }}{X'^{\nu }}G_{\mu \nu }(X)]\eos \eos \eos \eos \eos
8,"\ln {\tilde {\mathcal {\mathcal {O}}}}\exp \left (m^{2}y^{2}\right )^{2}\{(E^{2}-w_{0}r^{2}x^{2}),\eos \eos \eos \eos \eos \eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1","\begin {array}{rcl}{\rm integral}&\approx &\displaystyle 2^{-1}(4\pi )^{-4}p^2\ln (m^4\eta ^2),\end {array}\eos \eos \eos \eos \eos \eos \eos"
9,"(2,1,1)T(D,E)-{\rm Tr}(F,D)={\rm S}(F/2)+{\rm 5}(F/2)\frac {(5\Gamma ')^2}{8}\langle {\cal O}].\eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1","(2-d)S(T_f,T_i)=[Q(T_f)+dT_fL(T_f)]-[Q(T_i)+dT_iL(T_i)].\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"


In [157]:
# Compare the 'hyper.pkl' files of two run directories; diff.hyper() displays
# the two tables returned by dlc.diff_table. The commented-out lines are other
# run pairs kept for quick switching.
diff = DiffParams('./tb_metrics/2017-09-26 22-40-18 PDT', './tb_metrics/2017-10-07 14-33-35 PDT_2CALSTMs')
# diff = DiffParams('./tb_metrics/2017-10-07 14-33-35 PDT', './tb_metrics/2017-10-08 12-26-45 PDT')
# diff = DiffParams('./tb_metrics/2017-09-26 22-40-18 PDT', './tb_metrics/2017-10-08 12-26-45 PDT')
# diff = DiffParams('./tb_metrics/2017-09-26 22-40-18 PDT/w=1', './tb_metrics/2017-10-08 12-26-45 PDT')
# diff = DiffParams('./tb_metrics/2017-10-09 16-01-07 PDT_good', './tb_metrics/2017-10-09 17-43-49 PDT')
# diff = DiffParams('./tb_metrics/2017-09-26 22-40-18 PDT', './tb_metrics/2017-10-09 16-01-07 PDT_good')
diff.hyper()

Unnamed: 0,0,1
0,CALSTM_STACK_1.decoder_lstm.i ===> 576,CALSTM_STACK_1.decoder_lstm.i ===> 1512
1,CALSTM_STACK_1.CTCBlankTokenID ===> 556,CALSTM_STACK_1.CTCBlankTokenID ===> None
2,dtype_np ===> <type numpy.float32>,dtype_np ===> <type 'numpy.float32'>
3,CALSTM_STACK_1.dtype_np ===> <type numpy.float32>,CALSTM_STACK_1.dtype_np ===> <type 'numpy.float32'>
4,int_type_np ===> <type numpy.int32>,int_type_np ===> <type 'numpy.int32'>
5,CALSTM_STACK_1.use_ctc_loss ===> undefined,CALSTM_STACK_1.use_ctc_loss ===> False
6,CTCBlankTokenID ===> 556,CTCBlankTokenID ===> None
7,CALSTM_STACK_1.int_type_np ===> <type numpy.int32>,CALSTM_STACK_1.int_type_np ===> <type 'numpy.int32'>


Unnamed: 0,0,1
0,CALSTM_STACK_1.logger ===> <logging.Logger object at 0x7f4006201410>,CALSTM_STACK_1.logger ===> None
1,init_model.weights_initializer ===> <function _initializer at 0x7f40062d3b18>,init_model.weights_initializer ===> <function _initializer at 0x7f0f4d0fa7d0>
2,CALSTM_STACK_1.biases_initializer ===> <tensorflow.python.ops.init_ops.Zeros object at 0x7f40061ea810>,CALSTM_STACK_1.biases_initializer ===> <tensorflow.python.ops.init_ops.Zeros object at 0x7f850774c050>
3,output_layers.weights_initializer ===> <function _initializer at 0x7f40062d3b18>,output_layers.weights_initializer ===> <function _initializer at 0x7f0f4d0fa7d0>
4,init_model_final_layers.activation_fn ===> <function tanh at 0x7f4079de36e0>,init_model_final_layers.activation_fn ===> <function tanh at 0x7f0f551d3230>
5,embeddings_regularizer ===> <function l2 at 0x7f406bd2b8c0>,embeddings_regularizer ===> <function l2 at 0x7f0fd4396230>
6,output_layers.activation_fn ===> <function relu at 0x7f4079eeaa28>,output_layers.activation_fn ===> <function relu at 0x7f0f552c9578>
7,biases_initializer ===> <tensorflow.python.ops.init_ops.Zeros object at 0x7f40061ea810>,biases_initializer ===> <tensorflow.python.ops.init_ops.Zeros object at 0x7f850774c050>
8,embeddings_initializer ===> <function _initializer at 0x7f40062d3b18>,embeddings_initializer ===> <function _initializer at 0x7f0f4d0fa7d0>
9,CALSTM_STACK_1.att_layers.weights_initializer ===> <function _initializer at 0x7f40062d3b18>,CALSTM_STACK_1.att_layers.weights_initializer ===> <function _initializer at 0x7f0f4d0fa7d0>


In [158]:
# Index into the second run's hyper-parameters (element [1] of the pair
# returned by get_hyper()).
# NOTE(review): the recorded output of this cell is
# "IndexError: list index out of range", so the trailing [1] is presumably
# past the end of the 'CALSTM_STACK' list for this run - confirm the list
# length and fix the index (or remove the cell) before sharing the notebook.
diff.get_hyper()[1]['CALSTM_STACK'][1]

IndexError: list index out of range