In [1]:
import pandas as pd
import os
import re
import codecs
from IPython.display import display
from six.moves import cPickle as pickle
import string
from PIL import Image
import numpy as np
import h5py

In [4]:
# Inject a CSS rule so the notebook's `.container` element is 90% wide.
# `display`/`HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [5]:
# Pandas display configuration for this notebook.
# `width` is None and is reused for both the column-count limit and the
# console width option.
width = None
pd.set_option('display.max_rows', 600)
pd.set_option('display.max_columns', width)
pd.set_option('display.max_colwidth', 600)
pd.set_option('display.width', width)
pd.set_option('display.max_seq_items', None)
# Keep wide frames on a single line and left-justify the column headers.
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.colheader_justify', 'left')

In [74]:
class Visualize(object):
    """Inspect arrays stored in per-step HDF5 files and render token ids as text.

    Files are looked up as '<storedir>/<graph>_<step>.h5'. The vocabulary is
    loaded from 'dict_vocab.pkl' and 'dict_id2word.pkl' in `gen_datadir`.
    """

    def __init__(self, storedir, gen_datadir):
        """
        Args:
            storedir:    directory containing the '<graph>_<step>.h5' files.
            gen_datadir: directory containing 'dict_vocab.pkl' and
                         'dict_id2word.pkl'.
        """
        self._storedir = storedir
        # NOTE(review): read_pickle unpickles arbitrary objects - only point
        # this at trusted files.
        self._word2id = pd.read_pickle(os.path.join(gen_datadir, 'dict_vocab.pkl'))
        i2w = pd.read_pickle(os.path.join(gen_datadir, 'dict_id2word.pkl'))
        # Ids -1 .. -10 are rendered as their own decimal string
        # (presumably sentinel ids absent from the vocabulary - TODO confirm).
        for i in range(-1, -11, -1):
            i2w[i] = '%d' % i
        ## Append space after all commands beginning with a backslash (except backslash alone).
        ## startswith() is used so an empty token does not raise IndexError.
        self._id2word = {
            i: (w + " " if w.startswith('\\') else w)
            for i, w in i2w.items()
        }
        # The lone backslash token must stay exactly as it is, without the trailing space.
        self._id2word[self._word2id['id']['\\']] = '\\'

    @property
    def w2i(self):
        """Word -> id mapping (the 'id' entry of the loaded vocabulary)."""
        return self._word2id['id']

    @property
    def i2w(self):
        """Id -> display-string mapping built in __init__."""
        return self._id2word

    def _h5_path(self, graph, step):
        """Return the path of the HDF5 file for `graph` at `step`."""
        return os.path.join(self._storedir, '%s_%d.h5' % (graph, step))

    def keys(self, graph, step):
        """Return the list of top-level keys stored in the file for `graph`/`step`.

        Args:
            graph: 'training' or 'validation'
            step:  step whose output file is to be inspected
        """
        # Open read-only so a wrong path raises instead of creating an empty
        # file, and materialise the keys before the file is closed.
        with h5py.File(self._h5_path(graph, step), 'r') as h5:
            return list(h5.keys())

    def np(self, graph, step, key):
        """Return the full contents of dataset `key` as read by h5py.

        Args:
            graph: 'training' or 'validation'
            step:  step whose output is to be fetched
            key:   key of object to fetch - e.g. 'predicted_ids'
        """
        with h5py.File(self._h5_path(graph, step), 'r') as h5:
            return h5[key][...]

    def df(self, graph, step, key):
        """Return dataset `key` wrapped in a pandas DataFrame."""
        return pd.DataFrame(self.np(graph, step, key))

    def words(self, graph, step, key):
        """Return dataset `key` as a DataFrame with every id replaced by its string.

        Raises:
            KeyError: if an id in the dataset is missing from the id->word mapping.
        """
        df = self.df(graph, step, key)
        lookup = lambda x: self._id2word[x]
        # DataFrame.applymap is deprecated in favour of DataFrame.map in newer
        # pandas. Check on the class so a column named 'map' cannot interfere.
        if hasattr(pd.DataFrame, 'map'):
            return df.map(lookup)
        return df.applymap(lookup)

    def str(self, graph, step, key):
        """Return a one-column DataFrame with each row's tokens concatenated."""
        df_str = self.words(graph, step, key)

        ## Command tokens (those starting with a backslash, except the lone backslash)
        ## carry a trailing space, so the concatenated string should compile
        ## if the prediction was syntactically correct.
        return pd.DataFrame(["".join(row) for row in df_str.itertuples(index=False)])
                

In [75]:
# Visualizer over one run's stored outputs and the generated vocabulary files.
v = Visualize(
    storedir='./tb_metrics_dev/2017-10-06 00-39-05 PDT/store',
    gen_datadir='../data/generated2',
)

In [76]:
# List the keys stored for the training graph at step 1.
v.keys(graph='training', step=1)

[u'alpha', u'predicted_ids', u'y']

In [77]:
# Shape of the 'alpha' array stored for the training graph at step 1.
v.np(graph='training', step=1, key='alpha').shape

(1, 40, 71, 3, 33)

In [83]:
# Look at the [0, 0] sub-array of 'alpha' for the training graph at step 1.
v.np(graph='training', step=1, key='alpha')[0, 0]

array([[[ 0.00650329,  0.02085393,  0.03852431, ...,  0.0033305 ,
          0.00321665,  0.0032965 ],
        [ 0.0019619 ,  0.00199953,  0.00267198, ...,  0.00610553,
          0.00607357,  0.00819643],
        [ 0.00984446,  0.02706817,  0.0226049 , ...,  0.00518685,
          0.00519742,  0.00717194]],

       [[ 0.00499212,  0.01671512,  0.03074244, ...,  0.0027173 ,
          0.00265462,  0.00344004],
        [ 0.00226628,  0.00223743,  0.00160808, ...,  0.00640897,
          0.00633911,  0.0082096 ],
        [ 0.01315936,  0.04250258,  0.01784795, ...,  0.00509784,
          0.00509746,  0.00670797]],

       [[ 0.00501953,  0.0167746 ,  0.03186426, ...,  0.00270955,
          0.00264292,  0.00336353],
        [ 0.00221744,  0.00220547,  0.0016955 , ...,  0.00639331,
          0.00634303,  0.00830908],
        [ 0.0135147 ,  0.04153442,  0.01806595, ...,  0.00502808,
          0.0050262 ,  0.00677582]],

       ..., 
       [[ 0.00511913,  0.0181241 ,  0.0291148 , ...,  0.0029531

In [61]:
# Count the cells of the validation graph's 'y' array (step 1) that decode to a
# bare backslash. The whole expression is wrapped in print(...) so that it works
# both as a Python 2 print statement and as a Python 3 function call.
df = v.words('validation', 1, 'y')
print((df == '\\').sum().sum())

41


In [59]:
# Render each row of the training graph's 'y' array (step 1) as one concatenated string.
v.str(graph='training', step=1, key='y')

Unnamed: 0,0
0,"h_{\mu \nu }(k)=h^{+}(k)e_{\mu \nu }^{+}(k)+h^{\times }(k)\,e_{\mu \nu }^{\times }(k)\,\label {fas}\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"
1,"\label {8}D^{++}_r\Gamma ^{(0|2,2,2,2)}=0\;,\quad \mbox {if}x_r\neq x_s\;,\quad r,s=1,\ldots ,4\;.\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"
2,"\omega _0^2-k_n^2\left [{1\over g}+{1\over 2}k_n^{D-4}\int ^{\Lambda /k_n}{d^Dq\over (2\pi )^D}\,{1\over q^2(q+1)^2}\right ]=0.\eos \eos \eos \eos \eos"
3,"S_{YM}=\int d^dx\,\left (-\frac {1}{4g^2}\right )F_{\mu \nu }*F^{\mu \nu }\,,\label {eq:NYM_action}\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"
4,"\label {A13}T(u_E)=\left (\begin {array}{c|c}1&b(u^0_E)\vec u_E^{\rm T}\\[1ex]\hline 0&I\end {array}\right ),\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"
5,"K^{(1)}\;:=\;\frac {1}{-\triangle +\frac {e^2N}{\pi +gN}}\;=\;\frac {\pi +gN}{\pi }K_{11}\;,\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"
6,\label {Qrepresent_int}Q_{\nu -1/2}(\cosh \rho )=\frac {1}{\sqrt {2}}\int _{\rho }^{\infty }dt\frac {e^{-\nu t}}{\sqrt {\cosh t-\cosh \rho }}\.\eos \eos \eos \eos \eos \eos \eos
7,\left .-2\int d\phi d\theta N\\delta \left (\gamma -\frac {\gamma ^3}{l^2}\right )\right |^{r_{++}}_{r_+}\.\label {var3}\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos
8,"S^{F}=\sum ^{+\infty }_{n_{1}=0}\sum ^{n_{1}}_{n_{0}=-n_{1}-1}\oplus (1/2+n_{0},3/2+n_{1}),\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"
9,eq.(16)\bar {\bar {\Psi }}_0\i \gamma ^\mu \partial _\mu \Psi _0=\bar {\Psi }\i \gamma ^\mu (\partial _\mu \Psi -\i eA_\mu )\Psi -m\bar {\Psi }\Psi \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos


In [21]:
# Look up the vocabulary id of the bare backslash token.
v.w2i['\\']

61