#### This notebook is used for extracting and formatting data for publishing. Copy it into a gallery folder such as gallery/I2L-STRIPS before running it.

In [None]:
import sys

# Make the shared helper modules under src/commons importable from this
# notebook; the path is relative to the directory the notebook is run from.
sys.path.append('../../src/commons')

In [None]:
# Import from the public IPython.display module (the same one the import cell
# below uses); IPython.core.display is a deprecated location for `display`.
from IPython.display import display, HTML

# Widen the notebook container to 90% of the browser window.
display(HTML("<style>.container { width:90% !important; }</style>"))

In [None]:
import pandas as pd
import os
import re
import codecs
from IPython.display import display, Math, Latex
from IPython.display import Image as ipImage
from six.moves import cPickle as pickle
import string
from PIL import Image
import numpy as np
import h5py
import matplotlib as mpl
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
# Config the matplotlib backend as plotting inline in IPython
%matplotlib inline

In [None]:
# Pandas display settings used by the table-rendering cells in this notebook.
pd.set_option('display.max_rows', 120)
pd.set_option('display.max_colwidth', 600)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.colheader_justify', 'left')

In [None]:
import data_commons as dtc
import dl_commons as dlc
import viz_commons as viz
from viz_commons import VisualizeDir, DiffParams, VisualizeStep

### Load results of test run

In [None]:
storedir = '.'
# When False, chkclobber() refuses to hand back a path that already exists.
clobber = True
# Master switch consulted by dump(). It has its own name on purpose: the flag
# used to be called `dump` and was rebound by `def dump` below, which made the
# `if dump:` guard always true.
do_dump = True
evaldir = os.path.join(storedir, 'eval_images')
rendered_dir = os.path.join(evaldir, 'rendered_images')
dumpdir = os.path.join(storedir, 'gallery_data')


def chkclobber(path):
    """Return `path` unchanged, refusing to overwrite when `clobber` is False.

    Raises:
        AssertionError: if `path` already exists and `clobber` is False.
    """
    # Raised explicitly (rather than via `assert`) so the check is not
    # stripped when Python runs with -O.
    if not clobber and os.path.exists(path):
        raise AssertionError("Can't overwrite file %s when clobber==False" % path)
    return path


def dump(df_, df_sample_, fname):
    r"""Write the gallery files for one category into `dumpdir`.

    Does nothing when `do_dump` is False. Otherwise writes three files:
      * '<fname>_sample_table.txt': one 'y & $\hat{y}$' line per sample row,
      * '<fname>_preds.pkl': pickle of `df_`,
      * '<fname>_preds_sample.pkl': pickle of `df_sample_`.

    Args:
        df_: full DataFrame for the category.
        df_sample_: sample DataFrame; must have 'y' and '$\hat{y}$' columns
            holding strings.
        fname: file-name prefix for the three outputs.

    Raises:
        AssertionError: from chkclobber() if an output exists and `clobber`
            is False.
    """
    if not do_dump:
        return

    # The output folder is not created anywhere else in the notebook.
    if not os.path.isdir(dumpdir):
        os.makedirs(dumpdir)

    table_path = chkclobber(os.path.join(dumpdir, '%s_sample_table.txt' % fname))
    with open(table_path, 'w') as f:
        for row in df_sample_[['y', r'$\hat{y}$']].itertuples(index=False):
            f.write(row[0] + ' & ' + row[1] + '\n')
    df_.to_pickle(chkclobber(os.path.join(dumpdir, '%s_preds.pkl' % fname)))
    df_sample_.to_pickle(chkclobber(os.path.join(dumpdir, '%s_preds_sample.pkl' % fname)))

In [None]:
# Open the run stored under `storedir` and pick the last entry of the second
# element returned by get_steps() as the step to visualize.
vd = VisualizeDir(os.path.expanduser(storedir))
last_step = vd.get_steps()[1][-1]
print('last_step = %d' % (last_step,))

# Test-set view of that step, plus the matching predictions pickle.
vs = VisualizeStep(vd, 'test', last_step)
_preds_file = 'predictions_test_%d.pkl' % last_step
df_preds = pd.read_pickle(os.path.join(evaldir, _preds_file))

In [None]:
# Show the predictions frame read from predictions_test_<last_step>.pkl.
df_preds

In [None]:
# Fetch the target ('y') and predicted ('predicted_ids') sequences as strings,
# together with the 'image_name' and 'ed' keys, then list the resulting columns.
df_strs = vs.strs(
    'y',
    'predicted_ids',
    sortkey=None,
    mingle=False,
    trim=True,
    wrap_strs=True,
    keys=['image_name', 'ed'],
)
df_strs.columns

### View and save the unmatched images

In [None]:
# Split the names listed in unmatched_filenames.txt into:
#   unmatched - the predicted image exists under images_pred
#   missing   - no predicted image file exists under images_pred
unmatched = []
missing = []
pred_images_dir = os.path.join(rendered_dir, 'images_pred')
with open(os.path.join(rendered_dir, 'unmatched_filenames.txt'), 'r') as f:
    for line in f:
        fname = os.path.basename(line.strip())
        if not fname:
            # A blank line would otherwise resolve to the images_pred folder
            # itself, pass the existence test and put '' into `unmatched`.
            continue
        if os.path.exists(os.path.join(pred_images_dir, fname)):
            unmatched.append(fname)
        else:
            missing.append(fname)

num_missing = len(missing)
total = len(df_preds)
# Guard the percentage against an empty predictions frame.
pct_missing = (num_missing * 100.) / total if total else 0.
print('%d(%.3f%%) files missing out of %d' % (num_missing, pct_missing, total))

# Rows of df_preds whose index label is one of the unmatched file names.
df_bad = df_preds.loc[unmatched]

def wrap_math(df_):
    r"""Return a display-ready copy of `df_` with the formulas wrapped in `$`.

    The 'iloc' column is dropped and the index is moved into a regular column,
    so the result has a fresh 0..N-1 index. Two columns are added:
    'y' holds '$<target_seq>$' and '$\hat{y}$' holds '$<pred_seq>$'.
    `df_` itself is left untouched.

    Args:
        df_: DataFrame with 'target_seq', 'pred_seq' and 'iloc' columns.

    Returns:
        A new DataFrame as described above.
    """
    targets = ['$%s$' % seq for seq in df_['target_seq']]
    preds = ['$%s$' % seq for seq in df_['pred_seq']]
    # drop() and reset_index() already return new frames, so no extra copy is
    # needed to keep the caller's frame unchanged.
    wrapped = df_.drop(['iloc'], axis=1).reset_index(drop=False)
    wrapped = wrapped.assign(y=targets, pred=preds)
    # Raw string: '\h' is not a valid escape sequence in a normal literal.
    return wrapped.rename(columns={'pred': r'$\hat{y}$'})

# Draw 115 unmatched rows with a fixed seed so that reruns pick the same rows:
# the MathJax filtering cell drops rows of this sample by hard-coded label.
# NOTE(review): that drop list was presumably built against an earlier,
# unseeded draw - re-check it against this sample.
df_bad_sample_ = wrap_math(df_bad.sample(115, random_state=0))
df_bad_sample_[[r'$\hat{y}$', 'y']]

In [None]:
# List the columns of the wrapped sample, including the 'y' and '$\hat{y}$'
# columns added by wrap_math().
df_bad_sample_.columns

#### Filter MathJax Errors
Although they render fine with pdflatex, MathJax has difficulty rendering some of the formulas. We therefore remove those rows here, for visualization purposes only, so that pandas can be used to display nicely formatted formulas.

In [None]:
# Drop the hand-picked rows that MathJax fails to render, keep the first 100
# of what remains and renumber them from 0.
df_bad_sample = (
    df_bad_sample_
    .drop([1, 3, 44, 45, 86, 89, 94, 102, 107, 110, 114])
    .iloc[:100]
    .reset_index(drop=True)
)
# Raw string: '\h' is not a valid escape sequence in a normal literal.
df_bad_sample[[r'$\hat{y}$', 'y']]

In [None]:
# Write the unmatched predictions and their curated sample under the
# 'unmatched' file prefix.
dump(df_bad, df_bad_sample, 'unmatched')

In [None]:
# Everything that is neither unmatched nor missing counts as matched.
_excluded = unmatched + missing
df_matched = df_preds.loc[~df_preds.index.isin(_excluded)]
df_matched.shape

In [None]:
# Shape of the matched subset whose 'ed' value is exactly zero.
df_matched.loc[df_matched['ed'] == 0.0].shape

In [None]:
# Matched rows with ed == 0, longest predictions first.
df_txt_matched = df_matched[df_matched.ed == 0.0].sort_values(by='pred_len', ascending=False)
# Wrap the 100 longest for display.
df_txt_matched_sample_ = wrap_math(df_txt_matched[:100])
# Raw string: '\h' is not a valid escape sequence in a normal literal.
df_txt_matched_sample_[[r'$\hat{y}$', 'y']]

In [None]:
# Write the ed == 0 matches and their 100-row sample under the 'txt_matched'
# file prefix.
dump(df_txt_matched, df_txt_matched_sample_, 'txt_matched')

In [None]:
# df_img_matched is only created in the next cell, so referencing it here
# raises NameError on a top-to-bottom run. It is a row subset of df_matched,
# so its columns are the same as the ones shown here.
df_matched.columns

In [None]:
# Matched rows with a non-zero 'ed', longest predictions first.
df_img_matched = df_matched[df_matched.ed != 0.0].sort_values(by='pred_len', ascending=False)
# Wrap the first 110 for display; the next cell drops a few and keeps 100.
df_img_matched_sample_ = wrap_math(df_img_matched[:110])
# Raw string: '\h' is not a valid escape sequence in a normal literal.
df_img_matched_sample_[[r'$\hat{y}$', 'pred_len', 'y', 'target_len']]

In [None]:
# Drop four hand-picked rows, renumber from 0 and keep the first 100.
# NOTE(review): presumably these are rows MathJax fails to render, as in the
# unmatched sample - confirm.
df_img_matched_sample = (
    df_img_matched_sample_
    .drop([29, 60, 89, 104])
    .reset_index(drop=True)
    .iloc[:100]
)
# Raw string: '\h' is not a valid escape sequence in a normal literal.
df_img_matched_sample[[r'$\hat{y}$', 'pred_len', 'y', 'target_len']]

In [None]:
# Write the ed != 0 matches and their curated sample under the 'img_matched'
# file prefix.
dump(df_img_matched, df_img_matched_sample, 'img_matched')

# End