In [48]:
import os
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from matplotlib import pyplot as plt
from pathlib import Path
import graphviz


In [2]:
from IPython.display import HTML

# Inline stylesheet that sets the width of the notebook, menubar and toolbar
# containers (presumably the classic Notebook page elements - confirm for
# other front-ends, which may not use these ids).
_NOTEBOOK_WIDTH_CSS = """
<style>
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 99%; }
</style>
"""
display(HTML(data=_NOTEBOOK_WIDTH_CSS))

In [3]:
def custom_plot(df, name):
    """Draw ``df`` as a bar chart and save it to ``data/img/<name>.png``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to draw; plotted with ``df.plot(kind='bar')``.
    name : str
        File stem of the PNG written under ``data/img/``.

    Returns
    -------
    list
        ``[figure, img_path]``: the matplotlib figure (already closed when
        this function returns) and the path of the saved image.
    """
    img_path = 'data/img/'+name+'.png'
    # Create the image folder on demand so saving works on a fresh checkout.
    os.makedirs(os.path.dirname(img_path), exist_ok=True)

    f1 = plt.figure()
    try:
        ax1 = f1.add_subplot(111)
        df.plot(kind = 'bar',ax=ax1)
        # Save/close this specific figure instead of pyplot's implicit
        # "current figure", which another callback could have changed.
        f1.savefig(img_path)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(f1)
    return  [f1, img_path]

In [4]:
# Every sub-folder of data/ is offered as a dataset, except 'img' (the folder
# custom_plot writes its PNGs to) and hidden folders such as
# .ipynb_checkpoints. Plain files are skipped as well.
# The list is sorted because os.listdir returns names in arbitrary order,
# which would make the dropdown order and its default entry unstable.
datasets = sorted(
    name for name in os.listdir("data")
    if name != 'img'
    and not name.startswith('.')
    and os.path.isdir(os.path.join("data", name))
)

In [5]:
def _match_radio(description):
    """Build a radio group with the Any / NO-MATCH / MATCH choices."""
    return widgets.RadioButtons(
        options=['Any', 'NO-MATCH', 'MATCH'],
        description=description,
        disabled=False
    )


def _system_checkbox(description):
    """Build a pre-ticked, non-indented checkbox for one ER system."""
    return widgets.Checkbox(
        value=True,
        description=description,
        disabled=False,
        indent=False,
    )


# Dataset picker; starts on the first entry of `datasets`.
datasets_dropdown = widgets.Dropdown(
    options=datasets, value=datasets[0], description='Dataset', disabled=False,
)

# Filters on the ground-truth label and on the predictions.
gt_filter = _match_radio('Label')
pred_filter = _match_radio('Prediction')

# One checkbox per ER system, preceded by a caption.
sys_label = widgets.Label(value='ER Systems')
de_cb = _system_checkbox('DeepER')
dm_cb = _system_checkbox('DeepMatcher')
dt_cb = _system_checkbox('Ditto')

# Shared layout: children stacked in a column and centred horizontally.
box_layout = widgets.Layout(
    display='flex', flex_flow='column', align_items='center',
)


In [7]:
# TODO: unfinished draft of a per-item view; `f2` has no body yet.
#def f2(dataset, deeper, dm, ditto, item):
    

In [8]:
# Shared output area: cleared by the "Explain Item" button handler defined
# inside `f`, and included in the final `ui` layout.
out2 = widgets.Output()

In [9]:
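# TODO: disabled until `f2` is implemented. The draft below passes the key
# 'selected_item', but the `f2` stub names its parameter `item`, and no
# `selected_item` widget is defined in the cells shown here.
#out3 = widgets.interactive_output(f2, {'dataset': datasets_dropdown, 'deeper': de_cb, 'dm': dm_cb, 'ditto':dt_cb, 'selected_item': selected_item})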


In [142]:
def f(dataset, deeper, dm, ditto, pred_filter, gt_filter):
    """Display the filtered samples of ``dataset`` with one "Explain Item" button per row.

    Parameters
    ----------
    dataset : str
        Folder name under ``data/``; ``data/<dataset>/samples.csv`` is loaded.
    deeper, dm, ditto : bool
        Whether DeepER / DeepMatcher / Ditto is selected. The score columns of
        unselected systems are dropped and those systems are not explained.
    pred_filter : str
        ``'Any'``, ``'NO-MATCH'`` or ``'MATCH'``. ``'MATCH'`` keeps rows where
        at least one selected system scores above 0.5, ``'NO-MATCH'`` rows
        where at least one selected system scores below 0.5. The filter is
        skipped when no system is selected (there is no score to test).
    gt_filter : str
        ``'Any'``, ``'NO-MATCH'`` or ``'MATCH'``; filters on ``label`` (0 / 1).

    Notes
    -----
    * Buttons are numbered with the row's index label in ``samples.csv`` (the
      number shown in the displayed table), so a button still addresses the
      right item after filtering.
    * Explanations are rendered inside the module-level ``out2`` widget. This
      function does not display ``out2`` itself, because the ``ui`` layout
      already contains it and a second view would show everything twice.
    """
    all_models = ['DeepER', 'DeepMatcher', 'Ditto']
    selected_models = [m for m, enabled in zip(all_models, (deeper, dm, ditto)) if enabled]

    samples = pd.read_csv('data/'+dataset+'/samples.csv').drop(['ltable_id', 'rtable_id'], axis=1)
    samples = samples.drop([m for m in all_models if m not in selected_models], axis=1)

    if gt_filter == 'NO-MATCH':
        samples = samples[samples['label']==0]
    elif gt_filter == 'MATCH':
        samples = samples[samples['label']==1]

    # Both prediction filters look at every selected system; the columns of
    # unselected systems no longer exist at this point.
    if selected_models and pred_filter in ('NO-MATCH', 'MATCH'):
        scores = samples[selected_models]
        hits = (scores > 0.5) if pred_filter == 'MATCH' else (scores < 0.5)
        samples = samples[hits.any(axis=1)]

    samples = samples.loc[:, ~samples.columns.str.contains('^Unnamed')]

    def show_inspection(item_idx, selected_model, saliency_df, detail_out):
        """Render prediction, saliency and triangle/lattice views of one item for one system."""
        # errors='ignore': columns of unselected systems were dropped above.
        single_pred = samples.loc[item_idx].drop(
            [m for m in all_models if m != selected_model], errors='ignore')
        pred_out = widgets.Output()
        with pred_out:
            display(pd.DataFrame(single_pred).T)
        out_pnn = widgets.Output()
        with out_pnn:
            display(saliency_df)
        # Placeholder: nothing is rendered under "Probability of Sufficiency" yet.
        out_pss = widgets.Output()

        item_dir = Path('data/'+dataset+'/'+selected_model+'/'+str(item_idx)+'/')
        # Sorted so the i-th triangle and the i-th lattice are paired the same
        # way on every run (iterdir order is arbitrary).
        # NOTE(review): assumes both file families share the same suffixes - confirm.
        item_files = sorted(item_dir.iterdir())
        tr_files = [p for p in item_files if p.match("triangle_*.csv")]
        lt_files = [p for p in item_files if p.match("lattice_*.dot")]
        tr_dfs = [pd.read_csv(p).drop(['Unnamed: 0'], axis=1) for p in tr_files]

        children = [widgets.Label(selected_model+' Prediction'), pred_out,
                    widgets.Label('Probability of Necessity'),
                    out_pnn,
                    widgets.Label('Probability of Sufficiency'),
                    out_pss]

        # Without triangle files the slider would get max=-1, which is invalid.
        if tr_dfs:
            tr_slider = widgets.IntSlider(value=0, min=0, max=len(tr_dfs)-1, step=1, description='Triangle:', disabled=False, continuous_update=False, orientation='horizontal',
                    readout=True, readout_format='d')
            def tr_slide(slide):
                out_df = widgets.Output()
                with out_df:
                    display(tr_dfs[slide])
                lattice_png = graphviz.Source.from_file(lt_files[slide]).pipe(format='png')
                display(widgets.VBox([widgets.Image(value=lattice_png, format='png'), out_df], layout=box_layout))
            tr_out = widgets.interactive_output(tr_slide, {'slide': tr_slider})
            children += [widgets.HBox([tr_slider]), tr_out]

        # Replace any previous inspection instead of stacking a new one below it.
        detail_out.clear_output()
        with detail_out:
            display(widgets.VBox(children, layout=box_layout))

    def explain_item(item_idx):
        """Render saliency plots and first counterfactuals of ``item_idx`` into ``out2``."""
        if not selected_models:
            # Nothing to explain; just drop whatever was shown before.
            out2.clear_output()
            return

        saliencies = dict()
        cfs = dict()
        for model in selected_models:
            model_dir = 'data/'+dataset+'/'+model+'/'
            saliencies[model] = pd.read_csv(model_dir+'certa.csv')['explanation'].iloc[item_idx]
            cfs[model] = pd.read_csv(model_dir+str(item_idx)+'/certa.csv').iloc[0]

        # Holds the detail view opened by the "Inspect <system>" buttons.
        detail_out = widgets.Output()
        saliencies_box = []
        for model, raw_saliency in saliencies.items():
            # SECURITY NOTE: eval() executes whatever text the CSV contains.
            # Only use with trusted explanation files; ast.literal_eval is the
            # safer choice if the stored values are plain Python literals.
            saliency_df = pd.DataFrame(eval(raw_saliency),index=[0])
            _, path = custom_plot(saliency_df, dataset+'_'+model+'_'+str(item_idx))
            # read_bytes() closes the file, unlike a bare open(...).read().
            img = widgets.Image(value=Path(path).read_bytes(), format='png')
            inspect_button = widgets.Button(description='Inspect '+model)
            # Default arguments freeze this iteration's system and saliency;
            # a plain closure would see the last iteration's values for every button.
            def inspect_button_click(ib, model=model, saliency_df=saliency_df):
                show_inspection(item_idx, model, saliency_df, detail_out)
            inspect_button.on_click(inspect_button_click)
            saliencies_box.append(widgets.VBox([img , inspect_button], layout=box_layout))

        cfs_df = pd.DataFrame.from_dict(cfs).T.drop(['alteredAttributes', 'attr_count', 'copiedValues', 'droppedValues', 'label', 'triangle', 'nomatch_score'], axis=1)
        cfs_df['prediction'] = cfs_df['match_score'].copy()
        cfs_df = cfs_df.drop(['match_score'], axis=1)
        cfs_df = cfs_df.loc[:, ~cfs_df.columns.str.contains('^Unnamed')]

        out2.clear_output()
        # Display inside out2 so the content appears in, and is cleared with,
        # that widget; output produced in a bare callback is not captured by it.
        with out2:
            display(widgets.HBox(saliencies_box))
            display(cfs_df)
            display(detail_out)

    buttons = []
    for row_label in samples.index:
        button = widgets.Button(description="Explain Item "+str(row_label))
        # Bind the item per button via a default argument: parsing the last
        # character of the caption breaks from item 10 on, and closing over
        # the loop variable would make every button explain the last item.
        def on_button_clicked(b, item_idx=int(row_label)):
            explain_item(item_idx)
        button.on_click(on_button_clicked)
        buttons.append(button)
    buttons_box = widgets.HBox(buttons)
    display(samples, buttons_box)

In [143]:
# Call `f` with the current value of each control; the keys are f's parameter names.
out = widgets.interactive_output(
    f,
    {
        'dataset': datasets_dropdown,
        'deeper': de_cb,
        'dm': dm_cb,
        'ditto': dt_cb,
        'pred_filter': pred_filter,
        'gt_filter': gt_filter,
    },
)

In [144]:
# Control row, left to right: dataset picker, ER-system checkboxes,
# label filter, prediction filter.
first_box = widgets.VBox(children=[datasets_dropdown])
second_box = widgets.HBox(children=[
    sys_label,
    widgets.VBox(children=[de_cb, dm_cb, dt_cb]),
])
third_box = widgets.VBox(children=[gt_filter])
fourth_box = widgets.VBox(children=[pred_filter])
top1 = widgets.HBox(children=[first_box, second_box, third_box, fourth_box])

# Whole page: controls on top, then the output of `f`, then the out2 area.
ui = widgets.VBox(children=[top1, out, out2], layout=box_layout)

In [145]:
# Bare expression on the cell's last line so the notebook renders the assembled widget.
ui

VBox(children=(HBox(children=(VBox(children=(Dropdown(description='Dataset', index=1, options=('AB', 'BA', 'IA…