In [1]:
import pandas as pd
import json
from sklearn.manifold import TSNE
from bokeh.io import output_notebook
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.models import HoverTool
import matplotlib.pyplot as plt
output_notebook()
%matplotlib inline

In [2]:
def add_embeddings_to_df(df, embedding_file, column_name):
    """Attach pre-computed embedding vectors to ``df`` as a new column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``id`` column. It is modified in place (the new column
        is added to it) and also returned, so both ``df = f(df, ...)`` and a
        bare ``f(df, ...)`` call keep working.
    embedding_file : str or file-like
        CSV readable by ``pandas.read_csv`` with an ``id`` column and an
        ``embedding`` column holding JSON-encoded values.
    column_name : str
        Name of the column that receives the decoded embeddings.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``column_name`` added.

    Raises
    ------
    IndexError
        If an id in ``df`` has no row in the embedding file. The exception
        type is kept from the previous implementation; the message now names
        the offending ids.
    """
    embeddings = pd.read_csv(embedding_file)

    # Build the id -> vector lookup once instead of scanning the whole
    # embeddings frame for every row of df. setdefault keeps the first
    # occurrence when an id appears more than once in the file.
    lookup = {}
    for key, raw in zip(embeddings['id'], embeddings['embedding']):
        lookup.setdefault(key, json.loads(raw))

    ids = df['id'].tolist()
    missing = [key for key in ids if key not in lookup]
    if missing:
        raise IndexError(
            'no embedding found in {!r} for {} id(s), e.g. {!r}'.format(
                embedding_file, len(missing), missing[:5]))

    # Wrapping in a Series that shares df's index keeps the assignment purely
    # positional, even when df's index is non-contiguous or has duplicates.
    df[column_name] = pd.Series([lookup[key] for key in ids],
                                index=df.index, dtype=object)
    return df

In [3]:
# Load the training and held-out CSVs.
train_data = pd.read_csv('../all_data_files/experiments/data.csv') # 5759 training examples
held_out_data = pd.read_csv('../all_data_files/held-out/held-out-data.csv')
# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# both inputs contribute their own labels, so labels repeat and a label-based
# lookup such as all_data.loc[i] returns several rows instead of one.
all_data = pd.concat([train_data, held_out_data], ignore_index=True)

In [4]:
# Training set, original config -> column 'lexnet_default'
train_data = add_embeddings_to_df(
    df=train_data,
    embedding_file='../all_data_files/experiments/full_paths_original_4.csv',
    column_name='lexnet_default',
)

# Training set, custom config -> column 'lexnet_custom'
train_data = add_embeddings_to_df(
    df=train_data,
    embedding_file='../all_data_files/experiments/middle_paths_unrestricted_16.csv',
    column_name='lexnet_custom',
)

In [None]:
# Held-out set: same two embedding columns as the training set, read from the
# held-out embedding files.
held_out_data = add_embeddings_to_df(
    df=held_out_data,
    embedding_file='../all_data_files/held-out/h_full_paths_original_4.csv',
    column_name='lexnet_default',
)
held_out_data = add_embeddings_to_df(
    df=held_out_data,
    embedding_file='../all_data_files/held-out/h_middle_paths_unrestricted_16.csv',
    column_name='lexnet_custom',
)

In [None]:
# jointly learned embeddings for all data
# The original-config column is named 'lexnet_default', the same name used on
# train_data and held_out_data, so plotting code that asks for
# 'lexnet_default' works on every frame.
all_data = add_embeddings_to_df(all_data, '../all_data_files/extras/combi_full_paths_original_4.csv', 'lexnet_default')
all_data = add_embeddings_to_df(all_data, '../all_data_files/extras/combi_middle_paths_unrestricted_16.csv', 'lexnet_custom')

In [None]:
def tsne_plot(df, lexnet_config, random_state=None):
    """Project one embedding column to 2-D with t-SNE and return a Bokeh figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``lexnet_config``, ``most_frequent_label``
        and ``sentence``. If ``object_a`` / ``object_b`` columns are present
        they are carried into the glyph data sources as ``objecta`` /
        ``objectb``. Rows are matched to t-SNE output by position, so any
        index (sliced, shuffled, duplicated) is fine.
    lexnet_config : str
        Name of the column holding one embedding vector per row.
    random_state : int or None, optional
        Forwarded to ``TSNE`` so the layout can be made reproducible. The
        default ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    bokeh figure
        900x900 scatter plot: 'NONE' rows as gray crosses, 'WORSE' as red
        circles, 'BETTER' as green circles. Rows with any other label are
        not drawn. Hovering shows the sentence and its label.
    """
    # A 2-D array (rather than a list of lists) is what scikit-learn inspects
    # for its shape; going through DataFrame avoids a new numpy import.
    vectors = pd.DataFrame(df[lexnet_config].tolist()).values
    n_samples = len(vectors)

    # Recent scikit-learn releases reject perplexity >= n_samples, so the
    # library default of 30 fails on small slices. Clamp it for small inputs
    # and leave it at the default otherwise.
    perplexity = min(30.0, max(n_samples - 1, 1))
    X_embedded = TSNE(
        n_components=2,
        verbose=1,
        perplexity=perplexity,
        random_state=random_state,
    ).fit_transform(vectors)

    # Build the plotting frame in one shot and by position; the earlier
    # row-by-row df.loc[i] lookup silently dropped rows whenever df's index
    # was not exactly 0..n-1.
    plot_frame = pd.DataFrame({
        'x': X_embedded[:, 0],
        'y': X_embedded[:, 1],
        'class': df['most_frequent_label'].tolist(),
        'sentence': df['sentence'].tolist(),
    })
    # source column in df -> key used in the Bokeh data source
    optional_columns = {'object_a': 'objecta', 'object_b': 'objectb'}
    for column in optional_columns:
        if column in df.columns:
            plot_frame[column] = df[column].tolist()

    def build_source(label, frame):
        """Return a ColumnDataSource with only the rows of ``frame`` labelled ``label``."""
        subset = frame[frame['class'] == label]
        data = dict(
            x=subset['x'].tolist(),
            y=subset['y'].tolist(),
            sentence=subset['sentence'].tolist(),
            label=subset['class'].tolist(),
        )
        for column, key in optional_columns.items():
            if column in subset.columns:
                data[key] = subset[column].tolist()
        return ColumnDataSource(data=data)

    hover = HoverTool(tooltips=[
        ("Sentence", "@sentence"),
        ("label", "@label"),
    ])

    # width/height and the "save" tool are the current Bokeh spellings of the
    # former plot_width/plot_height and "previewsave".
    p = figure(width=900, height=900, tools="pan,wheel_zoom,box_zoom,reset,save")
    p.add_tools(hover)

    # Single table of (label, marker, color) so glyph styling lives in one place.
    glyph_styles = [
        ('NONE', 'cross', 'gray'),
        ('WORSE', 'circle', 'red'),
        ('BETTER', 'circle', 'green'),
    ]
    for label, marker, color in glyph_styles:
        p.scatter(x='x', y='y', source=build_source(label, plot_frame),
                  marker=marker, size=5, color=color)
    return p


In [None]:
show(tsne_plot(train_data[:10], 'lexnet_default'))

In [None]:
show(tsne_plot(train_data[:10], 'lexnet_custom'))