In [1]:
import os

import numpy
import pandas as pd
import plotly.graph_objs as go
from ipywidgets import widgets, Layout
from plotly.offline import iplot, init_notebook_mode

In [2]:
# Location of the evaluation results to annotate. The default keeps the
# original local path; set the EVALUATION_RESULTS_CSV environment variable
# to point at another copy instead of editing this cell.
f_path = os.environ.get(
    'EVALUATION_RESULTS_CSV',
    '/Users/amandeep/Github/dig-text-similarity-search/data/evaluation_results.csv',
)
df = pd.read_csv(f_path)
# Index of the record currently shown in the annotation UI.
df_index = 0

In [3]:
# Output area hosting the annotation UI. It is displayed once here;
# display_df() clears it and renders the widgets into it on each call.
ow = widgets.Output()
display(ow)
    
def previous_click(btn):
    """Click handler for the "<< previous" button; ``btn`` is not used."""
    previous_df_record()

def next_click(btn):
    """Click handler for the "next >>" button; ``btn`` is not used."""
    next_df_record()

# --- Record fields ---------------------------------------------------------
# Article text, rendered as HTML so the matched sentence can be highlighted.
doc_text_widget = widgets.HTML(
    value='',
    placeholder='Type something',
    description='News Article:',
    disabled=False,
    layout=Layout(width='900px', height='500px'),
)
ifp_widget = widgets.Text(
    value='',
    placeholder='something...',
    description='ifp:',
    disabled=False,
    layout=Layout(width='900px', height='30px'),
)
ms_widget = widgets.Text(
    value='',
    placeholder='something...',
    description='Sentence:',
    disabled=False,
    layout=Layout(width='900px', height='30px'),
)
score_widget = widgets.Text(
    value='',
    description='Score:',
    disabled=False,
)
# Created here but not placed in any of the boxes below.
relevance_widget = widgets.Text(
    value='',
    description='Relevance:',
    disabled=False,
)

# --- Annotation control ----------------------------------------------------
# `rows` is left at the widget default.
annotate_widget = widgets.Select(
    options=['No Idea', 'Very Relevant', 'Topically Relevant', 'Irrelevant'],
    value='No Idea',
    description='Annotate:',
    disabled=False,
)

# --- Navigation ------------------------------------------------------------
previous_btn = widgets.Button(description="<< previous")
previous_btn.on_click(previous_click)

next_btn = widgets.Button(description="next >>")
next_btn.on_click(next_click)

# Shows the "<n> of <total> records" position text.
records_widget = widgets.Text(
    value='',
    placeholder='something...',
    description='',
    disabled=False,
)

# --- Layout: one horizontal box per row, stacked top to bottom --------------
zero_box = widgets.HBox([widgets.Label('Currently viewing: '), records_widget])
first_box = widgets.HBox([ifp_widget])
second_box = widgets.HBox([ms_widget])
third_box = widgets.HBox([doc_text_widget])
fourth_box = widgets.HBox([annotate_widget, score_widget])
fifth_box = widgets.HBox([previous_btn, next_btn])

whole_box = widgets.VBox([
    zero_box,
    first_box,
    second_box,
    third_box,
    fourth_box,
    fifth_box,
])

# Annotation label -> numeric relevance code stored in the 'relevance' column.
relevance_dict = {
    'No Idea': -1,
    'Very Relevant': 3,
    'Topically Relevant': 2,
    'Irrelevant': 1,
}

# Numeric relevance code -> annotation label; derived from relevance_dict so
# the two lookups always agree.
reverse_relevance_dict = {code: label for label, code in relevance_dict.items()}

def create_highlighted_sentences(sentence, paragraph):
    """Return ``paragraph`` with the first match of ``sentence`` wrapped in ``<mark>``.

    Newlines in ``sentence`` are converted to ``<br/>`` before searching, so
    the match is made against that HTML form of the sentence.

    Args:
        sentence: Text to highlight.
        paragraph: Text to search in.

    Returns:
        The paragraph with the matched span wrapped in ``<mark>...</mark>``,
        or the paragraph unchanged when no match is found.
    """
    # The converted form is what is actually searched for, so its length (not
    # the raw sentence's) determines where the matched span ends.
    needle = sentence.replace('\n', '<br/>')
    index = paragraph.find(needle)
    print('matched sentence index:{}'.format(index))
    if index == -1:
        return paragraph

    end = index + len(needle)
    return '{}<mark>{}</mark>{}'.format(
        paragraph[:index], paragraph[index:end], paragraph[end:]
    )

def on_annotate_change(change):
    """Observer for the annotation selector: store the newly selected label.

    ``change['new']`` is the selected label; it is translated to its numeric
    code through ``relevance_dict`` and passed to ``update_relevance``.
    """
    selected_label = change['new']
    relevance_code = relevance_dict[selected_label]
    update_relevance(relevance_code)


def update_relevance(relevance):
    """Write ``relevance`` into the 'relevance' column of the current record.

    The current record is the row labelled by the module-level ``df_index``.
    """
    # df_index is only read here, so no `global` declaration is required.
    df.at[df_index, 'relevance'] = relevance
    

# Call on_annotate_change whenever the selector's `value` trait changes.
annotate_widget.observe(on_annotate_change, names='value')

def previous_df_record():
    """Move to the previous record and redraw, or report that none exists."""
    global df_index
    if df_index <= 0:
        print('This is the first record')
        return
    df_index -= 1
    display_df(df, df_index)
    
def next_df_record():
    """Move to the next record and redraw, or report that none exists."""
    global df_index
    if df_index + 1 >= len(df):
        print('This is the last record')
        return
    df_index += 1
    display_df(df, df_index)

def display_df(df, index):
    """Render the record at position ``index`` of ``df`` into the output area.

    Clears ``ow``, fills every widget from the row, and displays ``whole_box``
    inside ``ow``.

    NOTE(review): the position text uses the module-level ``df_index`` while
    the row is read with ``index``; the callers in this notebook always pass
    the same value for both.
    """
    ow.clear_output()
    with ow:
        position_text = '{} of {} records'.format(df_index + 1, len(df))
        records_widget.value = position_text

        record = df.iloc[index]
        sentence = record['sentence_text']
        # Article body with the matched sentence wrapped in <mark>.
        doc_text_widget.value = create_highlighted_sentences(sentence, record['doc_text'])
        ifp_widget.value = record['ifp']
        ms_widget.value = sentence
        score_widget.value = str(record['score'])
        # Stored numeric code -> label shown in the selector.
        annotate_widget.value = reverse_relevance_dict[record['relevance']]

        display(whole_box)

# Initial render: show the record at the starting index.
display_df(df, df_index)

Output()

In [1]:
# Destination file for the annotated copy of the evaluation results.
output_path = 'evaluation_results_annotation.csv'
# Saving is left commented out; uncomment to write `df` (with annotations) to output_path.
# df.to_csv(output_path,index=False)

In [1]:
import pandas as pd
import hashlib
# Load the annotated results that the dashboard below plots.
output_path = 'evaluation_results_annotation.csv'
annotated_df = pd.read_csv(output_path)
def create_hash(str):
    """Return the upper-case hexadecimal SHA-256 digest of ``str`` (UTF-8 encoded)."""
    encoded = str.encode('utf-8')
    digest = hashlib.sha256(encoded)
    return digest.hexdigest().upper()

# Group the annotated rows by IFP: hash of the IFP text -> list of plot points.
dash_data = {}
for _, row in annotated_df.iterrows():
    ifp_key = create_hash(row['ifp'])
    point = {
        'score': row['score'],
        # Hover text for the scatter marker.
        'sentence': 'Matched Sentence: {}      IFP:{}'.format(row['sentence_text'], row['ifp']),
        'relevance': row['relevance'],
        'ifp': row['ifp'],
    }
    dash_data.setdefault(ifp_key, []).append(point)


In [2]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objs as go

In [3]:
# Dash application object; its layout is assigned and it is served in later cells.
app = dash.Dash()


In [4]:
def _ifp_trace(points):
    """Build the marker trace for one IFP from its list of plot points."""
    return go.Scatter(
        x=[point['score'] for point in points],
        y=[point['relevance'] for point in points],
        text=[point['sentence'] for point in points],
        mode='markers',
        opacity=0.7,
        marker={
            'size': 15,
            'line': {'width': 0.5, 'color': 'white'},
        },
        # Legend entry: first five characters of the IFP text.
        name=points[0]['ifp'][:5],
    )


# One trace per IFP group, score on a log-scaled x axis against relevance.
score_vs_relevance_figure = {
    'data': [_ifp_trace(points) for points in dash_data.values()],
    'layout': go.Layout(
        xaxis={'type': 'log', 'title': 'Score'},
        yaxis={'title': 'Relevance'},
        margin={'l': 40, 'b': 40, 't': 10, 'r': 10},
        legend={'x': 0, 'y': 1},
        hovermode='closest',
    ),
}

app.layout = html.Div([
    dcc.Graph(id='score-vs-relevance', figure=score_vs_relevance_figure),
])

In [None]:
# Start the Dash server; the recorded output below shows it listening on
# http://127.0.0.1:8050/ until interrupted.
app.run_server()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [20/Sep/2018 18:16:33] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [20/Sep/2018 18:16:34] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [20/Sep/2018 18:16:34] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [20/Sep/2018 18:16:34] "GET /favicon.ico HTTP/1.1" 200 -
