In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

In [2]:
# Load Data: read the training and test splits from CSV (train first, then test).
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("../data/train.csv", "../data/test.csv")
)
# The validation split is not loaded in this notebook:
# validation_df = pd.read_csv("../data/valid.csv")

# One label per test row, all 0. The candidate lists are built elsewhere in this
# notebook as test_df.iloc[x, 1:], whose first entry is the "Ground Truth Utterance"
# column, so index 0 is the correct answer for every row.
y_test = np.zeros(test_df.shape[0])

In [3]:
def evaluate_recall(y, y_test, k=1):
    """Compute recall@k: the fraction of examples whose label is in the top-k predictions.

    Args:
        y: Sequence of ranked predictions, one sliceable sequence per example
            (best candidate first).
        y_test: Sequence of true labels, paired position-by-position with ``y``.
            Pairs are formed with ``zip``, so surplus entries in the longer
            sequence are ignored while the denominator is always ``len(y)``.
        k: Number of leading predictions to inspect for each example.

    Returns:
        float: hits divided by ``len(y)``; ``0.0`` when ``y`` is empty.
    """
    num_examples = len(y)
    if num_examples == 0:
        # Nothing to score: report 0.0 rather than raising ZeroDivisionError.
        return 0.0
    num_correct = sum(
        1 for predictions, label in zip(y, y_test) if label in predictions[:k]
    )
    # float() keeps true division even under Python 2 semantics.
    return num_correct / float(num_examples)

In [4]:
def predict_random(context, utterances):
    """Random baseline: return candidate indices in a random order.

    Args:
        context: Unused; accepted so the signature matches the other predictors.
        utterances: Sized collection of candidate responses.

    Returns:
        numpy array of distinct indices into ``utterances``, at most 10 long,
        drawn without replacement from NumPy's global random state.
    """
    num_candidates = len(utterances)
    # Cap the sample at the number of candidates: asking for 10 distinct indices
    # from fewer than 10 candidates makes np.random.choice raise ValueError.
    sample_size = min(10, num_candidates)
    return np.random.choice(num_candidates, sample_size, replace=False)

In [5]:
# Evaluate Random predictor
# Produce one random ranking per test row; the candidates are every column after
# the first. No random seed is set here, so the recall figures vary between runs.
random_rankings = []
for row in range(len(test_df)):
    candidates = test_df.iloc[row, 1:].values
    random_rankings.append(predict_random(test_df.Context[row], candidates))
y_random = random_rankings

# Report recall at several cut-offs.
recall_line = "Recall @ ({}, 10): {:g}"
for n in (1, 2, 5, 10):
    print(recall_line.format(n, evaluate_recall(y_random, y_test, n)))

Recall @ (1, 10): 0.10111
Recall @ (2, 10): 0.198256
Recall @ (5, 10): 0.500317
Recall @ (10, 10): 1


In [6]:
class TFIDFPredictor:
    """Rank candidate utterances by the TF-IDF similarity of each one to a context."""

    def __init__(self):
        self.vectorizer = TfidfVectorizer()

    def train(self, data):
        """Fit the vectorizer on every context and utterance in ``data``.

        Args:
            data: Object exposing ``Context`` and ``Utterance`` columns with a
                ``.values`` array each (e.g. a pandas DataFrame).
        """
        corpus = np.append(data.Context.values, data.Utterance.values)
        self.vectorizer.fit(corpus)

    def predict(self, context, utterances):
        """Return the indices of ``utterances`` ordered from most to least similar.

        Args:
            context: A single context string.
            utterances: Iterable of candidate response strings.

        Returns:
            1-D numpy array of indices into ``utterances``, highest score first.
            The order among equal scores is whatever the reversed ``argsort``
            output yields.
        """
        # Convert context and utterances into tfidf vectors
        vector_context = self.vectorizer.transform([context])
        vector_doc = self.vectorizer.transform(utterances)
        # The dot product measures the similarity of the resulting vectors (with
        # TfidfVectorizer's default L2 normalisation this is the cosine similarity).
        # Use the sparse matrix's own .dot rather than np.dot, which does not
        # understand SciPy sparse types, and .toarray() to get a plain ndarray
        # instead of an np.matrix.
        scores = vector_doc.dot(vector_context.T).toarray().ravel()
        # Sort ascending, then reverse so the best-scoring candidate comes first
        return np.argsort(scores)[::-1]

In [7]:
# Evaluate TFIDF predictor
# Build the predictor and fit its vectorizer on the training contexts and utterances.
pred = TFIDFPredictor()
pred.train(train_df)
# The lines below are disabled: they would rank the candidates of every test row
# and print recall at cut-offs 1, 2, 5 and 10, in the same way as the random baseline.
# y = [pred.predict(test_df.Context[x], test_df.iloc[x,1:].values) for x in range(len(test_df))]
#for n in [1, 2, 5, 10]:
#    print("Recall @ ({}, 10): {:g}".format(n, evaluate_recall(y, y_test, n)))

In [8]:
# Rank the candidates (every column after the first) of each test row with the
# fitted TF-IDF predictor; y[i] holds the candidate indices for row i, best first.
# For a quick single-row check use instead:
# y = [pred.predict(test_df.Context[0], test_df.iloc[0,1:].values)]
tfidf_rankings = []
for row in range(len(test_df)):
    candidates = test_df.iloc[row, 1:].values
    tfidf_rankings.append(pred.predict(test_df.Context[row], candidates))
y = tfidf_rankings

In [9]:
# Show the candidate responses of the first test row: every column after the first.
first_row_candidates = test_df.iloc[0, 1:]
print(first_row_candidates)

Ground Truth Utterance                                 nice thank ! __eou__
Distractor_0              wrong channel for it , but check efnet.org , u...
Distractor_1              everi time the kernel chang , you will lose vi...
Distractor_2                                                     ok __eou__
Distractor_3              ! nomodeset > acer __eou__ i 'm assum it be a ...
Distractor_4              http : //www.ubuntu.com/project/about-ubuntu/d...
Distractor_5              thx __eou__ unfortun the program be n't instal...
Distractor_6              how can i check ? by do a recoveri for test ? ...
Distractor_7                                        my humbl apolog __eou__
Distractor_8                                        # ubuntu-offtop __eou__
Name: 0, dtype: object


In [11]:
# Qualitative check: for a handful of test rows, print the context followed by the
# candidate the TF-IDF predictor ranked first. Here n is a row index, not a cut-off.
separator = "----------------------------------------------------------------"
for n in (0, 1, 2, 5, 10):
    print(separator)
    print("[context]", test_df.Context[n])
    print()
    # y[n][0] is the index of the best-scoring candidate for row n.
    row_candidates = test_df.iloc[n, 1:].values
    best_index = y[n][0]
    print("[anwer]", row_candidates[best_index])

----------------------------------------------------------------
[context] anyon know whi my stock oneir export env var usernam ' ? i mean what be that use for ? i know of $ user but not $ usernam . my precis instal doe n't export usernam __eou__ __eot__ look like it use to be export by lightdm , but the line have the comment `` // fixm : be this requir ? '' so i guess it be n't surpris it be go __eou__ __eot__ thank ! how the heck do you figur that out ? __eou__ __eot__ https : //bugs.launchpad.net/lightdm/+bug/864109/comments/3 __eou__ __eot__

[anwer] thx __eou__ unfortun the program be n't instal from the repositori __eou__
----------------------------------------------------------------
[context] i set up my hd such that i have to type a passphras to access it at boot . how can i remov that passwrd , and just boot up normal . i do this at instal , it work fine , just tire of have reboot where i need to be at termin to type passwd in . help ? __eou__ __eot__ backup your data , and 