In [1]:
import copy
import math
import re
import string
import urllib
import urllib.request

import nltk
import numpy as np
import pandas as pd
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer

### Preprocess

In [2]:
# Location of the three SST-3 splits used below. preprocess() expects every
# downloaded line to look like "<sentence>\t<label>".
SST3_BASE_URL = "https://s3.us-east-2.amazonaws.com/yulp-public/"


def _read_remote_lines(url):
    """Download `url` and return its UTF-8 decoded body as a list of lines."""
    # The response is used as a context manager so the connection is always closed.
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8').splitlines()


train = _read_remote_lines(SST3_BASE_URL + "sst3.train.txt")
dev = _read_remote_lines(SST3_BASE_URL + "sst3.dev.txt")
devtest = _read_remote_lines(SST3_BASE_URL + "sst3.devtest.txt")

In [3]:
# Candidate class labels. They are strings because they are compared against
# the label field parsed from the text corpus.
# NOTE(review): presumably 0 = negative, 1 = neutral, 2 = positive — confirm against the dataset.
labels = [str(class_id) for class_id in range(3)]

In [4]:
def lemmatize(text):
    """Return `text` with every token replaced by its WordNet lemma.

    The text is tokenized and POS-tagged with NLTK. Tokens whose tag starts
    with N, V, J or R are lemmatized as noun, verb, adjective or adverb
    respectively; every other token is kept unchanged. The resulting tokens
    are joined with single spaces.
    """
    # First letter of the tagger's tag -> WordNet part-of-speech code.
    wordnet_pos_by_initial = {'N': 'n', 'V': 'v', 'J': 'a', 'R': 'r'}

    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(text))
    lemmatizer = WordNetLemmatizer()

    output_words = []
    for tagged in tagged_tokens:
        word, tag = tagged[0], tagged[1]
        wordnet_pos = wordnet_pos_by_initial.get(tag[:1])
        if wordnet_pos is None:
            # No usable part of speech: keep the surface form as-is.
            output_words.append(word)
        else:
            output_words.append(lemmatizer.lemmatize(word, wordnet_pos))

    return ' '.join(output_words)


# Lazily-filled cache of NLTK's English stopwords, so the corpus file is read
# once rather than once per sentence.
_ENGLISH_STOPWORDS = None


def _english_stopwords():
    """Return NLTK's English stopword list as a cached frozenset."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(nltk.corpus.stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def remove_punctuations_and_stopwords(text):
    """Delete punctuation characters and English stopwords from `text`.

    Every character in `string.punctuation` is removed first, then the text
    is tokenized with NLTK and tokens found in NLTK's English stopword list
    are dropped. Matching is exact (case-sensitive); normalize() lowercases
    the text before calling this function.

    Returns the surviving tokens joined with single spaces.
    """
    # str.translate with a deletion table removes the same characters the
    # previous character-class regex did, without building a pattern per call.
    text = text.translate(str.maketrans('', '', string.punctuation))

    # Set membership is O(1) per token instead of a scan over the whole list.
    stopword_set = _english_stopwords()
    tokens = [token for token in nltk.word_tokenize(text) if token not in stopword_set]

    return ' '.join(tokens)

def normalize(text):
    """Lowercase `text`, lemmatize it, then strip punctuation and stopwords."""
    lemmatized = lemmatize(text.lower())
    return remove_punctuations_and_stopwords(lemmatized)

In [5]:
def preprocess(corpus, feature="ub"):
    """Turn raw "<text>\\t<label>" lines into (tokens, label) examples.

    Parameters:
        corpus: iterable of strings, each holding a sentence and its label
            separated by a tab.
        feature: selects the feature type.
            "normalized" runs normalize() on the text before tokenizing;
            "bb" replaces the token list with the list of adjacent bigrams;
            any other value (including the default "ub") keeps plain tokens.

    Returns:
        A list with one (tokens, label) tuple per input line, in input order.
        `label` is the string found after the tab.

    Raises:
        ValueError: if a line contains no tab at all.
    """
    examples = []

    for entry in corpus:
        # Split on the LAST tab only: the label is the final field, so a tab
        # inside the sentence itself no longer breaks the unpacking.
        text, label = entry.rsplit("\t", 1)

        if feature == "normalized":
            text = normalize(text)

        tokens = nltk.word_tokenize(text)

        if feature == "bb":
            tokens = list(nltk.bigrams(tokens))

        examples.append((tokens, label))

    return examples

### Extract features

In [6]:
def extract_features(corpus_lst):
    """Build a zero-initialized weight table from preprocessed examples.

    `corpus_lst` is a sequence of (tokens, label) examples. The result maps
    every feature seen in any example to a dict holding a 0.0 weight for each
    label that feature co-occurred with.
    """
    weight_table = {}
    for example in corpus_lst:
        tokens, label = example[0], example[1]
        for feature in tokens:
            # Create the per-feature dict and the per-label weight on first sight.
            weight_table.setdefault(feature, {}).setdefault(label, 0.0)
    return weight_table

### Learning

In [7]:
def classify(entry, features, labels, hinge=False):
    """Return the highest-scoring label for one example.

    Parameters:
        entry: a (tokens, gold_label) pair.
        features: dict mapping a feature to a {label: weight} dict. Tokens
            that are not in `features` contribute nothing.
        labels: the candidate labels, tried in order.
        hinge: when true, 1 is added to the score of every label other than
            the gold label (cost-augmented decoding used during hinge-loss
            training).

    Returns:
        The label with the largest score. On ties the label that comes first
        in `labels` wins.

    Raises:
        ValueError: if `labels` is empty.
    """
    (tokens, gs) = entry

    # Look every token up once instead of once per label; unknown tokens are skipped.
    known_weights = []
    for token in tokens:
        d = features.get(token)
        if d is not None:
            known_weights.append(d)

    found = False
    prediction = None
    max_score = float("-inf")

    for label in labels:
        # Accumulate in token order so the floating-point result is unchanged.
        score = 0.0
        for d in known_weights:
            score += d.get(label, 0)

        if hinge and label != gs:
            score += 1

        # `not found` guarantees a prediction even if every score is -inf or NaN.
        if not found or score > max_score:
            max_score = score
            prediction = label
            found = True

    if not found:
        raise ValueError("classify() needs at least one label to choose from")

    return prediction

In [8]:
def test_accuracy(test_lst, features, labels):
    n = len(test_lst)
    correct = 0
    for entry in test_lst:
        prediction = classify(entry, features, labels)
        if prediction == entry[1]:
            correct += 1
    return correct/n

In [9]:
def online_training(train_lst, dev_lst, devtest_lst, features, labels, ss=0.01, epoch=20, hinge=False, eval_every=20000):
    """Train per-label feature weights with online mistake-driven updates.

    For every training example the current weights are used to predict a
    label (cost-augmented when `hinge` is true). On a wrong prediction each
    token's weight for the gold label is increased by `ss` and its weight for
    the predicted label is decreased by `ss`; only (feature, label) weights
    that already exist in the table are touched.

    The model is evaluated on `dev_lst` every `eval_every` examples and at the
    end of each epoch. Whenever DEV accuracy strictly improves, the weights are
    snapshotted and the accuracy on `devtest_lst` is recorded.

    Parameters:
        train_lst, dev_lst, devtest_lst: lists of (tokens, label) examples.
        features: initial weight table; it is deep-copied, never modified.
        labels: candidate labels.
        ss: step size of each update.
        epoch: number of passes over `train_lst`.
        hinge: use cost-augmented decoding (see classify()).
        eval_every: evaluate on DEV after this many examples within an epoch.

    Returns:
        (best_weights, best_dev_accuracy, devtest_accuracy_at_best). If no
        evaluation ever exceeds a DEV accuracy of 0, the final weights and
        their DEVTEST accuracy are returned with a best DEV accuracy of 0.

    Raises:
        ValueError: if `eval_every` is smaller than 1.
    """
    if eval_every < 1:
        raise ValueError("eval_every must be a positive integer")

    features_ = copy.deepcopy(features)
    # None means "no improving checkpoint seen yet"; resolved after the loop.
    features_best_weight = None
    accuracy_devtest = None
    best_accuracy = 0
    n = len(train_lst)

    for e in range(epoch):
        print("Epoch "+str(e+1)+":")

        for counter, entry in enumerate(train_lst, start=1):
            (tokens, gs) = entry

            prediction = classify(entry, features_, labels, hinge=hinge)

            if prediction != gs:
                for token in tokens:
                    d = features_.get(token)
                    if d is None:
                        # Token absent from the weight table: nothing to update.
                        continue
                    if d.get(gs) is not None:
                        d[gs] += ss
                    if d.get(prediction) is not None:
                        d[prediction] -= ss

            if counter % eval_every == 0 or counter == n:
                print("\t"+str(counter)+" examples trained:")
                accuracy = test_accuracy(dev_lst, features_, labels)
                print("\t\t"+"Accurary on DEV: "+str(accuracy))
                if accuracy > best_accuracy:
                    # Snapshot, because features_ keeps changing afterwards.
                    features_best_weight = copy.deepcopy(features_)
                    best_accuracy = accuracy
                    accuracy_devtest = test_accuracy(devtest_lst, features_, labels)
                    print("\t\t"+"Accurary on DEVTEST: "+str(accuracy_devtest))

    if features_best_weight is None:
        # No checkpoint improved on 0 (or none ran): fall back to the final
        # weights instead of failing on unbound return values.
        features_best_weight = features_
        accuracy_devtest = test_accuracy(devtest_lst, features_, labels)

    return features_best_weight, best_accuracy, accuracy_devtest

### Feature weight analysis

In [10]:
def weight_analysis(features, labels):
    """Print and return the features of each label ranked by weight.

    For every label in `labels`, the (weight, feature) pairs found in
    `features` are sorted in descending order; the first ten are printed as
    "Top 10" and the last ten, lowest first, as "Bottom 10".

    Returns a dict mapping each label to its sorted list of
    (weight, feature) tuples.
    """
    weights = {}
    for label in labels:
        weights[label] = []

    for token in features:
        for label, weight in features[token].items():
            weights[label].append((weight, token))

    def show(pairs):
        # One line per feature, weight rounded to two decimals.
        for weight, token in pairs:
            print("\t\t"+str(token)+": " + format(weight, ".2f"))

    for label in labels:
        print("Label "+label+":")
        ranked = weights[label]
        ranked.sort(reverse=True)
        print("\tTop 10 features:")
        show(ranked[:10])
        print("\tBottom 10 features:")
        # Reverse the tail so the most negative weight is listed first.
        show(ranked[-10:][::-1])

    return weights

### Error analysis

In [11]:
def error_analysis(test_lst, test_corpus, features, labels):
    """Print every example in `test_lst` that the model misclassifies.

    `test_corpus` holds the raw "<sentence>\\t<label>" lines aligned by index
    with `test_lst`; it is used to show the original sentence text next to
    the gold label and the prediction.
    """
    for index, example in enumerate(test_lst):
        predicted = classify(example, features, labels)
        gold = example[1]
        if predicted == gold:
            continue
        sentence = test_corpus[index].split("\t")[0]
        print("Sentence: "+sentence)
        print("Gold Standard: "+gold)
        print("Prediction: "+predicted)
        print("\n")

### Unigram binary features

In [12]:
# Unigram setting: tokenize every split with the default feature type.
train_lst, dev_lst, devtest_lst = [preprocess(split) for split in (train, dev, devtest)]

In [13]:
# Zero-initialized weight table built from the training examples only.
features_0 = extract_features(train_lst)

In [14]:
# Train on unigrams with the default (non-hinge) update; keeps the weights that
# scored best on DEV together with the DEV and DEVTEST accuracies at that point.
features_pl, accuracy_pl, accuracy_devtest_pl = online_training(train_lst, dev_lst, devtest_lst, features_0, labels)

Epoch 1:
	20000 examples trained:
		Accurary on DEV: 0.4672727272727273
		Accurary on DEVTEST: 0.47005444646098005
	40000 examples trained:
		Accurary on DEV: 0.49272727272727274
		Accurary on DEVTEST: 0.4482758620689655
	60000 examples trained:
		Accurary on DEV: 0.5
		Accurary on DEVTEST: 0.4627949183303085
	80000 examples trained:
		Accurary on DEV: 0.46545454545454545
	100000 examples trained:
		Accurary on DEV: 0.5527272727272727
		Accurary on DEVTEST: 0.5190562613430127
	120000 examples trained:
		Accurary on DEV: 0.5909090909090909
		Accurary on DEVTEST: 0.5226860254083484
	120789 examples trained:
		Accurary on DEV: 0.5545454545454546
Epoch 2:
	20000 examples trained:
		Accurary on DEV: 0.49636363636363634
	40000 examples trained:
		Accurary on DEV: 0.49818181818181817
	60000 examples trained:
		Accurary on DEV: 0.5454545454545454
	80000 examples trained:
		Accurary on DEV: 0.5690909090909091
	100000 examples trained:
		Accurary on DEV: 0.5563636363636364
	120000 examples train

	100000 examples trained:
		Accurary on DEV: 0.49272727272727274
	120000 examples trained:
		Accurary on DEV: 0.6163636363636363
	120789 examples trained:
		Accurary on DEV: 0.5818181818181818
Epoch 19:
	20000 examples trained:
		Accurary on DEV: 0.48363636363636364
	40000 examples trained:
		Accurary on DEV: 0.5054545454545455
	60000 examples trained:
		Accurary on DEV: 0.5727272727272728
	80000 examples trained:
		Accurary on DEV: 0.5436363636363636
	100000 examples trained:
		Accurary on DEV: 0.48
	120000 examples trained:
		Accurary on DEV: 0.6218181818181818
	120789 examples trained:
		Accurary on DEV: 0.5672727272727273
Epoch 20:
	20000 examples trained:
		Accurary on DEV: 0.4690909090909091
	40000 examples trained:
		Accurary on DEV: 0.5327272727272727
	60000 examples trained:
		Accurary on DEV: 0.5654545454545454
	80000 examples trained:
		Accurary on DEV: 0.56
	100000 examples trained:
		Accurary on DEV: 0.5018181818181818
	120000 examples trained:
		Accurary on DEV: 0.5963636

In [15]:
# Summary of the best checkpoint of the perceptron-loss run.
print(
    "Unigram Binary Features (Perceptron Loss):",
    "\tBest Accuracy on DEV: "+str(accuracy_pl),
    "\tBest Accuracy on DEVTEST: "+str(accuracy_devtest_pl),
    sep="\n",
)

Unigram Binary Features (Perceptron Loss):
	Best Accuracy on DEV: 0.6472727272727272
	Best Accuracy on DEVTEST: 0.5825771324863884


In [16]:
# Same unigram setup, but with hinge=True so training uses cost-augmented decoding.
# Starts again from features_0, which online_training copies rather than modifies.
features_hl, accuracy_hl, accuracy_devtest_hl = online_training(train_lst, dev_lst, devtest_lst, features_0, labels, hinge=True)

Epoch 1:
	20000 examples trained:
		Accurary on DEV: 0.44
		Accurary on DEVTEST: 0.4029038112522686
	40000 examples trained:
		Accurary on DEV: 0.45454545454545453
		Accurary on DEVTEST: 0.41379310344827586
	60000 examples trained:
		Accurary on DEV: 0.4890909090909091
		Accurary on DEVTEST: 0.4355716878402904
	80000 examples trained:
		Accurary on DEV: 0.48363636363636364
	100000 examples trained:
		Accurary on DEV: 0.49454545454545457
		Accurary on DEVTEST: 0.47549909255898365
	120000 examples trained:
		Accurary on DEV: 0.6309090909090909
		Accurary on DEVTEST: 0.5317604355716878
	120789 examples trained:
		Accurary on DEV: 0.52
Epoch 2:
	20000 examples trained:
		Accurary on DEV: 0.5327272727272727
	40000 examples trained:
		Accurary on DEV: 0.5218181818181818
	60000 examples trained:
		Accurary on DEV: 0.5545454545454546
	80000 examples trained:
		Accurary on DEV: 0.5545454545454546
	100000 examples trained:
		Accurary on DEV: 0.54
	120000 examples trained:
		Accurary on DEV: 0.65

	120000 examples trained:
		Accurary on DEV: 0.6672727272727272
	120789 examples trained:
		Accurary on DEV: 0.6509090909090909
Epoch 19:
	20000 examples trained:
		Accurary on DEV: 0.5945454545454546
	40000 examples trained:
		Accurary on DEV: 0.5945454545454546
	60000 examples trained:
		Accurary on DEV: 0.6127272727272727
	80000 examples trained:
		Accurary on DEV: 0.5981818181818181
	100000 examples trained:
		Accurary on DEV: 0.5927272727272728
	120000 examples trained:
		Accurary on DEV: 0.6654545454545454
	120789 examples trained:
		Accurary on DEV: 0.6563636363636364
Epoch 20:
	20000 examples trained:
		Accurary on DEV: 0.5981818181818181
	40000 examples trained:
		Accurary on DEV: 0.5872727272727273
	60000 examples trained:
		Accurary on DEV: 0.6090909090909091
	80000 examples trained:
		Accurary on DEV: 0.6018181818181818
	100000 examples trained:
		Accurary on DEV: 0.5945454545454546
	120000 examples trained:
		Accurary on DEV: 0.6636363636363637
	120789 examples trained:
		

In [17]:
# Summary of the best checkpoint of the hinge-loss run.
print(
    "Unigram Binary Features (Hinge Loss):",
    "\tBest Accuracy on DEV: "+str(accuracy_hl),
    "\tBest Accuracy on DEVTEST: "+str(accuracy_devtest_hl),
    sep="\n",
)

Unigram Binary Features (Hinge Loss):
	Best Accuracy on DEV: 0.6709090909090909
	Best Accuracy on DEVTEST: 0.5862068965517241


In [18]:
# Rank the unigram features of the hinge-loss model by weight, per label.
d1 = weight_analysis(features_hl, labels)

Label 0:
	Top 10 features:
		lacks: 1.39
		lacking: 1.27
		tiresome: 1.24
		listless: 1.21
		lousy: 1.18
		worst: 1.17
		unfunny: 1.17
		uneven: 1.17
		sloppy: 1.17
		depressing: 1.17
	Bottom 10 features:
		wonderful: -0.79
		enjoyable: -0.76
		nice: -0.75
		impressive: -0.72
		-RRB-: -0.71
		cute: -0.70
		likable: -0.69
		beautifully: -0.66
		charming: -0.66
		hilarious: -0.66
Label 1:
	Top 10 features:
		means: 0.36
		Mr: 0.35
		While: 0.34
		nor: 0.33
		Big: 0.33
		Cletis: 0.32
		If: 0.30
		word: 0.29
		entire: 0.29
		York: 0.28
	Bottom 10 features:
		enjoy: -0.43
		sweet: -0.43
		care: -0.42
		perfect: -0.42
		best: -0.41
		endearing: -0.40
		good: -0.40
		terrific: -0.40
		exciting: -0.39
		flawed: -0.39
Label 2:
	Top 10 features:
		pleasant: 1.29
		treat: 1.22
		thought-provoking: 1.22
		wonderfully: 1.20
		touching: 1.17
		vividly: 1.16
		delight: 1.14
		wonderful: 1.13
		funniest: 1.10
		feel-good: 1.10
	Bottom 10 features:
		n't: -1.07
		lacks: -1.06
		lacking: -1.00
		hardly:

In [19]:
# List every DEVTEST sentence the hinge-loss unigram model gets wrong.
error_analysis(devtest_lst, devtest, features_hl, labels)

Sentence: With the exception of some fleetingly amusing improvisations by Cedric the Entertainer as Perry 's boss , there is n't a redeeming moment here .
Gold Standard: 0
Prediction: 2


Sentence: Though only 60 minutes long , the film is packed with information and impressions .
Gold Standard: 2
Prediction: 0


Sentence: There 's a solid woman - finding-herself story somewhere in here , but you 'd have to dig pretty deep to uncover it .
Gold Standard: 1
Prediction: 2


Sentence: A great ensemble cast ca n't lift this heartfelt enterprise out of the familiar .
Gold Standard: 0
Prediction: 2


Sentence: Sam Mendes has become valedictorian at the School for Soft Landings and Easy Ways Out .
Gold Standard: 0
Prediction: 2


Sentence: There 's just no currency in deriding James Bond for being a clichéd , doddering , misogynistic boy 's club .
Gold Standard: 1
Prediction: 0


Sentence: Remember the kind of movie we were hoping `` Ecks vs. Sever '' or `` xXx '' was going to be ?
Gold Standa

Sentence: Smith is careful not to make fun of these curious owners of architectural oddities .
Gold Standard: 1
Prediction: 2




### Bigram binary features

In [20]:
# Bigram setting: every example becomes a list of adjacent token pairs.
train_lst_bb, dev_lst_bb, devtest_lst_bb = [
    preprocess(split, feature="bb") for split in (train, dev, devtest)
]

In [21]:
# Zero-initialized weight table over the bigrams seen in the training split.
features_bb_0 = extract_features(train_lst_bb)

In [22]:
# Hinge-loss training on bigram features.
features_bb, accuracy_bb, accuracy_devtest_bb = online_training(train_lst_bb, dev_lst_bb, devtest_lst_bb, features_bb_0, labels, hinge=True)

Epoch 1:
	20000 examples trained:
		Accurary on DEV: 0.4618181818181818
		Accurary on DEVTEST: 0.41197822141560797
	40000 examples trained:
		Accurary on DEV: 0.4581818181818182
	60000 examples trained:
		Accurary on DEV: 0.44727272727272727
	80000 examples trained:
		Accurary on DEV: 0.5163636363636364
		Accurary on DEVTEST: 0.4537205081669691
	100000 examples trained:
		Accurary on DEV: 0.4890909090909091
	120000 examples trained:
		Accurary on DEV: 0.5109090909090909
	120789 examples trained:
		Accurary on DEV: 0.5127272727272727
Epoch 2:
	20000 examples trained:
		Accurary on DEV: 0.5690909090909091
		Accurary on DEVTEST: 0.49364791288566245
	40000 examples trained:
		Accurary on DEV: 0.5436363636363636
	60000 examples trained:
		Accurary on DEV: 0.5272727272727272
	80000 examples trained:
		Accurary on DEV: 0.5836363636363636
		Accurary on DEVTEST: 0.4900181488203267
	100000 examples trained:
		Accurary on DEV: 0.5309090909090909
	120000 examples trained:
		Accurary on DEV: 0.5363

	40000 examples trained:
		Accurary on DEV: 0.6272727272727273
	60000 examples trained:
		Accurary on DEV: 0.6254545454545455
	80000 examples trained:
		Accurary on DEV: 0.6218181818181818
	100000 examples trained:
		Accurary on DEV: 0.6090909090909091
	120000 examples trained:
		Accurary on DEV: 0.6127272727272727
	120789 examples trained:
		Accurary on DEV: 0.610909090909091
Epoch 19:
	20000 examples trained:
		Accurary on DEV: 0.6345454545454545
	40000 examples trained:
		Accurary on DEV: 0.6290909090909091
	60000 examples trained:
		Accurary on DEV: 0.6181818181818182
	80000 examples trained:
		Accurary on DEV: 0.6254545454545455
	100000 examples trained:
		Accurary on DEV: 0.6054545454545455
	120000 examples trained:
		Accurary on DEV: 0.610909090909091
	120789 examples trained:
		Accurary on DEV: 0.6090909090909091
Epoch 20:
	20000 examples trained:
		Accurary on DEV: 0.6363636363636364
	40000 examples trained:
		Accurary on DEV: 0.6290909090909091
	60000 examples trained:
		Accu

In [23]:
# Summary of the best checkpoint of the bigram run.
print(
    "Bigram Binary Features (Hinge Loss):",
    "\tBest Accuracy on DEV: "+str(accuracy_bb),
    "\tBest Accuracy on DEVTEST: "+str(accuracy_devtest_bb),
    sep="\n",
)

Bigram Binary Features (Hinge Loss):
	Best Accuracy on DEV: 0.6472727272727272
	Best Accuracy on DEVTEST: 0.5462794918330308


In [24]:
# Rank the bigram features by weight, per label.
d2 = weight_analysis(features_bb, labels)

Label 0:
	Top 10 features:
		('lacks', 'the'): 1.31
		('never', 'quite'): 1.26
		("'s", 'hardly'): 1.12
		('a', 'failure'): 1.11
		('lacking', 'in'): 1.08
		('a', 'mess'): 1.08
		('most', 'offensive'): 1.05
		('bad', 'movie'): 1.05
		('pretty', 'mediocre'): 1.04
		('terrible', 'movie'): 1.03
	Bottom 10 features:
		('never', 'dull'): -0.65
		('not', 'too'): -0.61
		('a', 'masterpiece'): -0.58
		('certainly', 'does'): -0.58
		('funny', 'and'): -0.58
		('I', 'liked'): -0.56
		('not', 'without'): -0.56
		('so', 'well'): -0.54
		('good', 'time'): -0.53
		("n't", 'feel'): -0.52
Label 1:
	Top 10 features:
		('Mr', '.'): 0.64
		('most', 'part'): 0.54
		('an', 'exploration'): 0.50
		('elements', '.'): 0.47
		('All', 'Fears'): 0.43
		("'s", 'time'): 0.43
		('anything', 'else'): 0.42
		('Chill', "''"): 0.41
		("''", 'reunion'): 0.41
		('Bartlett', "'s"): 0.40
	Bottom 10 features:
		('too', 'much'): -0.37
		('could', "n't"): -0.36
		('too', 'long'): -0.36
		("'s", 'best'): -0.35
		('as', 'bad'): -

### Normalized features

In [25]:
# Normalized setting: text is lowercased, lemmatized and stripped of
# punctuation and stopwords before tokenizing.
train_lst_n, dev_lst_n, devtest_lst_n = [
    preprocess(split, feature="normalized") for split in (train, dev, devtest)
]

In [26]:
# Zero-initialized weight table over the normalized tokens of the training split.
features_n_0 = extract_features(train_lst_n)

In [27]:
# Hinge-loss training on normalized unigram features.
features_n, accuracy_n, accuracy_devtest_n = online_training(train_lst_n, dev_lst_n, devtest_lst_n, features_n_0, labels, hinge=True)

Epoch 1:
	20000 examples trained:
		Accurary on DEV: 0.4490909090909091
		Accurary on DEVTEST: 0.42105263157894735
	40000 examples trained:
		Accurary on DEV: 0.4618181818181818
		Accurary on DEVTEST: 0.4355716878402904
	60000 examples trained:
		Accurary on DEV: 0.4818181818181818
		Accurary on DEVTEST: 0.4537205081669691
	80000 examples trained:
		Accurary on DEV: 0.6218181818181818
		Accurary on DEVTEST: 0.573502722323049
	100000 examples trained:
		Accurary on DEV: 0.6090909090909091
	120000 examples trained:
		Accurary on DEV: 0.6236363636363637
		Accurary on DEVTEST: 0.5698729582577132
	120789 examples trained:
		Accurary on DEV: 0.6272727272727273
		Accurary on DEVTEST: 0.573502722323049
Epoch 2:
	20000 examples trained:
		Accurary on DEV: 0.5618181818181818
	40000 examples trained:
		Accurary on DEV: 0.5709090909090909
	60000 examples trained:
		Accurary on DEV: 0.5690909090909091
	80000 examples trained:
		Accurary on DEV: 0.6436363636363637
		Accurary on DEVTEST: 0.5880217785

	100000 examples trained:
		Accurary on DEV: 0.6509090909090909
	120000 examples trained:
		Accurary on DEV: 0.6509090909090909
	120789 examples trained:
		Accurary on DEV: 0.6527272727272727
Epoch 19:
	20000 examples trained:
		Accurary on DEV: 0.6381818181818182
	40000 examples trained:
		Accurary on DEV: 0.62
	60000 examples trained:
		Accurary on DEV: 0.6218181818181818
	80000 examples trained:
		Accurary on DEV: 0.649090909090909
	100000 examples trained:
		Accurary on DEV: 0.6527272727272727
	120000 examples trained:
		Accurary on DEV: 0.6527272727272727
	120789 examples trained:
		Accurary on DEV: 0.6563636363636364
Epoch 20:
	20000 examples trained:
		Accurary on DEV: 0.6381818181818182
	40000 examples trained:
		Accurary on DEV: 0.6163636363636363
	60000 examples trained:
		Accurary on DEV: 0.6181818181818182
	80000 examples trained:
		Accurary on DEV: 0.6472727272727272
	100000 examples trained:
		Accurary on DEV: 0.6509090909090909
	120000 examples trained:
		Accurary on DEV

In [28]:
# Summary of the best checkpoint of the normalized-feature run.
print(
    "Normalized Features (Hinge Loss):",
    "\tBest Accuracy on DEV: "+str(accuracy_n),
    "\tBest Accuracy on DEVTEST: "+str(accuracy_devtest_n),
    sep="\n",
)

Normalized Features (Hinge Loss):
	Best Accuracy on DEV: 0.6618181818181819
	Best Accuracy on DEVTEST: 0.5970961887477314


In [30]:
# Rank the normalized features by weight, per label.
d3 = weight_analysis(features_n, labels)

Label 0:
	Top 10 features:
		listless: 1.55
		disappointment: 1.51
		lousy: 1.46
		unfunny: 1.44
		lack: 1.43
		pointless: 1.41
		fails: 1.39
		failure: 1.38
		lackluster: 1.35
		insult: 1.35
	Bottom 10 features:
		refresh: -1.03
		goodnatured: -0.90
		powerful: -0.88
		likable: -0.85
		fascinate: -0.84
		impressive: -0.82
		wonderful: -0.82
		entertaining: -0.76
		lovely: -0.75
		beautifully: -0.74
Label 1:
	Top 10 features:
		greek: 0.72
		left: 0.67
		partner: 0.54
		solondz: 0.53
		50s: 0.46
		warren: 0.43
		1950s: 0.42
		murdock: 0.41
		harvard: 0.41
		deuce: 0.41
	Bottom 10 features:
		flaw: -0.46
		hole: -0.44
		poignant: -0.44
		playful: -0.43
		uneven: -0.43
		attractive: -0.42
		enjoy: -0.42
		perfect: -0.42
		absurd: -0.41
		excite: -0.40
Label 2:
	Top 10 features:
		wonderfully: 1.60
		vividly: 1.59
		delight: 1.42
		pleasant: 1.37
		miracle: 1.30
		heartfelt: 1.29
		proud: 1.26
		elegant: 1.25
		embrace: 1.24
		thoughtprovoking: 1.23
	Bottom 10 features:
		neither: -1.65
	