In [13]:
from nltk.corpus import wordnet
import numpy as np
import os
import re
import pandas as pd
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import vsm
import data_loading
import nlu_utils
from collections import defaultdict
import spacy
from spacy.tokenizer import Tokenizer
from itertools import product


%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Load the data
sat = data_loading.SAT()
dev = sat.dev()
dev.head()

Unnamed: 0,Difficulty,URL,candidates,id,num_blanks,question,solution_index,source
210,1,,"[concur with, rescind, object to, repeal, disa...",,2,"In the United States, social activists who str...",1,SAT 8-10 Section 3 Q2
183,5,,"[incantatory, economical, disaffected, unstint...",,1,African American poet Lucille Clifton writes i...,1,SAT 8-01 Section 4 Q8
271,4,,"[sycophant, pedant, pundit, nemesis, polymath]",,1,Benjamin Franklin was renowned for being a BLA...,4,SAT 9-10 Section 3 Q6
121,5,,"[obscure, deferential, discriminating, sanctim...",,1,The judges for the chili competition were BLAN...,2,SAT 7-05 (Sat) Section 4 Q8
143,3,,"[invalidates, manifesting, disregards, invigor...",,2,Contemporary Inuit sculpture merges traditiona...,4,SAT 7-05 (Sun) Section 7 Q3


In [4]:
giga_ww_12 = pd.read_csv('../data/GIGA/combined_batch_0_1-4_ww_15000.csv.gz', index_col=0, compression='gzip')
giga_ww_6 = pd.read_csv('../data/GIGA/nyt5_15000.csv.gz', index_col=0, compression='gzip')
giga_wc_6 = None
giga_ww_12.head()

Unnamed: 0,the,to,of,gonna,and,in,that,for,is,on,...,amelio,olestra,hfs,c1996,copyboyatdmaorg,kaczynski,lucent,nyliteatnytimescom,price-weighted,telebras
the,1167459.0,1328065.0,2159367.0,774314.0,1127855.0,1321065.0,595376.0,560326.0,561462.0,494246.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
to,1328065.0,206272.0,384472.0,518677.0,453675.0,335175.0,234121.0,187052.0,218879.0,144155.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
of,2159367.0,384472.0,171812.0,660163.0,517953.0,419704.0,205915.0,176384.0,203834.0,144776.0,...,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
gonna,774314.0,518677.0,660163.0,166982.0,422987.0,475857.0,243403.0,257095.0,248778.0,171332.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
and,1127855.0,453675.0,517953.0,422987.0,136474.0,392647.0,170276.0,193261.0,146064.0,143960.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
ppmi_ww_12 = vsm.pmi(giga_ww_12)

In [11]:
dppmi_ww_12 = nlu_utils.dpmi(giga_ww_12)

In [42]:
ppmi_ww_6 = vsm.pmi(giga_ww_6)

In [43]:
dppmi_ww_6 = nlu_utils.dpmi(giga_ww_6)

In [27]:
class PPMIModel:
    def __init__(self, corpus_pmi, try_synonyms=True, reverse=True, word_counts=word_counts, verbose=False, pos_to_remove=[], include_ancestors=False, include_children=False):
        self.corpus_pmi = corpus_pmi
        self.try_synonyms = try_synonyms
        self.verbose = verbose
        self.word_counts = word_counts
        
        # Features
        self.reverse=reverse
        self.pos_to_remove = pos_to_remove
        self.include_ancestors = include_ancestors
        self.include_children = include_children
    
    def answer(self, problem):
        n_blanks = problem['num_blanks']
        if n_blanks == 1:
            return self.answer1(problem)
        else: # n_blanks == 2
            return self.answer2(problem)
    
    def answer1(self, problem):
        scores = []
        for option in problem["candidates"]:
            scores += [self.score1(problem["question"], option)[0]]
        return [np.argmax(scores)], scores
    
    def answer2(self, problem):
        scores = []
        for option in problem["candidates"]:
            cand0, cand1 = option.split(',')
            scores += [self.score2(problem["question"], cand0, cand1)[0]]
        return [np.argmax(scores)], scores
    
    def score1(self, sentence, proposal):
        full_sentence = self.substitute1(sentence, proposal)
        doc = nlu_utils.get_spacy_doc(full_sentence)
        _, proposal_token = nlu_utils.get_token(doc, proposal)
        features = self.featurize(doc, proposal_token)
        
        indiv_scores = {}

        if self.try_synonyms:
            pos = nlu_utils.spacy_to_wn_tag(proposal_token.pos_)
            synonyms = nlu_utils.get_alternate_words(proposal_token.norm_, pos, self.word_counts, reverse=self.reverse)

        tot_score = 0
        for token in features:
            if token == proposal_token:
                continue
            if token.is_punct or token.is_space:
                continue
            score = self.ppmi(proposal_token.norm_, token.norm_)
            if score is None and self.try_synonyms:
                score = self.approx_ppmi(proposal_token, synonyms, token)
            tot_score += score if score is not None else 0
            indiv_scores[str(token)] = score if score is not None else 0
        return tot_score, indiv_scores
    
    def score2(self, sentence, proposal0, proposal1):
        full_sentence = self.substitute2(sentence, proposal0, proposal1)
        doc = nlu_utils.get_spacy_doc(full_sentence)
        _, proposal_token0 = nlu_utils.get_token(doc, proposal0)
        _, proposal_token1 = nlu_utils.get_token(doc, proposal1)
        
        features = []
        features.append(self.featurize(doc, proposal_token0))
        features.append(self.featurize(doc, proposal_token1))
        
        indiv_scores = [{},{}]

        synonyms0 = None
        synonyms1 = None
        if self.try_synonyms:
            pos0 = nlu_utils.spacy_to_wn_tag(proposal_token0.pos_)
            synonyms0 = nlu_utils.get_alternate_words(proposal_token0.norm_, pos0)
            
            pos1 = nlu_utils.spacy_to_wn_tag(proposal_token1.pos_)
            synonyms1 = nlu_utils.get_alternate_words(proposal_token1.norm_, pos1)

        tot_score = 0
        for i, (proposal_token, synonyms) in enumerate([(proposal_token0, synonyms0), (proposal_token1, synonyms1)]):
            for token in features[i]:
                if token == proposal_token0:
                    continue
                if token == proposal_token1:
                    continue
                if token.is_punct or token.is_space:
                    continue
                score = self.ppmi(proposal_token.norm_, token.norm_)
                if score is None and self.try_synonyms:
                    score = self.approx_ppmi(proposal_token, synonyms, token)
                tot_score += score if score is not None else 0
                indiv_scores[i][str(token)] = score if score is not None else 0
        return tot_score, indiv_scores
    
    def approx_ppmi(self, proposal_token, proposal_synonyms, word_token):
        pos = nlu_utils.spacy_to_wn_tag(word_token.pos_)
        word_synonyms = nlu_utils.get_alternate_words(word_token.norm_, pos)
        # try matching using different versions of the proposal word
        for psyn in proposal_synonyms:
            score = self.ppmi(psyn, word_token.norm_)
            if score is not None:
                if self.verbose:
                    print("Used synonym for proposal word: {} -> {}".format(proposal_token.text, psyn))
                return score
        # try matching using different versions of the non-proposal word
        for wsyn in word_synonyms:
            score = self.ppmi(wsyn, proposal_token.norm_)
            if score is not None:
                if self.verbose:
                    print("Used synonym: {} -> {}".format(word_token.text, wsyn))
                return score
        # Next just try all combos
        for psyn in proposal_synonyms:
            for wsyn in word_synonyms:
                score = self.ppmi(psyn, word_token.norm_)
                if score is not None:
                    if self.verbose:
                        print("Used synonym: {} -> {} and {} -> {}".format(proposal_token.text, psyn, word_token.text, wsyn))
                    return score
        if self.verbose:
            print("UNABLE TO FIND ANY SYNONYMS IN VOCABULARY")
        return None

    def ppmi(self, proposal, word):
        try:
            return self.corpus_pmi.loc[proposal, word]
        except KeyError:
            return None
        
    def substitute1(self, sentence, proposal):
        new_sentence = re.sub('BLANK', proposal, sentence)
        if 'BLANK' in new_sentence:
            print (sentence)
            assert False
            
        return new_sentence
    
    def substitute2(self, sentence, proposal0, proposal1):
        new_sentence = re.sub('BLANK0', proposal0, sentence)
        new_sentence = re.sub('BLANK1', proposal1, new_sentence)
        if new_sentence == sentence:
            print (sentence)
            assert False
            
        return new_sentence
    
    
    def featurize(self, doc, proposal_token):
        features = []
        # Remove determiners, coordinating conjunctions, pronouns, add everything else to feature set
        for token in doc:
            if token.pos_ not in self.pos_to_remove: #['DET', 'PRON', 'CCONJ']:
                features.append(token)
        # Add dependencies to the feature set again (effectively increase their weight)
        if self.include_ancestors:
            features += nlu_utils.get_ancestors_of_word(proposal_token, self.pos_to_remove)
        if self.include_children:
            features += nlu_utils.get_children_of_word(proposal_token, self.pos_to_remove)
        return features  
    
    
    
    
    
    
    

In [35]:
def get_word_counts(df):
    counts = defaultdict(int)
    for word in df.columns.values:
        counts[word] = df[word].sum()
    return counts

In [36]:
giga_ww_12_word_counts = get_word_counts(giga_ww_12)

In [41]:
"""
DPPMI synonyms, no synonyms
PPMI reverse synonyms
remove stopwords
include children
include ancestors
"""


ppmi = [(ppmi_ww_12, "ppmi_ww_12"), (dppmi_ww_12, "dppmi_ww_12")]
reverse = [False, True]
pos_to_remove = [['DET', 'PRON', 'CCONJ'], []]
include_ancestors = [False, True]
include_children = [False, True]

for p, r, o, a, c in product(ppmi, reverse, pos_to_remove, include_ancestors, include_children):
    model = PPMIModel(p[0], try_synonyms=True, word_counts=giga_ww_12_word_counts, verbose=False, pos_to_remove=o, include_ancestors=a, include_children=c)
    print("PPMI: {} | Reverse synonyms: {} | Remove POS: {} | Include ancestors: {} | Include children: {}".format(p[1], r, len(o)>0, a, c))
    preds = make_predictions(model, dev)
    
    correct_incorrect(preds)
    print("-------------------------------------------------------------------------------------------------\n")


0it [00:00, ?it/s]

PPMI: ppmi_ww_12 | Reverse synonyms: False | Remove POS: True | Include ancestors: False | Include children: False


194it [01:19,  2.43it/s]
0it [00:00, ?it/s]

Accuracy: 0.26288659793814434

Correct num_blanks counts: {1: 36, 2: 15}
Correct difficulty counts: {5: 8, 1: 13, 4: 8, 3: 16, 2: 6}

Inorrect num_blanks counts: {2: 71, 1: 72}
Inorrect difficulty counts: {1: 33, 4: 19, 5: 34, 3: 42, 2: 15}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: False | Remove POS: True | Include ancestors: False | Include children: True


194it [01:27,  2.22it/s]
0it [00:00, ?it/s]

Accuracy: 0.25773195876288657

Correct num_blanks counts: {1: 39, 2: 11}
Correct difficulty counts: {5: 8, 1: 14, 3: 17, 4: 8, 2: 3}

Inorrect num_blanks counts: {2: 75, 1: 69}
Inorrect difficulty counts: {1: 32, 4: 19, 5: 34, 3: 41, 2: 18}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: False | Remove POS: True | Include ancestors: True | Include children: False


194it [01:46,  1.82it/s]
0it [00:00, ?it/s]

Accuracy: 0.27835051546391754

Correct num_blanks counts: {1: 39, 2: 15}
Correct difficulty counts: {5: 8, 1: 14, 4: 10, 3: 16, 2: 6}

Inorrect num_blanks counts: {2: 71, 1: 69}
Inorrect difficulty counts: {1: 32, 4: 17, 5: 34, 3: 42, 2: 15}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: False | Remove POS: True | Include ancestors: True | Include children: True


194it [02:00,  1.61it/s]
0it [00:00, ?it/s]

Accuracy: 0.27835051546391754

Correct num_blanks counts: {1: 41, 2: 13}
Correct difficulty counts: {5: 8, 1: 14, 3: 17, 4: 10, 2: 5}

Inorrect num_blanks counts: {2: 73, 1: 67}
Inorrect difficulty counts: {1: 32, 4: 17, 5: 34, 3: 41, 2: 16}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: False | Remove POS: False | Include ancestors: False | Include children: False


194it [01:53,  1.72it/s]
0it [00:00, ?it/s]

Accuracy: 0.28350515463917525

Correct num_blanks counts: {1: 39, 2: 16}
Correct difficulty counts: {5: 10, 1: 15, 4: 9, 3: 16, 2: 5}

Inorrect num_blanks counts: {2: 70, 1: 69}
Inorrect difficulty counts: {1: 31, 4: 18, 5: 32, 3: 42, 2: 16}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: False | Remove POS: False | Include ancestors: False | Include children: True


194it [01:47,  1.81it/s]
0it [00:00, ?it/s]

Accuracy: 0.28350515463917525

Correct num_blanks counts: {1: 39, 2: 16}
Correct difficulty counts: {5: 11, 1: 14, 3: 16, 4: 9, 2: 5}

Inorrect num_blanks counts: {2: 70, 1: 69}
Inorrect difficulty counts: {1: 32, 4: 18, 5: 31, 3: 42, 2: 16}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: False | Remove POS: False | Include ancestors: True | Include children: False


194it [01:45,  1.83it/s]
0it [00:00, ?it/s]

Accuracy: 0.30412371134020616

Correct num_blanks counts: {1: 40, 2: 19}
Correct difficulty counts: {5: 10, 1: 15, 4: 11, 3: 17, 2: 6}

Inorrect num_blanks counts: {2: 67, 1: 68}
Inorrect difficulty counts: {1: 31, 4: 16, 5: 32, 3: 41, 2: 15}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: False | Remove POS: False | Include ancestors: True | Include children: True


194it [01:53,  1.70it/s]
0it [00:00, ?it/s]

Accuracy: 0.29896907216494845

Correct num_blanks counts: {1: 41, 2: 17}
Correct difficulty counts: {5: 11, 1: 15, 3: 16, 4: 11, 2: 5}

Inorrect num_blanks counts: {2: 69, 1: 67}
Inorrect difficulty counts: {1: 31, 4: 16, 5: 31, 3: 42, 2: 16}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: True | Remove POS: True | Include ancestors: False | Include children: False


194it [01:43,  1.88it/s]
0it [00:00, ?it/s]

Accuracy: 0.26288659793814434

Correct num_blanks counts: {1: 36, 2: 15}
Correct difficulty counts: {5: 8, 1: 13, 4: 8, 3: 16, 2: 6}

Inorrect num_blanks counts: {2: 71, 1: 72}
Inorrect difficulty counts: {1: 33, 4: 19, 5: 34, 3: 42, 2: 15}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: True | Remove POS: True | Include ancestors: False | Include children: True


194it [01:53,  1.71it/s]
0it [00:00, ?it/s]

Accuracy: 0.25773195876288657

Correct num_blanks counts: {1: 39, 2: 11}
Correct difficulty counts: {5: 8, 1: 14, 3: 17, 4: 8, 2: 3}

Inorrect num_blanks counts: {2: 75, 1: 69}
Inorrect difficulty counts: {1: 32, 4: 19, 5: 34, 3: 41, 2: 18}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: True | Remove POS: True | Include ancestors: True | Include children: False


194it [02:06,  1.54it/s]
0it [00:00, ?it/s]

Accuracy: 0.27835051546391754

Correct num_blanks counts: {1: 39, 2: 15}
Correct difficulty counts: {5: 8, 1: 14, 4: 10, 3: 16, 2: 6}

Inorrect num_blanks counts: {2: 71, 1: 69}
Inorrect difficulty counts: {1: 32, 4: 17, 5: 34, 3: 42, 2: 15}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: True | Remove POS: True | Include ancestors: True | Include children: True


194it [02:00,  1.61it/s]
0it [00:00, ?it/s]

Accuracy: 0.27835051546391754

Correct num_blanks counts: {1: 41, 2: 13}
Correct difficulty counts: {5: 8, 1: 14, 3: 17, 4: 10, 2: 5}

Inorrect num_blanks counts: {2: 73, 1: 67}
Inorrect difficulty counts: {1: 32, 4: 17, 5: 34, 3: 41, 2: 16}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: True | Remove POS: False | Include ancestors: False | Include children: False


194it [04:13,  1.31s/it]
0it [00:00, ?it/s]

Accuracy: 0.28350515463917525

Correct num_blanks counts: {1: 39, 2: 16}
Correct difficulty counts: {5: 10, 1: 15, 4: 9, 3: 16, 2: 5}

Inorrect num_blanks counts: {2: 70, 1: 69}
Inorrect difficulty counts: {1: 31, 4: 18, 5: 32, 3: 42, 2: 16}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: True | Remove POS: False | Include ancestors: False | Include children: True


194it [02:21,  1.38it/s]
0it [00:00, ?it/s]

Accuracy: 0.28350515463917525

Correct num_blanks counts: {1: 39, 2: 16}
Correct difficulty counts: {5: 11, 1: 14, 3: 16, 4: 9, 2: 5}

Inorrect num_blanks counts: {2: 70, 1: 69}
Inorrect difficulty counts: {1: 32, 4: 18, 5: 31, 3: 42, 2: 16}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: True | Remove POS: False | Include ancestors: True | Include children: False


194it [02:08,  1.51it/s]
0it [00:00, ?it/s]

Accuracy: 0.30412371134020616

Correct num_blanks counts: {1: 40, 2: 19}
Correct difficulty counts: {5: 10, 1: 15, 4: 11, 3: 17, 2: 6}

Inorrect num_blanks counts: {2: 67, 1: 68}
Inorrect difficulty counts: {1: 31, 4: 16, 5: 32, 3: 41, 2: 15}
-------------------------------------------------------------------------------------------------

PPMI: ppmi_ww_12 | Reverse synonyms: True | Remove POS: False | Include ancestors: True | Include children: True


194it [02:03,  1.57it/s]
0it [00:00, ?it/s]

Accuracy: 0.29896907216494845

Correct num_blanks counts: {1: 41, 2: 17}
Correct difficulty counts: {5: 11, 1: 15, 3: 16, 4: 11, 2: 5}

Inorrect num_blanks counts: {2: 69, 1: 67}
Inorrect difficulty counts: {1: 31, 4: 16, 5: 31, 3: 42, 2: 16}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: False | Remove POS: True | Include ancestors: False | Include children: False


194it [01:38,  1.98it/s]
0it [00:00, ?it/s]

Accuracy: 0.27319587628865977

Correct num_blanks counts: {1: 37, 2: 16}
Correct difficulty counts: {5: 7, 1: 13, 3: 17, 4: 9, 2: 7}

Inorrect num_blanks counts: {2: 70, 1: 71}
Inorrect difficulty counts: {1: 33, 4: 18, 5: 35, 3: 41, 2: 14}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: False | Remove POS: True | Include ancestors: False | Include children: True


194it [01:41,  1.92it/s]
0it [00:00, ?it/s]

Accuracy: 0.29381443298969073

Correct num_blanks counts: {1: 40, 2: 17}
Correct difficulty counts: {5: 7, 1: 15, 3: 19, 4: 9, 2: 7}

Inorrect num_blanks counts: {2: 69, 1: 68}
Inorrect difficulty counts: {1: 31, 4: 18, 5: 35, 3: 39, 2: 14}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: False | Remove POS: True | Include ancestors: True | Include children: False


194it [01:52,  1.72it/s]
0it [00:00, ?it/s]

Accuracy: 0.29381443298969073

Correct num_blanks counts: {1: 40, 2: 17}
Correct difficulty counts: {5: 7, 1: 14, 4: 11, 3: 18, 2: 7}

Inorrect num_blanks counts: {2: 69, 1: 68}
Inorrect difficulty counts: {1: 32, 4: 16, 5: 35, 3: 40, 2: 14}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: False | Remove POS: True | Include ancestors: True | Include children: True


194it [02:10,  1.48it/s]
0it [00:00, ?it/s]

Accuracy: 0.30927835051546393

Correct num_blanks counts: {1: 43, 2: 17}
Correct difficulty counts: {5: 7, 1: 16, 3: 19, 4: 11, 2: 7}

Inorrect num_blanks counts: {2: 69, 1: 65}
Inorrect difficulty counts: {1: 30, 4: 16, 5: 35, 3: 39, 2: 14}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: False | Remove POS: False | Include ancestors: False | Include children: False


194it [01:39,  1.95it/s]
0it [00:00, ?it/s]

Accuracy: 0.27835051546391754

Correct num_blanks counts: {1: 37, 2: 17}
Correct difficulty counts: {5: 8, 1: 14, 3: 16, 4: 9, 2: 7}

Inorrect num_blanks counts: {2: 69, 1: 71}
Inorrect difficulty counts: {1: 32, 4: 18, 5: 34, 3: 42, 2: 14}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: False | Remove POS: False | Include ancestors: False | Include children: True


194it [01:47,  1.81it/s]
0it [00:00, ?it/s]

Accuracy: 0.28865979381443296

Correct num_blanks counts: {1: 39, 2: 17}
Correct difficulty counts: {5: 9, 1: 15, 3: 17, 4: 9, 2: 6}

Inorrect num_blanks counts: {2: 69, 1: 69}
Inorrect difficulty counts: {1: 31, 4: 18, 5: 33, 3: 41, 2: 15}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: False | Remove POS: False | Include ancestors: True | Include children: False


194it [02:01,  1.60it/s]
0it [00:00, ?it/s]

Accuracy: 0.28865979381443296

Correct num_blanks counts: {1: 40, 2: 16}
Correct difficulty counts: {5: 7, 1: 14, 4: 11, 3: 17, 2: 7}

Inorrect num_blanks counts: {2: 70, 1: 68}
Inorrect difficulty counts: {1: 32, 4: 16, 5: 35, 3: 41, 2: 14}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: False | Remove POS: False | Include ancestors: True | Include children: True


194it [02:02,  1.58it/s]
0it [00:00, ?it/s]

Accuracy: 0.30927835051546393

Correct num_blanks counts: {1: 43, 2: 17}
Correct difficulty counts: {5: 8, 1: 16, 3: 18, 4: 11, 2: 7}

Inorrect num_blanks counts: {2: 69, 1: 65}
Inorrect difficulty counts: {1: 30, 4: 16, 5: 34, 3: 40, 2: 14}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: True | Remove POS: True | Include ancestors: False | Include children: False


194it [01:38,  1.98it/s]
0it [00:00, ?it/s]

Accuracy: 0.27319587628865977

Correct num_blanks counts: {1: 37, 2: 16}
Correct difficulty counts: {5: 7, 1: 13, 3: 17, 4: 9, 2: 7}

Inorrect num_blanks counts: {2: 70, 1: 71}
Inorrect difficulty counts: {1: 33, 4: 18, 5: 35, 3: 41, 2: 14}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: True | Remove POS: True | Include ancestors: False | Include children: True


194it [01:41,  1.92it/s]
0it [00:00, ?it/s]

Accuracy: 0.29381443298969073

Correct num_blanks counts: {1: 40, 2: 17}
Correct difficulty counts: {5: 7, 1: 15, 3: 19, 4: 9, 2: 7}

Inorrect num_blanks counts: {2: 69, 1: 68}
Inorrect difficulty counts: {1: 31, 4: 18, 5: 35, 3: 39, 2: 14}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: True | Remove POS: True | Include ancestors: True | Include children: False


194it [01:58,  1.64it/s]
0it [00:00, ?it/s]

Accuracy: 0.29381443298969073

Correct num_blanks counts: {1: 40, 2: 17}
Correct difficulty counts: {5: 7, 1: 14, 4: 11, 3: 18, 2: 7}

Inorrect num_blanks counts: {2: 69, 1: 68}
Inorrect difficulty counts: {1: 32, 4: 16, 5: 35, 3: 40, 2: 14}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: True | Remove POS: True | Include ancestors: True | Include children: True


194it [01:53,  1.71it/s]
0it [00:00, ?it/s]

Accuracy: 0.30927835051546393

Correct num_blanks counts: {1: 43, 2: 17}
Correct difficulty counts: {5: 7, 1: 16, 3: 19, 4: 11, 2: 7}

Inorrect num_blanks counts: {2: 69, 1: 65}
Inorrect difficulty counts: {1: 30, 4: 16, 5: 35, 3: 39, 2: 14}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: True | Remove POS: False | Include ancestors: False | Include children: False


194it [01:39,  1.96it/s]
0it [00:00, ?it/s]

Accuracy: 0.27835051546391754

Correct num_blanks counts: {1: 37, 2: 17}
Correct difficulty counts: {5: 8, 1: 14, 3: 16, 4: 9, 2: 7}

Inorrect num_blanks counts: {2: 69, 1: 71}
Inorrect difficulty counts: {1: 32, 4: 18, 5: 34, 3: 42, 2: 14}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: True | Remove POS: False | Include ancestors: False | Include children: True


194it [01:44,  1.86it/s]
0it [00:00, ?it/s]

Accuracy: 0.28865979381443296

Correct num_blanks counts: {1: 39, 2: 17}
Correct difficulty counts: {5: 9, 1: 15, 3: 17, 4: 9, 2: 6}

Inorrect num_blanks counts: {2: 69, 1: 69}
Inorrect difficulty counts: {1: 31, 4: 18, 5: 33, 3: 41, 2: 15}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: True | Remove POS: False | Include ancestors: True | Include children: False


194it [02:01,  1.60it/s]
0it [00:00, ?it/s]

Accuracy: 0.28865979381443296

Correct num_blanks counts: {1: 40, 2: 16}
Correct difficulty counts: {5: 7, 1: 14, 4: 11, 3: 17, 2: 7}

Inorrect num_blanks counts: {2: 70, 1: 68}
Inorrect difficulty counts: {1: 32, 4: 16, 5: 35, 3: 41, 2: 14}
-------------------------------------------------------------------------------------------------

PPMI: dppmi_ww_12 | Reverse synonyms: True | Remove POS: False | Include ancestors: True | Include children: True


194it [02:05,  1.54it/s]

Accuracy: 0.30927835051546393

Correct num_blanks counts: {1: 43, 2: 17}
Correct difficulty counts: {5: 8, 1: 16, 3: 18, 4: 11, 2: 7}

Inorrect num_blanks counts: {2: 69, 1: 65}
Inorrect difficulty counts: {1: 30, 4: 16, 5: 34, 3: 40, 2: 14}
-------------------------------------------------------------------------------------------------






In [37]:
giga_ww_6_word_counts = get_word_counts(giga_ww_6)

In [None]:
ppmi = [(ppmi_ww_6, "ppmi_ww_6"), (dppmi_ww_6, "dppmi_ww_6")]
reverse = [False, True]
pos_to_remove = [['DET', 'PRON', 'CCONJ'], []]
include_ancestors = [False, True]
include_children = [False, True]

count = 1
for p, r, o, a, c in product(ppmi, reverse, pos_to_remove, include_ancestors, include_children):
    model = PPMIModel(p[0], try_synonyms=True, word_counts=giga_ww_6_word_counts, verbose=False, pos_to_remove=o, include_ancestors=a, include_children=c)
    print("#{}".format(count))
    count += 1
    
    print("PPMI: {} | Reverse synonyms: {} | Remove POS: {} | Include ancestors: {} | Include children: {}".format(p[1], r, len(o)>0, a, c))
    preds = make_predictions(model, dev)
    
    correct_incorrect(preds)
    print("-------------------------------------------------------------------------------------------------\n")

0it [00:00, ?it/s]

#1
PPMI: ppmi_ww_6 | Reverse synonyms: False | Remove POS: True | Include ancestors: False | Include children: False


194it [01:32,  2.09it/s]
0it [00:00, ?it/s]

Accuracy: 0.26804123711340205

Correct num_blanks counts: {1: 37, 2: 15}
Correct difficulty counts: {5: 10, 2: 6, 1: 16, 3: 14, 4: 6}

Inorrect num_blanks counts: {2: 71, 1: 71}
Inorrect difficulty counts: {1: 30, 4: 21, 3: 44, 5: 32, 2: 15}
-------------------------------------------------------------------------------------------------

#2
PPMI: ppmi_ww_6 | Reverse synonyms: False | Remove POS: True | Include ancestors: False | Include children: True


194it [01:35,  2.02it/s]
0it [00:00, ?it/s]

Accuracy: 0.27319587628865977

Correct num_blanks counts: {1: 36, 2: 17}
Correct difficulty counts: {5: 10, 2: 6, 1: 16, 3: 15, 4: 6}

Inorrect num_blanks counts: {2: 69, 1: 72}
Inorrect difficulty counts: {1: 30, 4: 21, 3: 43, 5: 32, 2: 15}
-------------------------------------------------------------------------------------------------

#3
PPMI: ppmi_ww_6 | Reverse synonyms: False | Remove POS: True | Include ancestors: True | Include children: False


160it [01:49,  1.46it/s]

In [40]:
def make_predictions(ppmi_model, data):
    """
    Make predictions, store info in a list of small dicts, return the list
    """
    predictions = []
    for i, (_, problem) in tqdm(enumerate(dev.iterrows())):
        ans, scores = model.answer(problem)
        predictions.append({'problem': problem, 'ans': ans, 'scores': scores})
    
    return predictions



def correct_incorrect(predictions, verbose=True):
    """
    Separates predictions into correct and incorrect ones, returns those lists
    If verbose, prints out accuracy, and how many 1- or 2-blank questions were in each category,
        as well as the difficulty distributions of correct and incorrect predictions
    """
    correct = []
    incorrect = []
    
    correct_blanks = defaultdict(int)
    incorrect_blanks = defaultdict(int)
    
    correct_diff = defaultdict(int)
    incorrect_diff = defaultdict(int)
    
    for pred in predictions:
        if pred['problem']['solution_index'] == pred['ans'][0]:
            correct.append(pred)
            correct_blanks[pred['problem']['num_blanks']] += 1
            correct_diff[pred['problem']['Difficulty']] += 1
        else:
            incorrect.append(pred)
            incorrect_blanks[pred['problem']['num_blanks']] += 1
            incorrect_diff[pred['problem']['Difficulty']] += 1
    
    if verbose:
        print("Accuracy: {}\n".format(len(correct)*1.0/(len(correct) + len(incorrect))))
        print("Correct num_blanks counts: {}".format(dict(correct_blanks)))
        print("Correct difficulty counts: {}\n".format(dict(correct_diff)))
        print("Inorrect num_blanks counts: {}".format(dict(incorrect_blanks)))
        print("Inorrect difficulty counts: {}".format(dict(incorrect_diff)))
        
        
    return correct, incorrect

            

    
def pretty_print_prediction(prediction):
    print(prediction['problem']['question'])
    print(prediction['problem'])
    print(prediction['ans'])
    print(prediction['scores'])    
    print("------------------------------------------------------------------------------")
    
    
# TODO: for some reason some candidate words don't return any ppmi value for some words in sentence
# That's why need to pass in spacy_nlp
# But once I implemented that, the problem fixed itself for the one example I looked at??
def print_with_ppmi_scores(prediction, ppmi_model, spacy_nlp):
    """
    To look at pairwise ppmi scores for any particular example (btwn candidates and sentence words)
    """
    candidates = prediction['problem']['candidates']
    sentence = prediction['problem']['question']
    print(sentence)
    print("")
    print("Candidates: {}".format(candidates))
    print("Scores: {}".format(prediction['scores']))
    print("")
    print("Prediction: {}\tSolution: {}\n".format(prediction['ans'][0], prediction['problem']['solution_index']))
    print("")
    
    # Dict mapping from string (word in sentence) to list
    # First element of list is the ppmi score of first candidate, second is second candidate, etc
    indiv_scores_reverse = defaultdict(list)
    
    if prediction['problem']['num_blanks'] == 1:
        for c in candidates:
            _, indiv_scores = model.score1(sentence, c)
            tokens = spacy_nlp(sentence)
            for t in tokens:
                if str(t) in indiv_scores:
                    score = round(float(indiv_scores[str(t)]), 5)
                    indiv_scores_reverse[str(t)].append(score)
                elif t.is_punct or t.is_space or 'BLANK' in str(t):
                    continue
                # NOT EVERY WORD IS SCORED SOMETIMES, FOR SOME REASON??
                # But once I implemented this, it fixed itself?
                else:
                    indiv_scores_reverse[str(t)].append('?')
                    
                    
        columns = "{0:12}|{1:15}|{2:15}|{3:15}|{4:15}|{5:15}"
        header_str = columns.format(*([""] + candidates))
        print(header_str)
        for word, scores in indiv_scores_reverse.items():
            print(columns.format(*([word] + scores)))
    
    if prediction['problem']['num_blanks'] == 2:
        for c in candidates:
            two = c.split(', ')
            c1 = two[0]
            c2 = two[1]
            _, indiv_scores = model.score2(sentence, c1, c2)
            tokens = spacy_nlp(sentence)
            
            for t in tokens:
                if str(t) in indiv_scores[0]:
                    score1 = round(float(indiv_scores[0][str(t)]), 4)
                    score2 = round(float(indiv_scores[1][str(t)]), 4)
                    indiv_scores_reverse[str(t)].append(str((score1, score2)))
                elif t.is_punct or t.is_space or 'BLANK' in str(t):
                    continue
                # NOT EVERY WORD IS SCORED SOMETIMES, FOR SOME REASON
                # But once I implemented this, it fixed itself?
                else:
                    indiv_scores_reverse[str(t)].append(('?', '?'))

            
        columns = "{0:12}|{1:15}|{2:15}|{3:15}|{4:15}|{5:15}"
        header_str1 = columns.format(*([""] + [c.split(', ')[0] for c in candidates]))
        header_str2 = columns.format(*([""] + [c.split(', ')[1] for c in candidates]))
        print(header_str1)
        print(header_str2)
        for word, scores in indiv_scores_reverse.items():
            tuple_to_print = tuple([word] + scores)
#             print(tuple_to_print)
            print(columns.format(*tuple_to_print))
        
    
    print("------------------------------------------------------------------------------")
    
    
    
    

In [60]:
predictions = make_predictions(model, dev)
print (accuracy(predictions))

0.18041237113402062


In [219]:
nlp = spacy.load('en_core_web_sm')
print_with_ppmi_scores(predictions[7], model, nlp)

The Mona Lisa, shipped in a private cabin and received by important dignitaries, was treated more like BLANK than a painting upon its arrival in the United States.

Candidates: ['a perfectionist', 'a maverick', 'a potentate', 'an ascetic', 'an interloper']
Scores: [3.0531746829333235, 3.6827076962568497, 3.0531746829333235, 3.770518817129643, 3.770518817129643]

Prediction: 3	Solution: 2


            |a perfectionist|a maverick     |a potentate    |an ascetic     |an interloper  
The         |            0.0|            0.0|            0.0|            0.0|            0.0
Mona        |            0.0|            0.0|            0.0|            0.0|            0.0
Lisa        |        0.17118|        0.17118|        0.17118|        0.04053|        0.04053
shipped     |            0.0|            0.0|            0.0|            0.0|            0.0
in          |        0.10968|        0.10968|        0.10968|        0.10968|        0.10968
a           |            0.0|            0.0|    

In [38]:
correct, incorrect = correct_incorrect(predictions)

Accuracy: 0.18041237113402062

Correct num_blanks counts: {2: 14, 1: 21}
Correct difficulty counts: {3: 14, 2: 3, 5: 4, 1: 11, 4: 3}

Inorrect num_blanks counts: {2: 72, 1: 87}
Inorrect difficulty counts: {1: 35, 5: 38, 4: 24, 3: 44, 2: 18}


In [218]:
for index, i in enumerate(incorrect):
    print("Index: {}".format(index))
    pretty_print_prediction(i)

Index: 0
In the United States, social activists who strongly BLANK0 a particular law can attempt to obtain a constitutional amendment to BLANK1 it.
Difficulty                                                        1
URL                                                                
candidates        [concur with, rescind, object to, repeal, disa...
id                                                             None
num_blanks                                                        2
question          In the United States, social activists who str...
solution_index                                                    1
source                                        SAT 8-10 Section 3 Q2
Name: 210, dtype: object
[2]
[0, 13.876409877668301, 16.47472762156393, 9.990117159522276, 0]
------------------------------------------------------------------------------
Index: 1
African American poet Lucille Clifton writes in a notably BLANK style, achieving great impact in a few unadorned words.
Diffic

Name: 127, dtype: object
[1]
[0, 3.365140353815311, 0, 0, 0]
------------------------------------------------------------------------------
Index: 147
Nineteenth-century Plains Indians valued BLANK quite highly and expected their elites to be the most giving of all.
Difficulty                                                        5
URL                                                                
candidates        [intrepidity, reticence, candor, jocularity, m...
id                                                             None
num_blanks                                                        1
question          Nineteenth-century Plains Indians valued BLANK...
solution_index                                                    4
source                                        SAT 8-10 Section 9 Q6
Name: 227, dtype: object
[0]
[0, 0, 0, 0, 0]
------------------------------------------------------------------------------
Index: 148
It was not until Dr. Anna Reinstein BLANK0 the results