In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.set_option('display.width', 1000)

In [3]:
import spacy
import neuralcoref
from spacy import displacy


nlp = spacy.load('en_core_web_sm')

In [4]:
# Let's try before using the conversion dictionary:
neuralcoref.add_to_pipe(nlp)

<spacy.lang.en.English at 0x7fd809c47fd0>

### Replace pronouns

In [5]:
doc = nlp(u'The meatballs were not dry, they taste like rubber')

In [6]:
doc._.coref_clusters

[The meatballs: [The meatballs, they]]

In [7]:
doc._.coref_resolved

'The meatballs were not dry, The meatballs taste like rubber'

### Dependencies

In [9]:
doc = nlp("The meatballs were very not dry, they taste like rubber")
for token in doc:
    print(token, token.det_)

AttributeError: 'spacy.tokens.token.Token' object has no attribute 'det_'

In [None]:
# Load opinion lexicon
neg_file = open("opinion-lexicon-English/neg_words.txt",encoding = "ISO-8859-1")
pos_file = open("opinion-lexicon-English/pos_words.txt",encoding = "ISO-8859-1")
neg = [line.strip() for line in neg_file.readlines()]
pos = [line.strip() for line in pos_file.readlines()]
opinion_words = neg + pos


In [None]:
from collections import Counter, defaultdict


In [None]:
def feature_sentiment(sentence):
    '''
    input: dictionary and sentence
    function: appends dictionary with new features if the feature did not exist previously,
              then updates sentiment to each of the new or existing features
    output: updated dictionary
    '''

    sent_dict = Counter()
    sentence = nlp(sentence)
    debug = 0
    for token in sentence:
        # check if the word is an opinion word, then assign sentiment
        if token.text in opinion_words:
            sentiment = 1 if token.text in pos else -1
            
            # if target is an adverb modifier (i.e. pretty, highly, etc.)
            # but happens to be an opinion word, ignore and pass
            if (token.dep_ == "advmod"):
                continue
            elif (token.dep_ == "amod"):
                sent_dict[token.head.text] += sentiment
            # for opinion words that are adjectives, adverbs, verbs...
            else:
                for child in token.children:
                    # if there's a adj modifier (i.e. very, pretty, etc.) add more weight to sentiment
                    # This could be better updated for modifiers that either positively or negatively emphasize
                    if ((child.dep_ == "amod") or (child.dep_ == "advmod")) and (child.text in opinion_words):
                        sentiment *= 1.5
                    # check for negation words and flip the sign of sentiment
                    if child.dep_ == "neg":
                        sentiment *= -1
                        
                for child in token.children:
                    # if verb, check if there's a direct object
                    if (token.pos_ == "VERB") & (child.dep_ == "dobj"):                        
                        sent_dict[child.text] += sentiment
                        # check for conjugates (a AND b), then add both to dictionary
                        subchildren = []
                        conj = 0
                        for subchild in child.children:
                            if subchild.text == "and":
                                conj=1
                            if (conj == 1) and (subchild.text != "and"):
                                subchildren.append(subchild.text)
                                conj = 0
                        for subchild in subchildren:
                            sent_dict[subchild] += sentiment

                # check for negation
                for child in token.head.children:
                    noun = ""
                    if ((child.dep_ == "amod") or (child.dep_ == "advmod")) and (child.text in opinion_words):
                        sentiment *= 1.5
                    # check for negation words and flip the sign of sentiment
                    if (child.dep_ == "neg"): 
                        sentiment *= -1
                
                # check for nouns
                for child in token.head.children:
                    noun = ""
                    if (child.pos_ == "NOUN") and (child.text not in sent_dict):
                        noun = child.text
                        # Check for compound nouns
                        for subchild in child.children:
                            if subchild.dep_ == "compound":
                                noun = subchild.text + " " + noun
                        sent_dict[noun] += sentiment
                    debug += 1
    return sent_dict


In [None]:
# test code for feature sentiment
w= "The sushi here is good. bread was not bad, but the music was freaking terrible."
feature_sentiment(w)

In [None]:
res_reviews = pd.read_csv("Restaurant_Reviews.tsv", sep='\t')

In [None]:
res_reviews

In [None]:
res_reviews.Review

In [None]:
small = res_reviews[:500]

In [None]:
small['aspect'] = small['Review'].apply(lambda x : feature_sentiment(x))

In [None]:
small

In [None]:
for i in small.iloc[498]:
    print(i)