In [1]:
import spacy
import neuralcoref
from spacy import displacy


nlp = spacy.load('en_core_web_sm')

In [2]:
# Let's try before using the conversion dictionary:
neuralcoref.add_to_pipe(nlp)

<spacy.lang.en.English at 0x7fc3070d2a50>

### Replace pronouns

In [3]:
doc = nlp(u'The meatballs were not dry, they taste like rubber')

In [4]:
doc._.coref_clusters

[The meatballs: [The meatballs, they]]

In [5]:
doc._.coref_resolved

'The meatballs were not dry, The meatballs taste like rubber'

### Dependencies

In [6]:
doc = nlp("The meatballs were very dry, they taste like rubber")
token = doc[1]
print(token)

meatballs


In [7]:
token.children

<generator at 0x7fc30618c230>

In [8]:
for i in token.children:
    print(i)

The


In [9]:
# nlp = spacy.load("en_core_web_sm")
# doc = nlp(u'The meatballs were very dry, they taste like rubber')
# # displacy.serve(doc, style="dep")
# displacy.render(doc, style='dep',jupyter=True)


In [10]:
# Load opinion lexicon
neg_file = open("opinion-lexicon-English/neg_words.txt",encoding = "ISO-8859-1")
pos_file = open("opinion-lexicon-English/pos_words.txt",encoding = "ISO-8859-1")
neg = [line.strip() for line in neg_file.readlines()]
pos = [line.strip() for line in pos_file.readlines()]
opinion_words = neg + pos


In [11]:
from collections import Counter, defaultdict


In [44]:
def feature_sentiment(sentence):
    '''
    input: dictionary and sentence
    function: appends dictionary with new features if the feature did not exist previously,
              then updates sentiment to each of the new or existing features
    output: updated dictionary
    '''

    sent_dict = Counter()
    word_dict = {}
    sentence = nlp(sentence)
    debug = 0
    for token in sentence:
        # check if the word is an opinion word, then assign sentiment
        if token.text in opinion_words:
#             print(token.head)
            sentiment = 1 if token.text in pos else -1
            
            # if target is an adverb modifier (i.e. pretty, highly, etc.)
            # but happens to be an opinion word, ignore and pass
            if (token.dep_ == "advmod"):
                continue
            elif (token.dep_ == "amod"):
                sent_dict[token.head.text] += sentiment
                word_dict[token.head.text] = token.text
            # for opinion words that are adjectives, adverbs, verbs...
            else:
                for child in token.children:
                    print(child)
                    # if there's a adj modifier (i.e. very, pretty, etc.) add more weight to sentiment
                    # This could be better updated for modifiers that either positively or negatively emphasize
                    if ((child.dep_ == "amod") or (child.dep_ == "advmod")) and (child.text in opinion_words):
                        sentiment *= 1.5
                    # check for negation words and flip the sign of sentiment
                    if child.dep_ == "neg":
                        sentiment *= -1
                        
                for child in token.children:
                    # if verb, check if there's a direct object
                    if (token.pos_ == "VERB") & (child.dep_ == "dobj"):                        
                        sent_dict[child.text] += sentiment
                        word_dict[child.text] = token.text
                        # check for conjugates (a AND b), then add both to dictionary
                        subchildren = []
                        conj = 0
                        for subchild in child.children:
                            if subchild.text == "and":
                                conj=1
                            if (conj == 1) and (subchild.text != "and"):
                                subchildren.append(subchild.text)
                                conj = 0
                        for subchild in subchildren:
                            sent_dict[subchild] += sentiment
                            word_dict[subchild] = token.text

                # check for negation
                for child in token.head.children:
                    noun = ""
                    if ((child.dep_ == "amod") or (child.dep_ == "advmod")) and (child.text in opinion_words):
                        sentiment *= 1.5
                    # check for negation words and flip the sign of sentiment
                    if (child.dep_ == "neg"): 
                        sentiment *= -1
                
                # check for nouns
                for child in token.head.children:
                    noun = ""
                    if (child.pos_ == "NOUN") and (child.text not in sent_dict):
                        noun = child.text
                        # Check for compound nouns
                        for subchild in child.children:
                            if subchild.dep_ == "compound":
                                noun = subchild.text + " " + noun
                        sent_dict[noun] += sentiment
                        word_dict[noun] = token.text
                    debug += 1
    return sent_dict, word_dict


In [49]:
def feature_sentiment(sentence):
    '''
    input: dictionary and sentence
    function: appends dictionary with new features if the feature did not exist previously,
              then updates sentiment to each of the new or existing features
    output: updated dictionary
    '''

    sent_dict = Counter()
    word_dict = {}
    sentence = nlp(sentence)
    debug = 0
    for token in sentence:
        # check if the word is an opinion word, then assign sentiment
        if token.text in opinion_words:
            opinion = [token.text]
            sentiment = 1 if token.text in pos else -1
            
            # if target is an adverb modifier (i.e. pretty, highly, etc.)
            # but happens to be an opinion word, ignore and pass
            if (token.dep_ == "advmod"):
                continue
            elif (token.dep_ == "amod"):
                sent_dict[token.head.text] += sentiment
                word_dict[token.head.text] = token.text
            # for opinion words that are adjectives, adverbs, verbs...
            else:
                for child in token.children:
                    print(child)
                    # if there's a adj modifier (i.e. very, pretty, etc.) add more weight to sentiment
                    # This could be better updated for modifiers that either positively or negatively emphasize
                    if ((child.dep_ == "amod") or (child.dep_ == "advmod")) and (child.text in opinion_words):
                        sentiment *= 1.5
                    # check for negation words and flip the sign of sentiment
                    if child.dep_ == "neg":
                        sentiment *= -1
                        
                for child in token.children:
                    # if verb, check if there's a direct object
                    if (token.pos_ == "VERB") & (child.dep_ == "dobj"):                        
                        sent_dict[child.text] += sentiment
                        word_dict[child.text] = token.text
                        # check for conjugates (a AND b), then add both to dictionary
                        subchildren = []
                        conj = 0
                        for subchild in child.children:
                            if subchild.text == "and":
                                conj=1
                            if (conj == 1) and (subchild.text != "and"):
                                subchildren.append(subchild.text)
                                conj = 0
                        for subchild in subchildren:
                            sent_dict[subchild] += sentiment
                            word_dict[subchild] = token.text

                # check for negation
                for child in token.head.children:
                    noun = ""
                    if ((child.dep_ == "amod") or (child.dep_ == "advmod")) and (child.text in opinion_words):
                        sentiment *= 1.5
                    # check for negation words and flip the sign of sentiment
                    if (child.dep_ == "neg"): 
                        sentiment *= -1
                
                # check for nouns
                for child in token.head.children:
                    noun = ""
                    if (child.pos_ == "NOUN") and (child.text not in sent_dict):
                        noun = child.text
                        # Check for compound nouns
                        for subchild in child.children:
                            if subchild.dep_ == "compound":
                                noun = subchild.text + " " + noun
                        sent_dict[noun] += sentiment
                        word_dict[noun] = token.text
                    debug += 1
    return sent_dict, word_dict



In [48]:
# test code for feature sentiment
w= "The sushi here is good. bread was not bad, but the music was freaking terrible."
feature_sentiment(w)

music
was
terrible
.


(Counter({'sushi': 1, 'bread': 1, 'music': -1}),
 {'sushi': 'good', 'bread': 'bad', 'music': 'terrible'})

In [38]:
word_dict = {}
word_dict['1wqqwe'] = 'asdasd'

In [39]:
word_dict

{'1wqqwe': 'asdasd'}