In [1]:
import re
import nltk
import pandas as pd
import numpy as np
from google.cloud import language

In [10]:
# Build a client from the google.cloud `language` module imported in the first cell
# and keep it at notebook scope.
# NOTE(review): googleAnnotate constructs its own client via language.Client(), so
# nothing visible in this notebook reads this variable — confirm it is still needed.
# NOTE(review): execution counts jump (In [1] -> In [10] -> In [109]); verify the
# notebook still works with Restart Kernel -> Run All.
language_client = language.Client()

In [109]:
def _annotation_record(annText):
    '''Flatten one annotate_text() result into plain lists/dicts keyed by output column.'''
    return {
        'sentences': [{'content': s.content, 'begin': s.begin, 'score': s.sentiment.score,
                       'magnitude': s.sentiment.magnitude} for s in annText.sentences],
        'sentiment': {'score': annText.sentiment.score, 'magnitude': annText.sentiment.magnitude},
        'entities': [{'name': e.name, 'type': e.entity_type, 'meta': e.metadata,
                      'salience': e.salience, 'mentions': e.mentions} for e in annText.entities],
        'tokens': [{'content': t.text_content, 'begin': t.text_begin, 'pos': t.part_of_speech,
                    'index': t.edge_index, 'label': t.edge_label, 'lemma': t.lemma}
                   for t in annText.tokens],
    }


def _object_column(cells):
    '''Pack arbitrary Python objects (lists/dicts) into a 1-D object array, one per row.'''
    column = np.empty(len(cells), dtype=object)
    for pos, cell in enumerate(cells):
        # Assign element by element so numpy never tries to unpack nested lists
        # into extra dimensions.
        column[pos] = cell
    return column


def googleAnnotate(fileName, nrows=None, client=None):
    '''
    Annotate every dialogue line of a script CSV with the Google language API.

    Reads a header-less csv file in the form of [speaker, dialogue], passes each
    dialogue through document_from_text(...).annotate_text() and attaches the
    output to the dataframe:
    [speaker, dialogue, sentences, sentiment, entities, tokens]

    sentences: list with one dict {content, begin, score, magnitude} per sentence
               (score/magnitude are that sentence's sentiment)
    sentiment: dict {score, magnitude} for the entire dialogue
    entities:  list with one dict {name, type, meta, salience, mentions} per entity
    tokens:    list with one dict {content, begin, pos, index, label, lemma} per token

    Parameters
    ----------
    fileName : path of the csv file to read
    nrows    : optional number of rows to read (forwarded to pd.read_csv);
               None reads the whole file
    client   : optional, already constructed API client exposing
               document_from_text(); when None a new language.Client() is created

    Returns
    -------
    pandas.DataFrame with the four annotation columns added.

    One API request is issued per row, so use nrows to keep trial runs small.
    NOTE(review): rows whose dialogue is missing (NaN) are sent to the API as-is;
    confirm how the client handles them.
    '''
    if client is None:
        client = language.Client()
    df = pd.read_csv(fileName, names=['speaker', 'dialogue'], nrows=nrows)

    # Iterate over the values themselves instead of df.loc[i, ...] so the result
    # does not depend on the frame having a default 0..n-1 index.
    records = [_annotation_record(client.document_from_text(text).annotate_text())
               for text in df['dialogue']]

    # DataFrame.set_value was removed from pandas; whole object columns are
    # attached in one assignment each instead of cell-by-cell writes.
    for column in ('sentences', 'sentiment', 'entities', 'tokens'):
        df[column] = _object_column([record[column] for record in records])
    return df

In [110]:
# Annotate only the first 5 rows of the csv: the second positional argument is
# nrows, which googleAnnotate forwards to pd.read_csv.
df = googleAnnotate('prep_scripts/thor_tw.csv', 5)

In [111]:
# Display the first rows of the annotated frame as this cell's output.
df.head()

Unnamed: 0,speaker,dialogue,sentences,sentiment,entities,tokens
0,narrator,first lines,"[{u'content': u'first lines', u'begin': 0, u's...","{u'score': 0, u'magnitude': 0}","[{u'salience': 1, u'meta': {}, u'type': u'OTHE...","[{u'index': 1, u'begin': 0, u'pos': u'ADJ', u'..."
1,jane foster,wait for it.,"[{u'content': u'wait for it.', u'begin': 0, u'...","{u'score': 0, u'magnitude': 0}",[],"[{u'index': 0, u'begin': 0, u'pos': u'VERB', u..."
2,darcy lewis,can i turn on the radio?,"[{u'content': u'can i turn on the radio?', u'b...","{u'score': 0, u'magnitude': 0}","[{u'salience': 1, u'meta': {}, u'type': u'OTHE...","[{u'index': 2, u'begin': 0, u'pos': u'VERB', u..."
3,jane foster,no!,"[{u'content': u'no!', u'begin': 0, u'score': -...","{u'score': -0.3, u'magnitude': 0.3}",[],"[{u'index': 0, u'begin': 0, u'pos': u'X', u'la..."
4,erik selvig,"jane, you cant keep doing this.","[{u'content': u'jane, you cant keep doing this...","{u'score': -0.4, u'magnitude': 0.4}",[],"[{u'index': 4, u'begin': 0, u'pos': u'NOUN', u..."
