## Friends Sentiment Analysis

In [1]:
import pandas as pd
# Load the local CSV copy of the quotes.
# NOTE(review): friends_quotes is not referenced by any other visible cell
# (the analysis below works on df_friends, built from the API response) —
# confirm this load is still needed.
friends_quotes = pd.read_csv("friends_data.csv")

In [2]:
import requests
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
nltk.download('stopwords')

#TextBlob
from textblob import TextBlob

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/carlotaportillo/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
# Fetch the lines from the API running on localhost.
# The timeout stops the notebook from hanging indefinitely when the server is
# not responding, and raise_for_status() reports an HTTP error right here
# instead of letting it surface as a confusing JSON/KeyError in a later cell.
quotes = requests.get("http://127.0.0.1:5000/lines", timeout=30)
quotes.raise_for_status()

In [4]:
# Decode the JSON response body into Python objects. Despite its name,
# `to_json` holds the parsed payload, not a JSON string.
to_json = quotes.json()

In [5]:
# Build the working DataFrame from the decoded API payload.
df_friends = pd.DataFrame(to_json)

In [6]:
# Remove the '_id' column, which is not used in the analysis.
# drop(..., errors='ignore') keeps the cell re-runnable: the previous
# `del df_friends['_id']` raised KeyError the second time it was executed.
df_friends = df_friends.drop(columns='_id', errors='ignore')

In [7]:
# Show the full text of every cell instead of truncating long values.
pd.set_option('display.max_colwidth', None)
df_friends

Unnamed: 0,line,name,episode
0,"No Mom, I don't have a restaurant, I work in a restaurant.",Monica,Episode 2
1,(entering) Hi guys!,Phoebe,3.0
2,"Hey, Pheebs! Hi!",All,3.0
3,"Hey. Oh, oh, how'd it go?",Ross,3.0
4,"Um, not so good. He walked me to the subway and said 'We should do this again!'",Phoebe,3.0
...,...,...,...
727,"You are, you're welling up.",Ross,2.0
728,Am not!,Monica,2.0
729,You're gonna be an aunt.,Ross,2.0
730,(pushes him and starts to cry) Oh shut up!,Monica,2.0


### 1.1 Tokenization

In [8]:
# NOTE(review): the visible cells tokenize with RegexpTokenizer only and never
# call a punkt-based tokenizer — confirm this download is still needed.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/carlotaportillo/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [9]:
# Try the tokenizer on the first line before applying it to the whole column.
tokenizer = RegexpTokenizer(r'\w+')
first_line = df_friends['line'].iloc[0]
tokens = tokenizer.tokenize(first_line)
tokens

['No',
 'Mom',
 'I',
 'don',
 't',
 'have',
 'a',
 'restaurant',
 'I',
 'work',
 'in',
 'a',
 'restaurant']

In [10]:
def tokenize(string):
    r"""Split ``string`` into its runs of word characters.

    The text is matched against the regular expression ``\w+``; everything
    between matches (spaces, punctuation, apostrophes) is discarded, so a
    contraction such as "don't" comes back as two tokens, "don" and "t".

    Parameters
    ----------
    string : str
        The text to tokenize.

    Returns
    -------
    list of str
        The matched tokens, in order of appearance.
    """
    return RegexpTokenizer(r'\w+').tokenize(string)

In [11]:
# Replace the raw text in 'line' with its list of tokens.
# NOTE(review): this overwrites the original text in place, so re-running the
# cell without rebuilding df_friends would feed token lists back into
# tokenize() instead of strings.
df_friends['line'] = df_friends['line'].apply(tokenize)
df_friends.head() 

Unnamed: 0,line,name,episode
0,"[No, Mom, I, don, t, have, a, restaurant, I, work, in, a, restaurant]",Monica,Episode 2
1,"[entering, Hi, guys]",Phoebe,3.0
2,"[Hey, Pheebs, Hi]",All,3.0
3,"[Hey, Oh, oh, how, d, it, go]",Ross,3.0
4,"[Um, not, so, good, He, walked, me, to, the, subway, and, said, We, should, do, this, again]",Phoebe,3.0


In [16]:
# NOTE(review): 'stopwords' is already downloaded in the import cell at the
# top of the notebook, so this repeat is redundant (harmless: the output
# reports the package as already up to date).
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/carlotaportillo/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [17]:
# English stop-word list from NLTK.
# NOTE(review): `stop` is not read by any visible cell — stop_words() calls
# stopwords.words('english') itself. Confirm it is used elsewhere.
stop = stopwords.words('english')

In [15]:
# Join each token list back into one space-separated string.
# NOTE(review): the following cell splits on " " again, so this join/split
# pair just round-trips the tokens. This cell's execution count (15) is also
# lower than the two cells above it (16, 17), i.e. the saved order differs
# from the order in which the cells were run.
df_friends['line'] = df_friends['line'].apply(lambda x: " ".join(x))
df_friends.head()

Unnamed: 0,line,name,episode
0,No Mom I don t have a restaurant I work in a restaurant,Monica,Episode 2
1,entering Hi guys,Phoebe,3.0
2,Hey Pheebs Hi,All,3.0
3,Hey Oh oh how d it go,Ross,3.0
4,Um not so good He walked me to the subway and said We should do this again,Phoebe,3.0


In [18]:
# Turn each space-joined string back into a list of tokens.
df_friends['line'] = df_friends['line'].apply(lambda text: text.split(" "))

In [19]:
# Check that 'line' holds token lists again after the split.
df_friends.head()

Unnamed: 0,line,name,episode
0,"[No, Mom, I, don, t, have, a, restaurant, I, work, in, a, restaurant]",Monica,Episode 2
1,"[entering, Hi, guys]",Phoebe,3.0
2,"[Hey, Pheebs, Hi]",All,3.0
3,"[Hey, Oh, oh, how, d, it, go]",Ross,3.0
4,"[Um, not, so, good, He, walked, me, to, the, subway, and, said, We, should, do, this, again]",Phoebe,3.0


### 1.2 Removing stop words

In [20]:
def stop_words(lista):
    """Drop English stop words from a token list and re-join what is left.

    Parameters
    ----------
    lista : iterable of str
        The tokens of one line, in order.

    Returns
    -------
    str
        The tokens that are not in NLTK's English stop-word list, joined by
        single spaces (an empty string when nothing is left).

    Notes
    -----
    * Tokens are compared as-is (case-sensitive), exactly as before, so a
      capitalised token such as "I" is kept when only its lowercase form is
      in the stop-word list.
    * The stop-word set is built on the first call and cached on the function
      object. Previously the corpus was re-read and the set rebuilt on every
      call, i.e. once per row under ``Series.apply``.
    """
    # Cache on the function itself so no extra module-level name is required.
    english = getattr(stop_words, "_english", None)
    if english is None:
        english = stop_words._english = frozenset(stopwords.words('english'))
    return " ".join(token for token in lista if token not in english)

In [21]:
# Strip stop words; each token list becomes a single space-joined string.
df_friends['line'] = df_friends['line'].apply(stop_words)
df_friends.head()

Unnamed: 0,line,name,episode
0,No Mom I restaurant I work restaurant,Monica,Episode 2
1,entering Hi guys,Phoebe,3.0
2,Hey Pheebs Hi,All,3.0
3,Hey Oh oh go,Ross,3.0
4,Um good He walked subway said We,Phoebe,3.0


### 2.1 Sentiment analysis:

In [25]:
# Fetch the VADER lexicon that SentimentIntensityAnalyzer loads its scores from.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/carlotaportillo/nltk_data...


True

In [26]:
def sentimentAnalysis(sentence):
    """Return VADER's compound polarity score for ``sentence``.

    Parameters
    ----------
    sentence : str
        The text to score.

    Returns
    -------
    float
        The ``'compound'`` entry of
        ``SentimentIntensityAnalyzer.polarity_scores(sentence)``.

    Notes
    -----
    The analyzer is created on the first call and reused afterwards.
    Previously a new ``SentimentIntensityAnalyzer`` was constructed for every
    sentence, repeating its set-up work once per row under ``Series.apply``.
    The scores themselves are unchanged.
    """
    # Cache the analyzer on the function object; it holds no per-sentence state
    # that this function relies on.
    analyzer = getattr(sentimentAnalysis, "_analyzer", None)
    if analyzer is None:
        analyzer = sentimentAnalysis._analyzer = SentimentIntensityAnalyzer()
    return analyzer.polarity_scores(sentence)['compound']

In [29]:
# Score every cleaned line and store the compound value in its own column.
df_friends['quotes_sentiment_compound'] = df_friends['line'].apply(sentimentAnalysis)
df_friends.head()

Unnamed: 0,line,name,episode,quotes_sentiment_compound
0,No Mom I restaurant I work restaurant,Monica,Episode 2,-0.296
1,entering Hi guys,Phoebe,3.0,0.0
2,Hey Pheebs Hi,All,3.0,0.0
3,Hey Oh oh go,Ross,3.0,0.0
4,Um good He walked subway said We,Phoebe,3.0,0.4404


In [39]:
# Mean compound score per speaker, ordered from lowest to highest.
(
    df_friends
    .groupby("name")["quotes_sentiment_compound"]
    .mean()
    .sort_values()
)

name
Carol                      -0.047233
Paula                      -0.021900
Bernice                     0.000000
Paolo                       0.040569
Robbie                      0.045260
Susan                       0.048011
Monica                      0.089044
Mr. Heckles                 0.108080
Lizzie                      0.108445
Phoebe                      0.117571
Mrs. Geller                 0.119530
Joey                        0.145465
Ross                        0.167445
Marsha                      0.168775
All                         0.172123
Rachel                      0.172487
Barry                       0.178237
Chandler                    0.196692
The Guys                    0.202300
Jill                        0.257069
Chandler and Joey           0.296000
Mr. Geller                  0.309440
Alan                        0.328250
Dr. Oberman                 0.389150
Monica, Joey, and Phoebe    0.440400
Name: quotes_sentiment_compound, dtype: float64