In [1]:
from pymongo import MongoClient
import pymongo
import json
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import seaborn as sns

%matplotlib inline

## 1. Populate Dataframes

In [2]:
# read from csv files
 
topicsDF = pd.read_csv(r'topics.csv',encoding='latin-1')

twitterDF = pd.read_csv(r'twitter.csv',encoding='latin-1')

In [3]:
# Total number for each company
wf_twitterDF= twitterDF[(twitterDF.source == 'WF') & (twitterDF.isRelated=='Y')]
print("WF Twits: "+ str(wf_twitterDF.size))

sap_twitterDF = twitterDF[(twitterDF.source == 'SAP') & (twitterDF.isRelated=='Y')]
print("SAP Twits: "+ str(sap_twitterDF.size))

ww_twitterDF = twitterDF[(twitterDF.source == 'WW') & (twitterDF.isRelated=='Y')]
print("WeWork Twits: "+ str(ww_twitterDF.size))

WF Twits: 8760
SAP Twits: 10480
WeWork Twits: 980


In [4]:
wf_twitterDF.head()

Unnamed: 0.1,Unnamed: 0,id,text,URL,createdTimestamp,quoteCount,replyCount,retweetCount,favoriteCount,isInReplyTouser_id,userLocation,userFollowersCount,userFriendsCount,lang,isRelated,source,sourceDate,NP,isInReplyTo,user_id
0,0,5dab73ca11bb5a1f6c86dfbd,Wells Fargo names outsider as new CEO three ye...,https://t.co/WBtIAtX8Gf,2019-09-27 23:51:00,2,0,0,0,,,4018,3918,en,Y,WF,2019-09-27,,0.0,4867858000.0
1,1,5dab73cb11bb5a1f6c86dfbe,Wells Fargo names outsider as new CEO three ye...,https://t.co/veyphufgzw,2019-09-27 23:47:53,0,0,0,0,,,4,1,en,Y,WF,2019-09-27,,0.0,1.088562e+18
2,2,5dab73cb11bb5a1f6c86dfbf,Charles Scharf as CEO Can Revive Wells Fargo. ...,https://t.co/7q3CanBgNk,2019-09-27 23:39:53,0,0,0,0,,"New York, New York",2045,1023,en,Y,WF,2019-09-27,,0.0,1.164775e+18
3,3,5dab73cb11bb5a1f6c86dfc0,Wells Fargo Finally Appoints New CEO Who Is A ...,https://t.co/cghUhYeY8U,2019-09-27 23:39:38,0,0,0,0,,,237,367,en,Y,WF,2019-09-27,,0.0,8.356552e+17
4,4,5dab73cb11bb5a1f6c86dfc1,Wells Fargo gives new CEO a 40% pay boost to $...,https://t.co/1Dl0uMQxzV,2019-09-27 23:37:41,0,0,1,0,,"Orange County, CA",212542,2481,en,Y,WF,2019-09-27,,0.0,17820490.0


## 2. Run through Vader Sentiment - WF

In [5]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [6]:
def get_sentiment_score (row):
    compound=np.nan
    if row['text'] is not None:
        analyzer = SentimentIntensityAnalyzer()
        vs = analyzer.polarity_scores(row['text'])
        compound=vs['compound']
    return compound

def set_sentiment (row):
    #1. positive sentiment: compound score >= 0.50
    #2. neutral sentiment: (compound score > -0.50) and (compound score < 0.50)
    #3. negative sentiment: compound score <= -0.50
    sentiment= 'E' # Neutral
    upper_limit = 0.50
    lower_limit = -0.50
    if (row['vader_compound'] >= upper_limit):
        sentiment = 'P' # Positive
    elif (row['vader_compound'] <= lower_limit):
        sentiment = 'N' # Negative
        
    return sentiment
        

In [7]:
# https://stackoverflow.com/questions/26886653/pandas-create-new-column-based-on-values-from-other-columns-apply-a-function-o

wf_twitterDF['vader_compound'] = wf_twitterDF.apply (lambda row: get_sentiment_score(row), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [8]:
wf_twitterDF['vader_sentiment'] = wf_twitterDF.apply (lambda row: set_sentiment(row), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [9]:
wf_twitterDF.head()

Unnamed: 0.1,Unnamed: 0,id,text,URL,createdTimestamp,quoteCount,replyCount,retweetCount,favoriteCount,isInReplyTouser_id,...,userFriendsCount,lang,isRelated,source,sourceDate,NP,isInReplyTo,user_id,vader_compound,vader_sentiment
0,0,5dab73ca11bb5a1f6c86dfbd,Wells Fargo names outsider as new CEO three ye...,https://t.co/WBtIAtX8Gf,2019-09-27 23:51:00,2,0,0,0,,...,3918,en,Y,WF,2019-09-27,,0.0,4867858000.0,-0.6124,N
1,1,5dab73cb11bb5a1f6c86dfbe,Wells Fargo names outsider as new CEO three ye...,https://t.co/veyphufgzw,2019-09-27 23:47:53,0,0,0,0,,...,1,en,Y,WF,2019-09-27,,0.0,1.088562e+18,0.25,E
2,2,5dab73cb11bb5a1f6c86dfbf,Charles Scharf as CEO Can Revive Wells Fargo. ...,https://t.co/7q3CanBgNk,2019-09-27 23:39:53,0,0,0,0,,...,1023,en,Y,WF,2019-09-27,,0.0,1.164775e+18,0.5267,P
3,3,5dab73cb11bb5a1f6c86dfc0,Wells Fargo Finally Appoints New CEO Who Is A ...,https://t.co/cghUhYeY8U,2019-09-27 23:39:38,0,0,0,0,,...,367,en,Y,WF,2019-09-27,,0.0,8.356552e+17,0.25,E
4,4,5dab73cb11bb5a1f6c86dfc1,Wells Fargo gives new CEO a 40% pay boost to $...,https://t.co/1Dl0uMQxzV,2019-09-27 23:37:41,0,0,1,0,,...,2481,en,Y,WF,2019-09-27,,0.0,17820490.0,0.5106,P


In [10]:
# Save to csv for future analysis
wf_twitterDF.to_csv('twitter_wf_vader.csv')

## 3. Run through Vader Sentiment - SAP

In [11]:
sap_twitterDF['vader_compound'] = sap_twitterDF.apply (lambda row: get_sentiment_score(row), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [12]:
sap_twitterDF['vader_sentiment'] = sap_twitterDF.apply (lambda row: set_sentiment(row), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [13]:
sap_twitterDF.head()

Unnamed: 0.1,Unnamed: 0,id,text,URL,createdTimestamp,quoteCount,replyCount,retweetCount,favoriteCount,isInReplyTouser_id,...,userFriendsCount,lang,isRelated,source,sourceDate,NP,isInReplyTo,user_id,vader_compound,vader_sentiment
619,619,5dae77e811bb5a25b069085e,"Experience Management: ""Start with empathy for...",,2019-10-09 18:10:03,0,0,0,0,,...,3925,en,Y,SAP,2019-10-09,,0.0,259341900.0,0.0,E
621,621,5dae77e811bb5a25b0690860,"Experience Management: ""Start with empathy for...",https://t.co/t0MxMZ5DxI,2019-10-09 18:00:01,0,0,2,5,,...,3914,en,Y,SAP,2019-10-09,,0.0,326687300.0,0.0,E
633,633,5dae77e911bb5a25b069086c,SAP CEO has made clear: the new economy is he...,https://t.co/tqPgEZix2U,2019-10-09 14:36:00,0,0,0,0,,...,2169,en,Y,SAP,2019-10-09,,0.0,110512900.0,0.1027,E
643,643,5dae77e911bb5a25b0690876,Thanks for hosting Bill Mcdermott CEO of SAP ...,,2019-10-09 05:21:42,0,0,0,1,,...,319,en,Y,SAP,2019-10-09,,0.0,1.02489e+18,0.7184,P
645,645,5dae784011bb5a25b0690878,"New article: ""SAP CEO Bill McDermott stepping ...",https://t.co/DoQvIY7sVy,2019-10-10 23:58:48,0,0,0,1,,...,279,en,Y,SAP,2019-10-10,,0.0,7.297547e+17,0.0,E


In [14]:
# Save to csv for future analysis
sap_twitterDF.to_csv('twitter_sap_vader.csv')