<a href="https://colab.research.google.com/github/ujjwalbb30/DATA_602_Twitter_Sentiment_Analysis_Stock_Market_Prediction/blob/main/python_notebooks/Step_3_sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Performing Sentiment Analysis on prepared tweets dataframes**

### Now that we have prepared 4 different aggregated tweet dataframes for each company (described in notebook 'Step_2_prepare_tweets_dataframes'), we will perform sentiment analysis on all of these dataframes in the following manner:

### (1) We will clean and preprocess all the data
### (2) We will use the cleaned and preprocessed data to get polarity, subjectivity, sentiment, negative, positive, neutral and compound values for each row in each dataframe, and add each of them as a separate column.
### (3) Then, we will export these dataframes for further use.

In [None]:
# installing the required libraries
!pip install tweet-preprocessor

Collecting tweet-preprocessor
  Downloading tweet_preprocessor-0.6.0-py3-none-any.whl (27 kB)
Installing collected packages: tweet-preprocessor
Successfully installed tweet-preprocessor-0.6.0


In [None]:
# importing required dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from textblob import TextBlob as tb
import sys
import os
import nltk
import re
import string
from wordcloud import WordCloud, STOPWORDS
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.stem import SnowballStemmer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import CountVectorizer
import preprocessor as p



In [None]:
# mounting the drive to access datasets
# (Colab-only step: the CSV paths used in this notebook all start with /content/gdrive/MyDrive)
from google.colab import drive 
drive.mount('/content/gdrive')

Mounted at /content/gdrive


# **For Apple**

## **Dataset Cleaning**

### **Dataset - 1 : Same Day Tweets**

In [None]:
# loading the same-day Apple tweets and discarding the 'Unnamed: 0' column
# (presumably the row index that was saved along with the CSV)
comb_tweets_apple1 = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Apple/comb_tweets_apple1.csv')
    .drop(columns=['Unnamed: 0'])
)
display(comb_tweets_apple1)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554
...,...,...,...,...,...
1822,2019-12-28,Some top money flow for Dec 27Start your free ...,127,545,1579
1823,2019-12-29,Notable activity on social media in the past w...,74,307,528
1824,2019-12-30,Siguenos en telegram https://t.me/tradingbalea...,271,725,1821
1825,2019-12-31,Some top money flow for Dec 30Start your free ...,244,404,2088


In [None]:
# copying the raw tweet text from 'body' into a new 'clean_text' column;
# the cleaning cells that follow overwrite 'clean_text' only
comb_tweets_apple1['clean_text'] = comb_tweets_apple1['body']
display(comb_tweets_apple1['clean_text'])

0       lx21 made $10,008  on $AAPL -Check it out! htt...
1       $aapl could be a problem sitting on support th...
2       Zacks' Bull Of The Day: Apple http://seekingal...
3       Free 5€ in account balance for first 100.000 m...
4       Free 5€ in account balance for first 100.000 m...
                              ...                        
1822    Some top money flow for Dec 27Start your free ...
1823    Notable activity on social media in the past w...
1824    Siguenos en telegram https://t.me/tradingbalea...
1825    Some top money flow for Dec 30Start your free ...
1826    Popular: $AAPL, $IWM, $BTAI, $FTNT, $ALIM, $BP...
Name: clean_text, Length: 1827, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase (clean_tweets filters on the characters a-z and 0-9)
comb_tweets_apple1['clean_text'] = comb_tweets_apple1['clean_text'].str.lower()

# defining a function to remove hashtags, mentions, links, bracketed text, non-alphanumeric characters and ticker symbols
def clean_tweets(text):
    """Return a cleaned, lower-case version of a tweet for sentiment scoring.

    Steps, in order: lower-case the text, drop @mentions and #hashtags, drop
    links, drop anything enclosed in square brackets, replace every character
    other than a-z / 0-9 with a space, remove the company ticker symbols and
    finally pass the result through ``p.clean`` (tweet-preprocessor).

    Parameters
    ----------
    text : str
        Tweet text. Anything that is not a ``str`` (for example the NaN that
        pandas uses for a missing value) is treated as an empty tweet.

    Returns
    -------
    str
        The cleaned text; ``""`` when the input is not a string.
    """
    # a missing value would otherwise make re.sub raise TypeError
    if not isinstance(text, str):
        return ""
    # the character filter below keeps only a-z and 0-9, so upper-case letters
    # would be erased if the caller had not lower-cased the text already
    text = text.lower()
    # removing the mentions and hashtags
    text = re.sub(r"@[A-Za-z0-9_]+", " ", text)
    text = re.sub(r"#[A-Za-z0-9_]+", " ", text)
    # removing links; the dot is escaped so that only a literal "www." prefix
    # counts (an unescaped dot also matched "www" followed by a space and
    # then swallowed the next word)
    text = re.sub(r"http\S+", " ", text)
    text = re.sub(r"www\.\S+", " ", text)
    # removing text enclosed in square brackets (raw string: "\[" is not a
    # valid escape in a normal string literal)
    text = re.sub(r"\[.*?\]", " ", text)
    # replacing everything that is not a lower-case letter or a digit with a
    # space; this covers all punctuation, so no separate punctuation pass is needed
    text = re.sub(r"[^a-z0-9]", " ", text)
    # removing ticker symbols only where they stand alone as a word, so words
    # that merely contain one (e.g. "google") are left intact
    # NOTE(review): "googl" is removed as a whole token here; the previous
    # substring match reduced it to a stray "l" - confirm this is wanted
    text = re.sub(r"\b(?:amzn|aapl|msft|tsla|googl?)\b", "", text)
    text = p.clean(text)
    return text

In [None]:
# running clean_tweets over every row, then previewing the first few cleaned tweets
comb_tweets_apple1['clean_text'] = comb_tweets_apple1['clean_text'].apply(clean_tweets)
display(comb_tweets_apple1['clean_text'].head())

0    lx21 made on check it out learn exe watt imrs ...
1    could be a problem sitting on support that bre...
2    zacks bull of the day apple have a great weeke...
3    free in account balance for first members lnkd...
4    free in account balance for first members lnkd...
Name: clean_text, dtype: object

### **Dataset - 2 : Last 3 Days Tweets**

In [None]:
# loading the last-3-days Apple tweets and discarding the 'Unnamed: 0' column
# (presumably the row index that was saved along with the CSV)
comb_tweets_apple3d = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Apple/comb_tweets_apple3d.csv')
    .drop(columns=['Unnamed: 0'])
)
print(comb_tweets_apple3d)

     updated_dates  ... like_num
0       2015-01-03  ...      981
1       2015-01-04  ...     1077
2       2015-01-05  ...     1069
3       2015-01-06  ...     1319
4       2015-01-07  ...     1572
...            ...  ...      ...
1820    2019-12-28  ...     4261
1821    2019-12-29  ...     3332
1822    2019-12-30  ...     3928
1823    2019-12-31  ...     4437
1824    2020-01-01  ...     4041

[1825 rows x 5 columns]


In [None]:
# copying the raw tweet text from 'body' into a new 'clean_text' column;
# the cleaning cells that follow overwrite 'clean_text' only
comb_tweets_apple3d['clean_text'] = comb_tweets_apple3d['body']
display(comb_tweets_apple3d)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,Zacks' Bull Of The Day: Apple http://seekingal...
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,Free 5€ in account balance for first 100.000 m...
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,Free 5€ in account balance for first 100.000 m...
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,Apple: Does The Party End In 2015? http://seek...
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,The Ever-Changing World Of Apple http://seekin...
...,...,...,...,...,...,...
1820,2019-12-28,Some top money flow for Dec 27Start your free ...,532,1081,4261,Some top money flow for Dec 27Start your free ...
1821,2019-12-29,Notable activity on social media in the past w...,397,1035,3332,Notable activity on social media in the past w...
1822,2019-12-30,Siguenos en telegram https://t.me/tradingbalea...,472,1577,3928,Siguenos en telegram https://t.me/tradingbalea...
1823,2019-12-31,Some top money flow for Dec 30Start your free ...,589,1436,4437,Some top money flow for Dec 30Start your free ...


In [None]:
# cleaning the tweets

# converting all the text to lowercase (clean_tweets filters on the characters a-z and 0-9)
comb_tweets_apple3d['clean_text'] = comb_tweets_apple3d['clean_text'].str.lower()

In [None]:
# running clean_tweets over every row, then previewing the first few cleaned tweets
comb_tweets_apple3d['clean_text'] = comb_tweets_apple3d['clean_text'].apply(clean_tweets)
display(comb_tweets_apple3d['clean_text'].head())

0    zacks bull of the day apple have a great weeke...
1    free in account balance for first members lnkd...
2    free in account balance for first members lnkd...
3    apple does the party end in long with successf...
4    the ever changing world of apple what dean kar...
Name: clean_text, dtype: object

### **Dataset - 3 : Last 7 Days Tweets**

In [None]:
# loading the last-7-days Apple tweets and discarding the 'Unnamed: 0' column
# (presumably the row index that was saved along with the CSV)
comb_tweets_apple7d = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Apple/comb_tweets_apple7d.csv')
    .drop(columns=['Unnamed: 0'])
)
print(comb_tweets_apple7d)

     updated_dates  ... like_num
0       2015-01-07  ...     2876
1       2015-01-08  ...     3472
2       2015-01-09  ...     3522
3       2015-01-10  ...     3616
4       2015-01-11  ...     3519
...            ...  ...      ...
1816    2019-12-28  ...     8743
1817    2019-12-29  ...     8104
1818    2019-12-30  ...     8661
1819    2019-12-31  ...     9614
1820    2020-01-01  ...     8830

[1821 rows x 5 columns]


In [None]:
# copying the raw tweet text from 'body' into a new 'clean_text' column;
# the cleaning cells that follow overwrite 'clean_text' only
comb_tweets_apple7d['clean_text'] = comb_tweets_apple7d['body']
display(comb_tweets_apple7d['clean_text'])

0       The Ever-Changing World Of Apple http://seekin...
1       One Thing Apple Should Do To Preserve Its Mark...
2       How iPhone Demand Could Impact Apple's Q2 Guid...
3       Apple: Asian Carriers Are Making iPhone 6 Chea...
4       Caturday: There is no MacBook, there is only Z...
                              ...                        
1816    Some top money flow for Dec 27Start your free ...
1817    Notable activity on social media in the past w...
1818    Siguenos en telegram https://t.me/tradingbalea...
1819    Some top money flow for Dec 30Start your free ...
1820    Popular: $AAPL, $IWM, $BTAI, $FTNT, $ALIM, $BP...
Name: clean_text, Length: 1821, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase (clean_tweets filters on the characters a-z and 0-9)
comb_tweets_apple7d['clean_text'] = comb_tweets_apple7d['clean_text'].str.lower()


In [None]:
# running clean_tweets over every row, then previewing the first few cleaned tweets
comb_tweets_apple7d['clean_text'] = comb_tweets_apple7d['clean_text'].apply(clean_tweets)
print(comb_tweets_apple7d['clean_text'].head())

0    the ever changing world of apple what dean kar...
1    one thing apple should do to preserve its mark...
2    how iphone demand could impact apple s q2 guid...
3    apple asian carriers are making iphone cheaper...
4    caturday there is no macbook there is only zuu...
Name: clean_text, dtype: object


### **Dataset - 4 : Previous Days Tweets**

In [None]:
# loading the previous-day Apple tweets and discarding the 'Unnamed: 0' column
# (presumably the row index that was saved along with the CSV)
comb_tweets_appleprev = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Apple/comb_tweets_appleprev.csv')
    .drop(columns=['Unnamed: 0'])
)
print(comb_tweets_appleprev)

     updated_dates  ... like_num
0       2015-01-01  ...       50
1       2015-01-02  ...      408
2       2015-01-03  ...      439
3       2015-01-04  ...      133
4       2015-01-05  ...      411
...            ...  ...      ...
1821    2019-12-28  ...     1058
1822    2019-12-29  ...     1460
1823    2019-12-30  ...      577
1824    2019-12-31  ...     2684
1825    2020-01-01  ...     1081

[1826 rows x 5 columns]


In [None]:
# copying the raw tweet text from 'body' into a new 'clean_text' column;
# the cleaning cells that follow overwrite 'clean_text' only
comb_tweets_appleprev['clean_text'] = comb_tweets_appleprev['body']
display(comb_tweets_appleprev['clean_text'].head())

0    lx21 made $10,008  on $AAPL -Check it out! htt...
1    @davenathan I just expected something more ext...
2    The Closing Bell is out! http://paper.li/BonaC...
3    The Closing Bell is out! http://paper.li/BonaC...
4    #Apple : Price Target Raised to $125.00 at Arg...
Name: clean_text, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase (the existing 'clean_text' column is overwritten, no new column is created)
comb_tweets_appleprev['clean_text'] = comb_tweets_appleprev['clean_text'].str.lower()


In [None]:
# running clean_tweets over every row, then previewing the first few cleaned tweets
comb_tweets_appleprev['clean_text'] = comb_tweets_appleprev['clean_text'].apply(clean_tweets)
print(comb_tweets_appleprev['clean_text'].head())

0    lx21 made on check it out learn exe watt imrs ...
1    i just expected something more extravagant lik...
2    the closing bell is out eurusd gbpusd usdjpy a...
3    the closing bell is out eurusd gbpusd usdjpy a...
4    price target raised to at argus free in accoun...
Name: clean_text, dtype: object


## **Finding Sentiment Analysis Values**

### **Dataset - 1 : Same Day Tweets**

In [None]:
# calculating TextBlob polarity and subjectivity for each row

# TextBlob is run once per row and both scores are read from that single result;
# building a separate TextBlob for each score analysed every text twice
tb_scores_apple1 = [tb(tweet_text).sentiment for tweet_text in comb_tweets_apple1['clean_text']]
comb_tweets_apple1['polarity'] = [score.polarity for score in tb_scores_apple1]
comb_tweets_apple1['subjectivity'] = [score.subjectivity for score in tb_scores_apple1]

comb_tweets_apple1.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227,lx21 made on check it out learn exe watt imrs ...,0.152047,0.469165
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562,could be a problem sitting on support that bre...,0.137389,0.445212
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192,zacks bull of the day apple have a great weeke...,0.186061,0.49581
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323,free in account balance for first members lnkd...,0.214288,0.494194
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554,free in account balance for first members lnkd...,0.171292,0.464973
5,2015-01-06,Apple: Does The Party End In 2015? http://seek...,115,228,442,apple does the party end in long with successf...,0.15294,0.461804
6,2015-01-07,The Ever-Changing World Of Apple http://seekin...,105,209,576,the ever changing world of apple what dean kar...,0.170265,0.452655
7,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,251,428,823,one thing apple should do to preserve its mark...,0.189625,0.4805
8,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,177,364,612,how iphone demand could impact apple s q2 guid...,0.186721,0.474236
9,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,50,208,286,apple asian carriers are making iphone cheaper...,0.260925,0.425825


In [None]:
# calculating the VADER negative, neutral, positive and compound scores for each row

# this is the first cell that uses VADER, so make sure its lexicon is available
nltk.download('vader_lexicon', quiet=True)

# the analyzer is built once and reused: creating a new SentimentIntensityAnalyzer
# inside the loop repeated its set-up work for every single row
vader_analyzer = SentimentIntensityAnalyzer()

# one dict per row with the keys 'neg', 'neu', 'pos' and 'compound'
sentiment1apple = [
    vader_analyzer.polarity_scores(tweet_text)
    for tweet_text in comb_tweets_apple1['clean_text']
]

# previewing a few entries only; the full list holds one dict per row
print(sentiment1apple[:5])

[{'neg': 0.049, 'neu': 0.795, 'pos': 0.156, 'compound': 0.9999}, {'neg': 0.055, 'neu': 0.825, 'pos': 0.12, 'compound': 0.9999}, {'neg': 0.047, 'neu': 0.772, 'pos': 0.181, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.762, 'pos': 0.193, 'compound': 1.0}, {'neg': 0.06, 'neu': 0.773, 'pos': 0.167, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.752, 'pos': 0.182, 'compound': 1.0}, {'neg': 0.055, 'neu': 0.781, 'pos': 0.164, 'compound': 1.0}, {'neg': 0.048, 'neu': 0.787, 'pos': 0.164, 'compound': 1.0}, {'neg': 0.054, 'neu': 0.786, 'pos': 0.159, 'compound': 1.0}, {'neg': 0.029, 'neu': 0.795, 'pos': 0.176, 'compound': 1.0}, {'neg': 0.036, 'neu': 0.752, 'pos': 0.212, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.795, 'pos': 0.155, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.76, 'pos': 0.188, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.78, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.075, 'neu': 0.785, 'pos': 0.14, 'compound': 1.0}, {'neg': 0.074, 'neu': 0.788, 'pos': 0.137, 'compound': 1.0}, {'neg': 0.058, 'neu': 

In [None]:
# converting the list of per-row VADER score dicts to a dataframe
# (one row per tweet row, one column per score key)
sentiment1apple_df = pd.DataFrame(sentiment1apple)
print(sentiment1apple_df)

        neg    neu    pos  compound
0     0.049  0.795  0.156    0.9999
1     0.055  0.825  0.120    0.9999
2     0.047  0.772  0.181    1.0000
3     0.046  0.762  0.193    1.0000
4     0.060  0.773  0.167    1.0000
...     ...    ...    ...       ...
1822  0.044  0.820  0.136    0.9999
1823  0.047  0.811  0.142    0.9999
1824  0.051  0.776  0.173    1.0000
1825  0.052  0.808  0.140    1.0000
1826  0.074  0.784  0.143    0.9988

[1827 rows x 4 columns]


In [None]:
# copying every VADER score column from the scores dataframe onto the original dataframe
for score_col in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_apple1[score_col] = sentiment1apple_df[score_col]

comb_tweets_apple1.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227,lx21 made on check it out learn exe watt imrs ...,0.152047,0.469165,0.049,0.795,0.156,0.9999
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562,could be a problem sitting on support that bre...,0.137389,0.445212,0.055,0.825,0.12,0.9999
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192,zacks bull of the day apple have a great weeke...,0.186061,0.49581,0.047,0.772,0.181,1.0
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323,free in account balance for first members lnkd...,0.214288,0.494194,0.046,0.762,0.193,1.0
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554,free in account balance for first members lnkd...,0.171292,0.464973,0.06,0.773,0.167,1.0
5,2015-01-06,Apple: Does The Party End In 2015? http://seek...,115,228,442,apple does the party end in long with successf...,0.15294,0.461804,0.066,0.752,0.182,1.0
6,2015-01-07,The Ever-Changing World Of Apple http://seekin...,105,209,576,the ever changing world of apple what dean kar...,0.170265,0.452655,0.055,0.781,0.164,1.0
7,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,251,428,823,one thing apple should do to preserve its mark...,0.189625,0.4805,0.048,0.787,0.164,1.0
8,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,177,364,612,how iphone demand could impact apple s q2 guid...,0.186721,0.474236,0.054,0.786,0.159,1.0
9,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,50,208,286,apple asian carriers are making iphone cheaper...,0.260925,0.425825,0.029,0.795,0.176,1.0


In [None]:
# deriving a sentiment label for each row

# a row is "negative" when neg exceeds pos, "positive" when pos exceeds neg,
# and "neutral" otherwise; the neu score does not take part in the decision
sntmnt1apple = [
    "negative" if neg_score > pos_score
    else "positive" if pos_score > neg_score
    else "neutral"
    for neg_score, pos_score in zip(comb_tweets_apple1['neg'], comb_tweets_apple1['pos'])
]

In [None]:
# printing the complete list of sentiment labels (one entry per row of comb_tweets_apple1)
print(sntmnt1apple)

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# adding the sentiment labels as a new 'sentiment' column and previewing the result
comb_tweets_apple1['sentiment'] = sntmnt1apple
print(comb_tweets_apple1.head())

  updated_dates                                               body  \
0    2015-01-01  lx21 made $10,008  on $AAPL -Check it out! htt...   
1    2015-01-02  $aapl could be a problem sitting on support th...   
2    2015-01-03  Zacks' Bull Of The Day: Apple http://seekingal...   
3    2015-01-04  Free 5€ in account balance for first 100.000 m...   
4    2015-01-05  Free 5€ in account balance for first 100.000 m...   

   comment_num  retweet_num  like_num  \
0           17          968       227   
1          128         2102       562   
2           31          103       192   
3           52          328       323   
4          139          392       554   

                                          clean_text  polarity  subjectivity  \
0  lx21 made on check it out learn exe watt imrs ...  0.152047      0.469165   
1  could be a problem sitting on support that bre...  0.137389      0.445212   
2  zacks bull of the day apple have a great weeke...  0.186061      0.495810   
3  free in a

In [None]:
# exporting the first dataframe
# NOTE(review): to_csv is called without index=False, so the row index is written as an
# unnamed first column - the kind of column this notebook drops as 'Unnamed: 0' after read_csv
comb_tweets_apple1.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Apple/comb_tweets_apple1_snt.csv')

### **Dataset - 2 : Last 3 Days Tweets**

In [None]:
# calculating TextBlob polarity and subjectivity for each row

# TextBlob is run once per row and both scores are read from that single result;
# building a separate TextBlob for each score analysed every text twice
tb_scores_apple3d = [tb(tweet_text).sentiment for tweet_text in comb_tweets_apple3d['clean_text']]
comb_tweets_apple3d['polarity'] = [score.polarity for score in tb_scores_apple3d]
comb_tweets_apple3d['subjectivity'] = [score.subjectivity for score in tb_scores_apple3d]

comb_tweets_apple3d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,zacks bull of the day apple have a great weeke...,0.158465,0.468734
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,free in account balance for first members lnkd...,0.175851,0.476125
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,free in account balance for first members lnkd...,0.183886,0.47845
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,apple does the party end in long with successf...,0.169583,0.468
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,the ever changing world of apple what dean kar...,0.164246,0.459994
5,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,471,865,1841,one thing apple should do to preserve its mark...,0.172146,0.466585
6,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,533,1001,2011,how iphone demand could impact apple s q2 guid...,0.183188,0.47059
7,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,478,1000,1721,apple asian carriers are making iphone cheaper...,0.197525,0.4712
8,2015-01-11,"Caturday: There is no MacBook, there is only Z...",251,663,1124,caturday there is no macbook there is only zuu...,0.217169,0.457028
9,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",141,531,866,the binary options competition win lnkd l gool...,0.220221,0.442879


In [None]:
# calculating the VADER negative, neutral, positive and compound scores for each row

# the analyzer is built once and reused: creating a new SentimentIntensityAnalyzer
# inside the loop repeated its set-up work for every single row
vader_analyzer = SentimentIntensityAnalyzer()

# one dict per row with the keys 'neg', 'neu', 'pos' and 'compound'
sentiment3dapple = [
    vader_analyzer.polarity_scores(tweet_text)
    for tweet_text in comb_tweets_apple3d['clean_text']
]

# previewing a few entries only; the full list holds one dict per row
print(sentiment3dapple[:5])

[{'neg': 0.059, 'neu': 0.786, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.049, 'neu': 0.776, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.053, 'neu': 0.767, 'pos': 0.18, 'compound': 1.0}, {'neg': 0.063, 'neu': 0.76, 'pos': 0.177, 'compound': 1.0}, {'neg': 0.06, 'neu': 0.766, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.058, 'neu': 0.773, 'pos': 0.169, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.784, 'pos': 0.165, 'compound': 1.0}, {'neg': 0.047, 'neu': 0.787, 'pos': 0.166, 'compound': 1.0}, {'neg': 0.048, 'neu': 0.781, 'pos': 0.171, 'compound': 1.0}, {'neg': 0.042, 'neu': 0.784, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.049, 'neu': 0.768, 'pos': 0.182, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.774, 'pos': 0.169, 'compound': 1.0}, {'neg': 0.059, 'neu': 0.77, 'pos': 0.171, 'compound': 1.0}, {'neg': 0.07, 'neu': 0.783, 'pos': 0.147, 'compound': 1.0}, {'neg': 0.073, 'neu': 0.788, 'pos': 0.139, 'compound': 1.0}, {'neg': 0.071, 'neu': 0.78, 'pos': 0.149, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.788, 

In [None]:
# converting the list of per-row VADER score dicts to a dataframe
# (one row per tweet row, one column per score key)
sentiment3dapple_df = pd.DataFrame(sentiment3dapple)
print(sentiment3dapple_df)

        neg    neu    pos  compound
0     0.059  0.786  0.154       1.0
1     0.049  0.776  0.174       1.0
2     0.053  0.767  0.180       1.0
3     0.063  0.760  0.177       1.0
4     0.060  0.766  0.174       1.0
...     ...    ...    ...       ...
1820  0.054  0.795  0.151       1.0
1821  0.054  0.796  0.149       1.0
1822  0.049  0.787  0.164       1.0
1823  0.053  0.792  0.155       1.0
1824  0.055  0.791  0.154       1.0

[1825 rows x 4 columns]


In [None]:
# copying every VADER score column from the scores dataframe onto the original dataframe
for score_col in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_apple3d[score_col] = sentiment3dapple_df[score_col]

comb_tweets_apple3d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,zacks bull of the day apple have a great weeke...,0.158465,0.468734,0.059,0.786,0.154,1.0
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,free in account balance for first members lnkd...,0.175851,0.476125,0.049,0.776,0.174,1.0
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,free in account balance for first members lnkd...,0.183886,0.47845,0.053,0.767,0.18,1.0
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,apple does the party end in long with successf...,0.169583,0.468,0.063,0.76,0.177,1.0
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,the ever changing world of apple what dean kar...,0.164246,0.459994,0.06,0.766,0.174,1.0
5,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,471,865,1841,one thing apple should do to preserve its mark...,0.172146,0.466585,0.058,0.773,0.169,1.0
6,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,533,1001,2011,how iphone demand could impact apple s q2 guid...,0.183188,0.47059,0.051,0.784,0.165,1.0
7,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,478,1000,1721,apple asian carriers are making iphone cheaper...,0.197525,0.4712,0.047,0.787,0.166,1.0
8,2015-01-11,"Caturday: There is no MacBook, there is only Z...",251,663,1124,caturday there is no macbook there is only zuu...,0.217169,0.457028,0.048,0.781,0.171,1.0
9,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",141,531,866,the binary options competition win lnkd l gool...,0.220221,0.442879,0.042,0.784,0.174,1.0


In [None]:
# deriving a sentiment label for each row

# a row is "negative" when neg exceeds pos, "positive" when pos exceeds neg,
# and "neutral" otherwise; the neu score does not take part in the decision
sntmnt3dapple = [
    "negative" if neg_score > pos_score
    else "positive" if pos_score > neg_score
    else "neutral"
    for neg_score, pos_score in zip(comb_tweets_apple3d['neg'], comb_tweets_apple3d['pos'])
]

In [None]:
# printing the complete list of sentiment labels (one entry per row of comb_tweets_apple3d)
print(sntmnt3dapple)

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# adding the sentiment labels as a new 'sentiment' column
comb_tweets_apple3d['sentiment'] = sntmnt3dapple

In [None]:
# exporting the second dataframe
# NOTE(review): to_csv is called without index=False, so the row index is written as an
# unnamed first column - the kind of column this notebook drops as 'Unnamed: 0' after read_csv
comb_tweets_apple3d.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Apple/comb_tweets_apple3d_snt.csv')

### **Dataset - 3 : Last 7 Days Tweets**

In [None]:
# calculating TextBlob polarity and subjectivity for each row

# TextBlob is run once per row and both scores are read from that single result;
# building a separate TextBlob for each score analysed every text twice
tb_scores_apple7d = [tb(tweet_text).sentiment for tweet_text in comb_tweets_apple7d['clean_text']]
comb_tweets_apple7d['polarity'] = [score.polarity for score in tb_scores_apple7d]
comb_tweets_apple7d['subjectivity'] = [score.subjectivity for score in tb_scores_apple7d]

comb_tweets_apple7d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-07,The Ever-Changing World Of Apple http://seekin...,587,4330,2876,the ever changing world of apple what dean kar...,0.166986,0.464993
1,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,821,3790,3472,one thing apple should do to preserve its mark...,0.172842,0.468477
2,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,870,2052,3522,how iphone demand could impact apple s q2 guid...,0.177947,0.471271
3,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,889,2157,3616,apple asian carriers are making iphone cheaper...,0.182011,0.467047
4,2015-01-11,"Caturday: There is no MacBook, there is only Z...",861,1920,3519,caturday there is no macbook there is only zuu...,0.184405,0.46369
5,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",789,1760,3319,the binary options competition win lnkd l gool...,0.185916,0.462454
6,2015-01-13,What's Driving Our $30 Billion Valuation For A...,809,1989,3399,what s driving our billion valuation for apple...,0.183448,0.457585
7,2015-01-14,The Storage Lawsuit Against Apple Is Beyond Fr...,817,2049,3272,the storage lawsuit against apple is beyond fr...,0.173973,0.455505
8,2015-01-15,Apple 2015 Outlook: Where Do We Go From Here? ...,711,1826,2812,apple outlook where do we go from here qihu ba...,0.167914,0.44767
9,2015-01-16,Apple's App Store Will Surprise To The Upside ...,690,1929,2561,apple s app store will surprise to the upside ...,0.151119,0.445665


In [None]:
# downloading the VADER lexicon used by SentimentIntensityAnalyzer in the cells below
# NOTE(review): nltk is already imported in the imports cell at the top of the notebook;
# consider moving this download next to it so a fresh "Run All" has the lexicon before first use
import nltk
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [None]:
# calculating negative, neutral, positive and compound values with VADER

# building the analyzer once and reusing it for every row: creating a new
# SentimentIntensityAnalyzer() inside the loop rebuilds its lexicon on every iteration
vader_analyzer = SentimentIntensityAnalyzer()

# one dictionary of scores ('neg', 'neu', 'pos', 'compound') per row, in dataframe order
sentiment7dapple = [vader_analyzer.polarity_scores(text) for text in comb_tweets_apple7d['clean_text']]

# printing only the first few dictionaries to keep the cell output readable
print(sentiment7dapple[:5])

KeyboardInterrupt: ignored

In [None]:
# converting the list of VADER score dictionaries to a dataframe
# (one row per entry of sentiment7dapple, one column per score key)
sentiment7dapple_df = pd.DataFrame(sentiment7dapple)
print(sentiment7dapple_df)

In [None]:
# adding the sentiment values to the original dataframe, one score column at a time
for score_column in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_apple7d[score_column] = sentiment7dapple_df[score_column]

comb_tweets_apple7d.head(10)

In [None]:
# calculating the sentiment label for each row

# only 'neg' and 'pos' take part in the decision ('neu' is not needed):
# the larger of the two gives the label, and equal scores give "neutral"
sntmnt7dapple = [
    "negative" if neg > pos else "positive" if pos > neg else "neutral"
    for neg, pos in zip(comb_tweets_apple7d['neg'], comb_tweets_apple7d['pos'])
]

In [None]:
# inspecting the sentiment labels computed above (prints the whole list)
print(sntmnt7dapple)

In [None]:
# storing the sentiment labels as a new 'sentiment' column
comb_tweets_apple7d['sentiment'] = sntmnt7dapple

In [None]:
# exporting the third dataframe (7-day tweets with sentiment columns) to Google Drive
# note: the index is written as well (to_csv default), so it shows up as an unnamed column when the file is read back
comb_tweets_apple7d.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Apple/comb_tweets_apple7d_snt.csv')

### **Dataset - 4 : Previous Day Tweets**

In [None]:
# calculating polarity and subjectivity for each row of the previous-day dataframe

# running TextBlob once per row and reading both scores from the same result,
# instead of building a separate TextBlob for each of the two columns
textblob_scores_appleprev = [tb(text).sentiment for text in comb_tweets_appleprev['clean_text']]

comb_tweets_appleprev['polarity'] = [score.polarity for score in textblob_scores_appleprev]
comb_tweets_appleprev['subjectivity'] = [score.subjectivity for score in textblob_scores_appleprev]

comb_tweets_appleprev.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",6,33,50,lx21 made on check it out learn exe watt imrs ...,0.067588,0.469103
1,2015-01-02,@davenathan I just expected something more ext...,52,1240,408,i just expected something more extravagant lik...,0.166638,0.469114
2,2015-01-03,The Closing Bell is out! http://paper.li/BonaC...,108,1858,439,the closing bell is out eurusd gbpusd usdjpy a...,0.146284,0.446422
3,2015-01-04,The Closing Bell is out! http://paper.li/BonaC...,15,62,133,the closing bell is out eurusd gbpusd usdjpy a...,0.216906,0.507264
4,2015-01-05,#Apple : Price Target Raised to $125.00 at Arg...,74,385,411,price target raised to at argus free in accoun...,0.228005,0.496103
5,2015-01-06,Stock Contest!! Pick $COP and WIN a FREE Table...,128,373,536,stock contest pick cop and win a free tablet e...,0.159218,0.460904
6,2015-01-07,Apple reports earnings on January 27 http://m....,141,254,525,apple reports earnings on january join me on t...,0.15465,0.461288
7,2015-01-08,More U.S. dollar damage $AAPL:$USD - http://sc...,91,196,516,more u s dollar damage usd here s how apple co...,0.172645,0.453962
8,2015-01-09,Those stocks? $AAPL $BABA $TSLA $VA ... If sh...,267,487,877,those stocks baba va if she had let me do i wo...,0.194843,0.482668
9,2015-01-10,"Join me on tsū, is sharing social revenues wit...",166,394,587,join me on ts is sharing social revenues with ...,0.179049,0.466481


In [None]:
# calculating negative, neutral, positive and compound values with VADER

# building the analyzer once and reusing it for every row: creating a new
# SentimentIntensityAnalyzer() inside the loop rebuilds its lexicon on every iteration
vader_analyzer = SentimentIntensityAnalyzer()

# one dictionary of scores ('neg', 'neu', 'pos', 'compound') per row, in dataframe order
sentimentprevapple = [vader_analyzer.polarity_scores(text) for text in comb_tweets_appleprev['clean_text']]

# printing only the first few dictionaries to keep the cell output readable
print(sentimentprevapple[:5])

[{'neg': 0.041, 'neu': 0.894, 'pos': 0.065, 'compound': 0.9354}, {'neg': 0.04, 'neu': 0.835, 'pos': 0.125, 'compound': 0.9998}, {'neg': 0.058, 'neu': 0.794, 'pos': 0.148, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.754, 'pos': 0.2, 'compound': 1.0}, {'neg': 0.043, 'neu': 0.758, 'pos': 0.198, 'compound': 1.0}, {'neg': 0.067, 'neu': 0.778, 'pos': 0.155, 'compound': 1.0}, {'neg': 0.07, 'neu': 0.756, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.052, 'neu': 0.776, 'pos': 0.172, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.8, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.061, 'neu': 0.798, 'pos': 0.142, 'compound': 1.0}, {'neg': 0.022, 'neu': 0.839, 'pos': 0.139, 'compound': 0.9999}, {'neg': 0.014, 'neu': 0.845, 'pos': 0.141, 'compound': 0.9999}, {'neg': 0.049, 'neu': 0.785, 'pos': 0.166, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.772, 'pos': 0.172, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.796, 'pos': 0.138, 'compound': 1.0}, {'neg': 0.079, 'neu': 0.782, 'pos': 0.139, 'compound': 1.0}, {'neg': 0.078, 'n

In [None]:
# converting the list of VADER score dictionaries to a dataframe
# (one row per entry of sentimentprevapple, one column per score key)
sentimentprevapple_df = pd.DataFrame(sentimentprevapple)
print(sentimentprevapple_df)

        neg    neu    pos  compound
0     0.041  0.894  0.065    0.9354
1     0.040  0.835  0.125    0.9998
2     0.058  0.794  0.148    1.0000
3     0.046  0.754  0.200    1.0000
4     0.043  0.758  0.198    1.0000
...     ...    ...    ...       ...
1821  0.059  0.793  0.149    1.0000
1822  0.050  0.788  0.163    0.9999
1823  0.059  0.800  0.142    0.9999
1824  0.051  0.779  0.169    1.0000
1825  0.055  0.807  0.138    1.0000

[1826 rows x 4 columns]


In [None]:
# adding the sentiment values to the original dataframe, one score column at a time
for score_column in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_appleprev[score_column] = sentimentprevapple_df[score_column]

comb_tweets_appleprev.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",6,33,50,lx21 made on check it out learn exe watt imrs ...,0.067588,0.469103,0.041,0.894,0.065,0.9354
1,2015-01-02,@davenathan I just expected something more ext...,52,1240,408,i just expected something more extravagant lik...,0.166638,0.469114,0.04,0.835,0.125,0.9998
2,2015-01-03,The Closing Bell is out! http://paper.li/BonaC...,108,1858,439,the closing bell is out eurusd gbpusd usdjpy a...,0.146284,0.446422,0.058,0.794,0.148,1.0
3,2015-01-04,The Closing Bell is out! http://paper.li/BonaC...,15,62,133,the closing bell is out eurusd gbpusd usdjpy a...,0.216906,0.507264,0.046,0.754,0.2,1.0
4,2015-01-05,#Apple : Price Target Raised to $125.00 at Arg...,74,385,411,price target raised to at argus free in accoun...,0.228005,0.496103,0.043,0.758,0.198,1.0
5,2015-01-06,Stock Contest!! Pick $COP and WIN a FREE Table...,128,373,536,stock contest pick cop and win a free tablet e...,0.159218,0.460904,0.067,0.778,0.155,1.0
6,2015-01-07,Apple reports earnings on January 27 http://m....,141,254,525,apple reports earnings on january join me on t...,0.15465,0.461288,0.07,0.756,0.174,1.0
7,2015-01-08,More U.S. dollar damage $AAPL:$USD - http://sc...,91,196,516,more u s dollar damage usd here s how apple co...,0.172645,0.453962,0.052,0.776,0.172,1.0
8,2015-01-09,Those stocks? $AAPL $BABA $TSLA $VA ... If sh...,267,487,877,those stocks baba va if she had let me do i wo...,0.194843,0.482668,0.046,0.8,0.154,1.0
9,2015-01-10,"Join me on tsū, is sharing social revenues wit...",166,394,587,join me on ts is sharing social revenues with ...,0.179049,0.466481,0.061,0.798,0.142,1.0


In [None]:
# calculating the sentiment label for each row

# only 'neg' and 'pos' take part in the decision ('neu' is not needed):
# the larger of the two gives the label, and equal scores give "neutral"
sntmntprevapple = [
    "negative" if neg > pos else "positive" if pos > neg else "neutral"
    for neg, pos in zip(comb_tweets_appleprev['neg'], comb_tweets_appleprev['pos'])
]

In [None]:
# inspecting the sentiment labels computed above (prints the whole list)
print(sntmntprevapple)

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# storing the sentiment labels as a new 'sentiment' column
comb_tweets_appleprev['sentiment'] = sntmntprevapple

In [None]:
# exporting the fourth dataframe (previous-day tweets with sentiment columns) to Google Drive
# note: the index is written as well (to_csv default), so it shows up as an unnamed column when the file is read back
comb_tweets_appleprev.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Apple/comb_tweets_appleprev_snt.csv')

# **For Tesla**

## **Dataset Cleaning**

### **Dataset 1 : Same Day Tweets**

In [None]:
# importing the same-day dataset and dropping the 'Unnamed: 0' column (presumably the index saved with the CSV)
# NOTE(review): the saved output of this cell shows $AAPL / Apple tweet bodies -
# confirm that this file really contains Tesla tweets
comb_tweets_tesla1 = pd.read_csv(
    '/content/gdrive/MyDrive/DATA_602_Project/datasets/Tesla/comb_tweets_tesla1.csv'
).drop(columns=['Unnamed: 0'])
display(comb_tweets_tesla1)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554
...,...,...,...,...,...
1822,2019-12-28,Some top money flow for Dec 27Start your free ...,127,545,1579
1823,2019-12-29,Notable activity on social media in the past w...,74,307,528
1824,2019-12-30,Siguenos en telegram https://t.me/tradingbalea...,271,725,1821
1825,2019-12-31,Some top money flow for Dec 30Start your free ...,244,404,2088


In [None]:
# copying the tweet body into a 'clean_text' column; the cleaning steps below
# overwrite 'clean_text' and leave 'body' untouched
comb_tweets_tesla1['clean_text'] = comb_tweets_tesla1['body']
display(comb_tweets_tesla1['clean_text'])

0       lx21 made $10,008  on $AAPL -Check it out! htt...
1       $aapl could be a problem sitting on support th...
2       Zacks' Bull Of The Day: Apple http://seekingal...
3       Free 5€ in account balance for first 100.000 m...
4       Free 5€ in account balance for first 100.000 m...
                              ...                        
1822    Some top money flow for Dec 27Start your free ...
1823    Notable activity on social media in the past w...
1824    Siguenos en telegram https://t.me/tradingbalea...
1825    Some top money flow for Dec 30Start your free ...
1826    Popular: $AAPL, $IWM, $BTAI, $FTNT, $ALIM, $BP...
Name: clean_text, Length: 1827, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase (overwrites the 'clean_text' column)
comb_tweets_tesla1['clean_text'] = comb_tweets_tesla1['clean_text'].str.lower()

In [None]:
# cleaning every tweet with clean_tweets, then viewing the first cleaned rows
comb_tweets_tesla1['clean_text'] = comb_tweets_tesla1['clean_text'].apply(clean_tweets)
display(comb_tweets_tesla1['clean_text'].head())

0    lx21 made on check it out learn exe watt imrs ...
1    could be a problem sitting on support that bre...
2    zacks bull of the day apple have a great weeke...
3    free in account balance for first members lnkd...
4    free in account balance for first members lnkd...
Name: clean_text, dtype: object

### **Dataset - 2 : Last 3 Days Tweets**

In [None]:
# importing the 3-day dataset and dropping the 'Unnamed: 0' column (presumably the index saved with the CSV)
# NOTE(review): the saved outputs in this section show Apple tweet bodies -
# confirm that this file really contains Tesla tweets
comb_tweets_tesla3d = pd.read_csv(
    '/content/gdrive/MyDrive/DATA_602_Project/datasets/Tesla/comb_tweets_tesla3d.csv'
).drop(columns=['Unnamed: 0'])
print(comb_tweets_tesla3d)

     updated_dates  ... like_num
0       2015-01-03  ...      981
1       2015-01-04  ...     1077
2       2015-01-05  ...     1069
3       2015-01-06  ...     1319
4       2015-01-07  ...     1572
...            ...  ...      ...
1820    2019-12-28  ...     4261
1821    2019-12-29  ...     3332
1822    2019-12-30  ...     3928
1823    2019-12-31  ...     4437
1824    2020-01-01  ...     4041

[1825 rows x 5 columns]


In [None]:
# copying the tweet body into a 'clean_text' column; the cleaning steps below
# overwrite 'clean_text' and leave 'body' untouched
comb_tweets_tesla3d['clean_text'] = comb_tweets_tesla3d['body']
display(comb_tweets_tesla3d)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,Zacks' Bull Of The Day: Apple http://seekingal...
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,Free 5€ in account balance for first 100.000 m...
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,Free 5€ in account balance for first 100.000 m...
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,Apple: Does The Party End In 2015? http://seek...
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,The Ever-Changing World Of Apple http://seekin...
...,...,...,...,...,...,...
1820,2019-12-28,Some top money flow for Dec 27Start your free ...,532,1081,4261,Some top money flow for Dec 27Start your free ...
1821,2019-12-29,Notable activity on social media in the past w...,397,1035,3332,Notable activity on social media in the past w...
1822,2019-12-30,Siguenos en telegram https://t.me/tradingbalea...,472,1577,3928,Siguenos en telegram https://t.me/tradingbalea...
1823,2019-12-31,Some top money flow for Dec 30Start your free ...,589,1436,4437,Some top money flow for Dec 30Start your free ...


In [None]:
# cleaning the tweets

# converting all the text to lowercase (overwrites the 'clean_text' column)
comb_tweets_tesla3d['clean_text'] = comb_tweets_tesla3d['clean_text'].str.lower()

In [None]:
# cleaning every tweet with clean_tweets, then viewing the first cleaned rows
comb_tweets_tesla3d['clean_text'] = comb_tweets_tesla3d['clean_text'].apply(clean_tweets)
display(comb_tweets_tesla3d['clean_text'].head())

0    zacks bull of the day apple have a great weeke...
1    free in account balance for first members lnkd...
2    free in account balance for first members lnkd...
3    apple does the party end in long with successf...
4    the ever changing world of apple what dean kar...
Name: clean_text, dtype: object

### **Dataset - 3 : Last 7 Days Tweets**

In [None]:
# importing the 7-day dataset and dropping the 'Unnamed: 0' column (presumably the index saved with the CSV)
# NOTE(review): the saved outputs in this section show Apple tweet bodies -
# confirm that this file really contains Tesla tweets
comb_tweets_tesla7d = pd.read_csv(
    '/content/gdrive/MyDrive/DATA_602_Project/datasets/Tesla/comb_tweets_tesla7d.csv'
).drop(columns=['Unnamed: 0'])
print(comb_tweets_tesla7d)

     updated_dates  ... like_num
0       2015-01-07  ...     2876
1       2015-01-08  ...     3472
2       2015-01-09  ...     3522
3       2015-01-10  ...     3616
4       2015-01-11  ...     3519
...            ...  ...      ...
1816    2019-12-28  ...     8743
1817    2019-12-29  ...     8104
1818    2019-12-30  ...     8661
1819    2019-12-31  ...     9614
1820    2020-01-01  ...     8830

[1821 rows x 5 columns]


In [None]:
# copying the tweet body into a 'clean_text' column; the cleaning steps below
# overwrite 'clean_text' and leave 'body' untouched
comb_tweets_tesla7d['clean_text'] = comb_tweets_tesla7d['body']
display(comb_tweets_tesla7d['clean_text'])

0       The Ever-Changing World Of Apple http://seekin...
1       One Thing Apple Should Do To Preserve Its Mark...
2       How iPhone Demand Could Impact Apple's Q2 Guid...
3       Apple: Asian Carriers Are Making iPhone 6 Chea...
4       Caturday: There is no MacBook, there is only Z...
                              ...                        
1816    Some top money flow for Dec 27Start your free ...
1817    Notable activity on social media in the past w...
1818    Siguenos en telegram https://t.me/tradingbalea...
1819    Some top money flow for Dec 30Start your free ...
1820    Popular: $AAPL, $IWM, $BTAI, $FTNT, $ALIM, $BP...
Name: clean_text, Length: 1821, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase (overwrites the 'clean_text' column)
comb_tweets_tesla7d['clean_text'] = comb_tweets_tesla7d['clean_text'].str.lower()


In [None]:
# cleaning every tweet with clean_tweets, then printing the first cleaned rows
comb_tweets_tesla7d['clean_text'] = comb_tweets_tesla7d['clean_text'].apply(clean_tweets)
print(comb_tweets_tesla7d['clean_text'].head())

0    the ever changing world of apple what dean kar...
1    one thing apple should do to preserve its mark...
2    how iphone demand could impact apple s q2 guid...
3    apple asian carriers are making iphone cheaper...
4    caturday there is no macbook there is only zuu...
Name: clean_text, dtype: object


### **Dataset - 4 : Previous Day Tweets**

In [None]:
# importing the previous-day dataset and dropping the 'Unnamed: 0' column (presumably the index saved with the CSV)
# NOTE(review): the saved outputs in this section show $AAPL / Apple tweet bodies -
# confirm that this file really contains Tesla tweets
comb_tweets_teslaprev = pd.read_csv(
    '/content/gdrive/MyDrive/DATA_602_Project/datasets/Tesla/comb_tweets_teslaprev.csv'
).drop(columns=['Unnamed: 0'])
print(comb_tweets_teslaprev)

     updated_dates  ... like_num
0       2015-01-01  ...       50
1       2015-01-02  ...      408
2       2015-01-03  ...      439
3       2015-01-04  ...      133
4       2015-01-05  ...      411
...            ...  ...      ...
1821    2019-12-28  ...     1058
1822    2019-12-29  ...     1460
1823    2019-12-30  ...      577
1824    2019-12-31  ...     2684
1825    2020-01-01  ...     1081

[1826 rows x 5 columns]


In [None]:
# copying the tweet body into a 'clean_text' column; the cleaning steps below
# overwrite 'clean_text' and leave 'body' untouched
comb_tweets_teslaprev['clean_text'] = comb_tweets_teslaprev['body']
display(comb_tweets_teslaprev['clean_text'].head())

0    lx21 made $10,008  on $AAPL -Check it out! htt...
1    @davenathan I just expected something more ext...
2    The Closing Bell is out! http://paper.li/BonaC...
3    The Closing Bell is out! http://paper.li/BonaC...
4    #Apple : Price Target Raised to $125.00 at Arg...
Name: clean_text, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase (overwrites the existing 'clean_text' column)
comb_tweets_teslaprev['clean_text'] = comb_tweets_teslaprev['clean_text'].str.lower()


In [None]:
# cleaning every tweet with clean_tweets, then printing the first cleaned rows
comb_tweets_teslaprev['clean_text'] = comb_tweets_teslaprev['clean_text'].apply(clean_tweets)
print(comb_tweets_teslaprev['clean_text'].head())

0    lx21 made on check it out learn exe watt imrs ...
1    i just expected something more extravagant lik...
2    the closing bell is out eurusd gbpusd usdjpy a...
3    the closing bell is out eurusd gbpusd usdjpy a...
4    price target raised to at argus free in accoun...
Name: clean_text, dtype: object


## **Finding Sentiment Analysis Values**

### **Dataset - 1 : Same Day Tweets**

In [None]:
# calculating polarity and subjectivity for each row of the same-day dataframe

# running TextBlob once per row and reading both scores from the same result,
# instead of building a separate TextBlob for each of the two columns
textblob_scores_tesla1 = [tb(text).sentiment for text in comb_tweets_tesla1['clean_text']]

comb_tweets_tesla1['polarity'] = [score.polarity for score in textblob_scores_tesla1]
comb_tweets_tesla1['subjectivity'] = [score.subjectivity for score in textblob_scores_tesla1]

comb_tweets_tesla1.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227,lx21 made on check it out learn exe watt imrs ...,0.152047,0.469165
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562,could be a problem sitting on support that bre...,0.137389,0.445212
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192,zacks bull of the day apple have a great weeke...,0.186061,0.49581
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323,free in account balance for first members lnkd...,0.214288,0.494194
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554,free in account balance for first members lnkd...,0.171292,0.464973
5,2015-01-06,Apple: Does The Party End In 2015? http://seek...,115,228,442,apple does the party end in long with successf...,0.15294,0.461804
6,2015-01-07,The Ever-Changing World Of Apple http://seekin...,105,209,576,the ever changing world of apple what dean kar...,0.170265,0.452655
7,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,251,428,823,one thing apple should do to preserve its mark...,0.189625,0.4805
8,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,177,364,612,how iphone demand could impact apple s q2 guid...,0.186721,0.474236
9,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,50,208,286,apple asian carriers are making iphone cheaper...,0.260925,0.425825


In [None]:
# calculating negative, neutral, positive and compound values with VADER

# building the analyzer once and reusing it for every row: creating a new
# SentimentIntensityAnalyzer() inside the loop rebuilds its lexicon on every iteration
vader_analyzer = SentimentIntensityAnalyzer()

# one dictionary of scores ('neg', 'neu', 'pos', 'compound') per row, in dataframe order
sentiment1tesla = [vader_analyzer.polarity_scores(text) for text in comb_tweets_tesla1['clean_text']]

# printing only the first few dictionaries to keep the cell output readable
print(sentiment1tesla[:5])

[{'neg': 0.049, 'neu': 0.795, 'pos': 0.156, 'compound': 0.9999}, {'neg': 0.055, 'neu': 0.825, 'pos': 0.12, 'compound': 0.9999}, {'neg': 0.047, 'neu': 0.772, 'pos': 0.181, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.762, 'pos': 0.193, 'compound': 1.0}, {'neg': 0.06, 'neu': 0.773, 'pos': 0.167, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.752, 'pos': 0.182, 'compound': 1.0}, {'neg': 0.055, 'neu': 0.781, 'pos': 0.164, 'compound': 1.0}, {'neg': 0.048, 'neu': 0.787, 'pos': 0.164, 'compound': 1.0}, {'neg': 0.054, 'neu': 0.786, 'pos': 0.159, 'compound': 1.0}, {'neg': 0.029, 'neu': 0.795, 'pos': 0.176, 'compound': 1.0}, {'neg': 0.036, 'neu': 0.752, 'pos': 0.212, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.795, 'pos': 0.155, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.76, 'pos': 0.188, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.78, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.075, 'neu': 0.785, 'pos': 0.14, 'compound': 1.0}, {'neg': 0.074, 'neu': 0.788, 'pos': 0.137, 'compound': 1.0}, {'neg': 0.058, 'neu': 

In [None]:
# converting the list of VADER score dictionaries to a dataframe
# (one row per entry of sentiment1tesla, one column per score key)
sentiment1tesla_df = pd.DataFrame(sentiment1tesla)
print(sentiment1tesla_df)

        neg    neu    pos  compound
0     0.049  0.795  0.156    0.9999
1     0.055  0.825  0.120    0.9999
2     0.047  0.772  0.181    1.0000
3     0.046  0.762  0.193    1.0000
4     0.060  0.773  0.167    1.0000
...     ...    ...    ...       ...
1822  0.044  0.820  0.136    0.9999
1823  0.047  0.811  0.142    0.9999
1824  0.051  0.776  0.173    1.0000
1825  0.052  0.808  0.140    1.0000
1826  0.074  0.784  0.143    0.9988

[1827 rows x 4 columns]


In [None]:
# adding the sentiment values to the original dataframe, one score column at a time
for score_column in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_tesla1[score_column] = sentiment1tesla_df[score_column]

comb_tweets_tesla1.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227,lx21 made on check it out learn exe watt imrs ...,0.152047,0.469165,0.049,0.795,0.156,0.9999
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562,could be a problem sitting on support that bre...,0.137389,0.445212,0.055,0.825,0.12,0.9999
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192,zacks bull of the day apple have a great weeke...,0.186061,0.49581,0.047,0.772,0.181,1.0
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323,free in account balance for first members lnkd...,0.214288,0.494194,0.046,0.762,0.193,1.0
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554,free in account balance for first members lnkd...,0.171292,0.464973,0.06,0.773,0.167,1.0
5,2015-01-06,Apple: Does The Party End In 2015? http://seek...,115,228,442,apple does the party end in long with successf...,0.15294,0.461804,0.066,0.752,0.182,1.0
6,2015-01-07,The Ever-Changing World Of Apple http://seekin...,105,209,576,the ever changing world of apple what dean kar...,0.170265,0.452655,0.055,0.781,0.164,1.0
7,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,251,428,823,one thing apple should do to preserve its mark...,0.189625,0.4805,0.048,0.787,0.164,1.0
8,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,177,364,612,how iphone demand could impact apple s q2 guid...,0.186721,0.474236,0.054,0.786,0.159,1.0
9,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,50,208,286,apple asian carriers are making iphone cheaper...,0.260925,0.425825,0.029,0.795,0.176,1.0


In [None]:
# calculating the sentiment label for each row

# only 'neg' and 'pos' take part in the decision ('neu' is not needed):
# the larger of the two gives the label, and equal scores give "neutral"
sntmnt1tesla = [
    "negative" if neg > pos else "positive" if pos > neg else "neutral"
    for neg, pos in zip(comb_tweets_tesla1['neg'], comb_tweets_tesla1['pos'])
]

In [None]:
# inspecting the sentiment labels computed above (prints the whole list)
print(sntmnt1tesla)

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# storing the sentiment labels as a new 'sentiment' column and previewing the result
comb_tweets_tesla1['sentiment'] = sntmnt1tesla
print(comb_tweets_tesla1.head())

  updated_dates                                               body  \
0    2015-01-01  lx21 made $10,008  on $AAPL -Check it out! htt...   
1    2015-01-02  $aapl could be a problem sitting on support th...   
2    2015-01-03  Zacks' Bull Of The Day: Apple http://seekingal...   
3    2015-01-04  Free 5€ in account balance for first 100.000 m...   
4    2015-01-05  Free 5€ in account balance for first 100.000 m...   

   comment_num  retweet_num  like_num  \
0           17          968       227   
1          128         2102       562   
2           31          103       192   
3           52          328       323   
4          139          392       554   

                                          clean_text  polarity  subjectivity  \
0  lx21 made on check it out learn exe watt imrs ...  0.152047      0.469165   
1  could be a problem sitting on support that bre...  0.137389      0.445212   
2  zacks bull of the day apple have a great weeke...  0.186061      0.495810   
3  free in a

In [None]:
# exporting the first dataframe (same-day tweets with sentiment columns) to Google Drive
# note: the index is written as well (to_csv default), so it shows up as an unnamed column when the file is read back
comb_tweets_tesla1.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Tesla/comb_tweets_tesla1_snt.csv')

### **Dataset - 2 : Last 3 Days Tweets**

In [None]:
# calculating polarity and subjectivity for each row of the 3-day dataframe

# running TextBlob once per row and reading both scores from the same result,
# instead of building a separate TextBlob for each of the two columns
textblob_scores_tesla3d = [tb(text).sentiment for text in comb_tweets_tesla3d['clean_text']]

comb_tweets_tesla3d['polarity'] = [score.polarity for score in textblob_scores_tesla3d]
comb_tweets_tesla3d['subjectivity'] = [score.subjectivity for score in textblob_scores_tesla3d]

comb_tweets_tesla3d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,zacks bull of the day apple have a great weeke...,0.158465,0.468734
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,free in account balance for first members lnkd...,0.175851,0.476125
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,free in account balance for first members lnkd...,0.183886,0.47845
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,apple does the party end in long with successf...,0.169583,0.468
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,the ever changing world of apple what dean kar...,0.164246,0.459994
5,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,471,865,1841,one thing apple should do to preserve its mark...,0.172146,0.466585
6,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,533,1001,2011,how iphone demand could impact apple s q2 guid...,0.183188,0.47059
7,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,478,1000,1721,apple asian carriers are making iphone cheaper...,0.197525,0.4712
8,2015-01-11,"Caturday: There is no MacBook, there is only Z...",251,663,1124,caturday there is no macbook there is only zuu...,0.217169,0.457028
9,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",141,531,866,the binary options competition win lnkd l gool...,0.220221,0.442879


In [None]:
# calculating negative, neutral, positive and compound values with VADER

# building the analyzer once and reusing it for every row: creating a new
# SentimentIntensityAnalyzer() inside the loop rebuilds its lexicon on every iteration
vader_analyzer = SentimentIntensityAnalyzer()

# one dictionary of scores ('neg', 'neu', 'pos', 'compound') per row, in dataframe order
sentiment3dtesla = [vader_analyzer.polarity_scores(text) for text in comb_tweets_tesla3d['clean_text']]

# printing only the first few dictionaries to keep the cell output readable
print(sentiment3dtesla[:5])

[{'neg': 0.059, 'neu': 0.786, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.049, 'neu': 0.776, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.053, 'neu': 0.767, 'pos': 0.18, 'compound': 1.0}, {'neg': 0.063, 'neu': 0.76, 'pos': 0.177, 'compound': 1.0}, {'neg': 0.06, 'neu': 0.766, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.058, 'neu': 0.773, 'pos': 0.169, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.784, 'pos': 0.165, 'compound': 1.0}, {'neg': 0.047, 'neu': 0.787, 'pos': 0.166, 'compound': 1.0}, {'neg': 0.048, 'neu': 0.781, 'pos': 0.171, 'compound': 1.0}, {'neg': 0.042, 'neu': 0.784, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.049, 'neu': 0.768, 'pos': 0.182, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.774, 'pos': 0.169, 'compound': 1.0}, {'neg': 0.059, 'neu': 0.77, 'pos': 0.171, 'compound': 1.0}, {'neg': 0.07, 'neu': 0.783, 'pos': 0.147, 'compound': 1.0}, {'neg': 0.073, 'neu': 0.788, 'pos': 0.139, 'compound': 1.0}, {'neg': 0.071, 'neu': 0.78, 'pos': 0.149, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.788, 

In [None]:
# converting the list of VADER score dictionaries to a dataframe
# (one row per entry of sentiment3dtesla, one column per score key)
sentiment3dtesla_df = pd.DataFrame(sentiment3dtesla)
print(sentiment3dtesla_df)

        neg    neu    pos  compound
0     0.059  0.786  0.154       1.0
1     0.049  0.776  0.174       1.0
2     0.053  0.767  0.180       1.0
3     0.063  0.760  0.177       1.0
4     0.060  0.766  0.174       1.0
...     ...    ...    ...       ...
1820  0.054  0.795  0.151       1.0
1821  0.054  0.796  0.149       1.0
1822  0.049  0.787  0.164       1.0
1823  0.053  0.792  0.155       1.0
1824  0.055  0.791  0.154       1.0

[1825 rows x 4 columns]


In [None]:
# adding the sentiment values to the original dataframe, one score column at a time
for score_column in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_tesla3d[score_column] = sentiment3dtesla_df[score_column]

comb_tweets_tesla3d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,zacks bull of the day apple have a great weeke...,0.158465,0.468734,0.059,0.786,0.154,1.0
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,free in account balance for first members lnkd...,0.175851,0.476125,0.049,0.776,0.174,1.0
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,free in account balance for first members lnkd...,0.183886,0.47845,0.053,0.767,0.18,1.0
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,apple does the party end in long with successf...,0.169583,0.468,0.063,0.76,0.177,1.0
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,the ever changing world of apple what dean kar...,0.164246,0.459994,0.06,0.766,0.174,1.0
5,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,471,865,1841,one thing apple should do to preserve its mark...,0.172146,0.466585,0.058,0.773,0.169,1.0
6,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,533,1001,2011,how iphone demand could impact apple s q2 guid...,0.183188,0.47059,0.051,0.784,0.165,1.0
7,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,478,1000,1721,apple asian carriers are making iphone cheaper...,0.197525,0.4712,0.047,0.787,0.166,1.0
8,2015-01-11,"Caturday: There is no MacBook, there is only Z...",251,663,1124,caturday there is no macbook there is only zuu...,0.217169,0.457028,0.048,0.781,0.171,1.0
9,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",141,531,866,the binary options competition win lnkd l gool...,0.220221,0.442879,0.042,0.784,0.174,1.0


In [None]:
# calculating a sentiment label for each row

# "negative" when neg > pos, "positive" when pos > neg, "neutral" otherwise (ties);
# only the 'neg' and 'pos' scores decide the label, so the columns are walked
# directly instead of materialising every row with iterrows()
sntmnt3dtesla = [
    "negative" if neg > pos else "positive" if pos > neg else "neutral"
    for neg, pos in zip(comb_tweets_tesla3d['neg'], comb_tweets_tesla3d['pos'])
]

In [None]:
# summarising how many rows received each label instead of dumping every label
print(pd.Series(sntmnt3dtesla, dtype='object').value_counts())

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# attaching the per-row sentiment labels as a new 'sentiment' column
comb_tweets_tesla3d['sentiment'] = sntmnt3dtesla

In [None]:
# exporting the second (last 3 days) dataframe, now carrying the sentiment columns;
# to_csv's defaults apply, so the row index is written as the first column
comb_tweets_tesla3d.to_csv(
    path_or_buf='/content/gdrive/MyDrive/DATA_602_Project/datasets/Tesla/comb_tweets_tesla3d_snt.csv'
)

### Dataset - 3 : Last 7 Days Tweets

In [None]:
# calculating TextBlob polarity and subjectivity for each row of cleaned text

# one TextBlob sentiment is computed per row and both scores are read from it,
# rather than analysing every text twice (once per column)
tb_scores7dtesla = [tb(text).sentiment for text in comb_tweets_tesla7d['clean_text']]
comb_tweets_tesla7d['polarity'] = [score.polarity for score in tb_scores7dtesla]
comb_tweets_tesla7d['subjectivity'] = [score.subjectivity for score in tb_scores7dtesla]

comb_tweets_tesla7d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-07,The Ever-Changing World Of Apple http://seekin...,587,4330,2876,the ever changing world of apple what dean kar...,0.166986,0.464993
1,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,821,3790,3472,one thing apple should do to preserve its mark...,0.172842,0.468477
2,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,870,2052,3522,how iphone demand could impact apple s q2 guid...,0.177947,0.471271
3,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,889,2157,3616,apple asian carriers are making iphone cheaper...,0.182011,0.467047
4,2015-01-11,"Caturday: There is no MacBook, there is only Z...",861,1920,3519,caturday there is no macbook there is only zuu...,0.184405,0.46369
5,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",789,1760,3319,the binary options competition win lnkd l gool...,0.185916,0.462454
6,2015-01-13,What's Driving Our $30 Billion Valuation For A...,809,1989,3399,what s driving our billion valuation for apple...,0.183448,0.457585
7,2015-01-14,The Storage Lawsuit Against Apple Is Beyond Fr...,817,2049,3272,the storage lawsuit against apple is beyond fr...,0.173973,0.455505
8,2015-01-15,Apple 2015 Outlook: Where Do We Go From Here? ...,711,1826,2812,apple outlook where do we go from here qihu ba...,0.167914,0.44767
9,2015-01-16,Apple's App Store Will Surprise To The Upside ...,690,1929,2561,apple s app store will surprise to the upside ...,0.151119,0.445665


In [None]:
# downloading the 'vader_lexicon' resource through nltk
# (presumably required by SentimentIntensityAnalyzer in the next cell)
import nltk
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [None]:
# calculating VADER negative, neutral, positive and compound scores for each row

# the analyzer is created once: constructing it inside the loop repeated its
# setup work for every row, and this cell was interrupted in the saved run
vader7dtesla = SentimentIntensityAnalyzer()

# one score dict per row, in row order
sentiment7dtesla = [
    vader7dtesla.polarity_scores(text)
    for text in comb_tweets_tesla7d['clean_text']
]

# previewing the first few score dicts only
print(sentiment7dtesla[:5])

KeyboardInterrupt: ignored

In [None]:
# building a dataframe (one column per VADER score key) from the list of score dicts
sentiment7dtesla_df = pd.DataFrame(data=sentiment7dtesla)
print(sentiment7dtesla_df)

In [None]:
# copying the four VADER score columns into the original dataframe
# (pandas aligns the assigned values on the row index)
for score_col in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_tesla7d[score_col] = sentiment7dtesla_df[score_col]

comb_tweets_tesla7d.head(10)

In [None]:
# calculating a sentiment label for each row

# "negative" when neg > pos, "positive" when pos > neg, "neutral" otherwise (ties);
# only the 'neg' and 'pos' scores decide the label, so the columns are walked
# directly instead of materialising every row with iterrows()
sntmnt7dtesla = [
    "negative" if neg > pos else "positive" if pos > neg else "neutral"
    for neg, pos in zip(comb_tweets_tesla7d['neg'], comb_tweets_tesla7d['pos'])
]

In [None]:
# summarising how many rows received each label instead of dumping every label
print(pd.Series(sntmnt7dtesla, dtype='object').value_counts())

In [None]:
# attaching the per-row sentiment labels as a new 'sentiment' column
comb_tweets_tesla7d['sentiment'] = sntmnt7dtesla

In [None]:
# exporting the third (last 7 days) dataframe, now carrying the sentiment columns;
# to_csv's defaults apply, so the row index is written as the first column
comb_tweets_tesla7d.to_csv(
    path_or_buf='/content/gdrive/MyDrive/DATA_602_Project/datasets/Tesla/comb_tweets_tesla7d_snt.csv'
)

### Dataset - 4 : Previous Day Tweets

In [None]:
# calculating TextBlob polarity and subjectivity for each row of cleaned text

# one TextBlob sentiment is computed per row and both scores are read from it,
# rather than analysing every text twice (once per column)
tb_scoresprevtesla = [tb(text).sentiment for text in comb_tweets_teslaprev['clean_text']]
comb_tweets_teslaprev['polarity'] = [score.polarity for score in tb_scoresprevtesla]
comb_tweets_teslaprev['subjectivity'] = [score.subjectivity for score in tb_scoresprevtesla]

comb_tweets_teslaprev.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",6,33,50,lx21 made on check it out learn exe watt imrs ...,0.067588,0.469103
1,2015-01-02,@davenathan I just expected something more ext...,52,1240,408,i just expected something more extravagant lik...,0.166638,0.469114
2,2015-01-03,The Closing Bell is out! http://paper.li/BonaC...,108,1858,439,the closing bell is out eurusd gbpusd usdjpy a...,0.146284,0.446422
3,2015-01-04,The Closing Bell is out! http://paper.li/BonaC...,15,62,133,the closing bell is out eurusd gbpusd usdjpy a...,0.216906,0.507264
4,2015-01-05,#Apple : Price Target Raised to $125.00 at Arg...,74,385,411,price target raised to at argus free in accoun...,0.228005,0.496103
5,2015-01-06,Stock Contest!! Pick $COP and WIN a FREE Table...,128,373,536,stock contest pick cop and win a free tablet e...,0.159218,0.460904
6,2015-01-07,Apple reports earnings on January 27 http://m....,141,254,525,apple reports earnings on january join me on t...,0.15465,0.461288
7,2015-01-08,More U.S. dollar damage $AAPL:$USD - http://sc...,91,196,516,more u s dollar damage usd here s how apple co...,0.172645,0.453962
8,2015-01-09,Those stocks? $AAPL $BABA $TSLA $VA ... If sh...,267,487,877,those stocks baba va if she had let me do i wo...,0.194843,0.482668
9,2015-01-10,"Join me on tsū, is sharing social revenues wit...",166,394,587,join me on ts is sharing social revenues with ...,0.179049,0.466481


In [None]:
# calculating VADER negative, neutral, positive and compound scores for each row

# the analyzer is created once: constructing it inside the loop repeated its
# setup work for every row
vaderprevtesla = SentimentIntensityAnalyzer()

# one score dict per row, in row order
sentimentprevtesla = [
    vaderprevtesla.polarity_scores(text)
    for text in comb_tweets_teslaprev['clean_text']
]

# previewing the first few score dicts only
print(sentimentprevtesla[:5])

[{'neg': 0.041, 'neu': 0.894, 'pos': 0.065, 'compound': 0.9354}, {'neg': 0.04, 'neu': 0.835, 'pos': 0.125, 'compound': 0.9998}, {'neg': 0.058, 'neu': 0.794, 'pos': 0.148, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.754, 'pos': 0.2, 'compound': 1.0}, {'neg': 0.043, 'neu': 0.758, 'pos': 0.198, 'compound': 1.0}, {'neg': 0.067, 'neu': 0.778, 'pos': 0.155, 'compound': 1.0}, {'neg': 0.07, 'neu': 0.756, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.052, 'neu': 0.776, 'pos': 0.172, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.8, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.061, 'neu': 0.798, 'pos': 0.142, 'compound': 1.0}, {'neg': 0.022, 'neu': 0.839, 'pos': 0.139, 'compound': 0.9999}, {'neg': 0.014, 'neu': 0.845, 'pos': 0.141, 'compound': 0.9999}, {'neg': 0.049, 'neu': 0.785, 'pos': 0.166, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.772, 'pos': 0.172, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.796, 'pos': 0.138, 'compound': 1.0}, {'neg': 0.079, 'neu': 0.782, 'pos': 0.139, 'compound': 1.0}, {'neg': 0.078, 'n

In [None]:
# building a dataframe (one column per VADER score key) from the list of score dicts
sentimentprevtesla_df = pd.DataFrame(data=sentimentprevtesla)
print(sentimentprevtesla_df)

        neg    neu    pos  compound
0     0.041  0.894  0.065    0.9354
1     0.040  0.835  0.125    0.9998
2     0.058  0.794  0.148    1.0000
3     0.046  0.754  0.200    1.0000
4     0.043  0.758  0.198    1.0000
...     ...    ...    ...       ...
1821  0.059  0.793  0.149    1.0000
1822  0.050  0.788  0.163    0.9999
1823  0.059  0.800  0.142    0.9999
1824  0.051  0.779  0.169    1.0000
1825  0.055  0.807  0.138    1.0000

[1826 rows x 4 columns]


In [None]:
# adding the sentiment values to the original dataframe
#
# the scores must land on comb_tweets_teslaprev, taken from sentimentprevtesla_df:
# the labelling cell that follows reads 'neg' and 'pos' from comb_tweets_teslaprev,
# and that is the frame exported for this dataset
# (pandas aligns the assigned values on the row index)
for score_col in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_teslaprev[score_col] = sentimentprevtesla_df[score_col]

comb_tweets_teslaprev.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",6,33,50,lx21 made on check it out learn exe watt imrs ...,0.067588,0.469103,0.041,0.894,0.065,0.9354
1,2015-01-02,@davenathan I just expected something more ext...,52,1240,408,i just expected something more extravagant lik...,0.166638,0.469114,0.04,0.835,0.125,0.9998
2,2015-01-03,The Closing Bell is out! http://paper.li/BonaC...,108,1858,439,the closing bell is out eurusd gbpusd usdjpy a...,0.146284,0.446422,0.058,0.794,0.148,1.0
3,2015-01-04,The Closing Bell is out! http://paper.li/BonaC...,15,62,133,the closing bell is out eurusd gbpusd usdjpy a...,0.216906,0.507264,0.046,0.754,0.2,1.0
4,2015-01-05,#Apple : Price Target Raised to $125.00 at Arg...,74,385,411,price target raised to at argus free in accoun...,0.228005,0.496103,0.043,0.758,0.198,1.0
5,2015-01-06,Stock Contest!! Pick $COP and WIN a FREE Table...,128,373,536,stock contest pick cop and win a free tablet e...,0.159218,0.460904,0.067,0.778,0.155,1.0
6,2015-01-07,Apple reports earnings on January 27 http://m....,141,254,525,apple reports earnings on january join me on t...,0.15465,0.461288,0.07,0.756,0.174,1.0
7,2015-01-08,More U.S. dollar damage $AAPL:$USD - http://sc...,91,196,516,more u s dollar damage usd here s how apple co...,0.172645,0.453962,0.052,0.776,0.172,1.0
8,2015-01-09,Those stocks? $AAPL $BABA $TSLA $VA ... If sh...,267,487,877,those stocks baba va if she had let me do i wo...,0.194843,0.482668,0.046,0.8,0.154,1.0
9,2015-01-10,"Join me on tsū, is sharing social revenues wit...",166,394,587,join me on ts is sharing social revenues with ...,0.179049,0.466481,0.061,0.798,0.142,1.0


In [None]:
# calculating a sentiment label for each row

# "negative" when neg > pos, "positive" when pos > neg, "neutral" otherwise (ties);
# only the 'neg' and 'pos' scores decide the label, so the columns are walked
# directly instead of materialising every row with iterrows()
sntmntprevtesla = [
    "negative" if neg > pos else "positive" if pos > neg else "neutral"
    for neg, pos in zip(comb_tweets_teslaprev['neg'], comb_tweets_teslaprev['pos'])
]

In [None]:
# summarising how many rows received each label instead of dumping every label
print(pd.Series(sntmntprevtesla, dtype='object').value_counts())

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# attaching the per-row sentiment labels as a new 'sentiment' column
comb_tweets_teslaprev['sentiment'] = sntmntprevtesla

In [None]:
# exporting the fourth (previous day) dataframe, now carrying the sentiment columns;
# to_csv's defaults apply, so the row index is written as the first column
comb_tweets_teslaprev.to_csv(
    path_or_buf='/content/gdrive/MyDrive/DATA_602_Project/datasets/Tesla/comb_tweets_teslaprev_snt.csv'
)

# **For Amazon**

## **Dataset Cleaning**

### **Dataset 1 : Same Day Tweets**

In [None]:
# loading the same-day Amazon dataset and discarding the saved index column
# NOTE(review): the preview rows shown below mention $AAPL / Apple - confirm this
# CSV really holds Amazon tweets
comb_tweets_amazon1 = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Amazon/comb_tweets_amazon1.csv')
    .drop(columns=['Unnamed: 0'])
)
display(comb_tweets_amazon1)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554
...,...,...,...,...,...
1822,2019-12-28,Some top money flow for Dec 27Start your free ...,127,545,1579
1823,2019-12-29,Notable activity on social media in the past w...,74,307,528
1824,2019-12-30,Siguenos en telegram https://t.me/tradingbalea...,271,725,1821
1825,2019-12-31,Some top money flow for Dec 30Start your free ...,244,404,2088


In [None]:
# copying the raw tweet body into a 'clean_text' working column
# (the cleaning cells below overwrite 'clean_text'; 'body' is left as loaded)
comb_tweets_amazon1['clean_text'] = comb_tweets_amazon1['body']
display(comb_tweets_amazon1['clean_text'])

0       lx21 made $10,008  on $AAPL -Check it out! htt...
1       $aapl could be a problem sitting on support th...
2       Zacks' Bull Of The Day: Apple http://seekingal...
3       Free 5€ in account balance for first 100.000 m...
4       Free 5€ in account balance for first 100.000 m...
                              ...                        
1822    Some top money flow for Dec 27Start your free ...
1823    Notable activity on social media in the past w...
1824    Siguenos en telegram https://t.me/tradingbalea...
1825    Some top money flow for Dec 30Start your free ...
1826    Popular: $AAPL, $IWM, $BTAI, $FTNT, $ALIM, $BP...
Name: clean_text, Length: 1827, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase (overwrites 'clean_text' in place)
comb_tweets_amazon1['clean_text'] = comb_tweets_amazon1['clean_text'].str.lower()

In [None]:
# running clean_tweets over every row of 'clean_text', then previewing the result
cleaned_amazon1 = comb_tweets_amazon1['clean_text'].apply(clean_tweets)
comb_tweets_amazon1['clean_text'] = cleaned_amazon1
display(comb_tweets_amazon1['clean_text'].head())

0    lx21 made on check it out learn exe watt imrs ...
1    could be a problem sitting on support that bre...
2    zacks bull of the day apple have a great weeke...
3    free in account balance for first members lnkd...
4    free in account balance for first members lnkd...
Name: clean_text, dtype: object

### **Dataset - 2 : Last 3 Days Tweets**

In [None]:
# loading the last-3-days Amazon dataset and discarding the saved index column
comb_tweets_amazon3d = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Amazon/comb_tweets_amazon3d.csv')
    .drop(columns=['Unnamed: 0'])
)
print(comb_tweets_amazon3d)

     updated_dates  ... like_num
0       2015-01-03  ...      981
1       2015-01-04  ...     1077
2       2015-01-05  ...     1069
3       2015-01-06  ...     1319
4       2015-01-07  ...     1572
...            ...  ...      ...
1820    2019-12-28  ...     4261
1821    2019-12-29  ...     3332
1822    2019-12-30  ...     3928
1823    2019-12-31  ...     4437
1824    2020-01-01  ...     4041

[1825 rows x 5 columns]


In [None]:
# copying the raw tweet body into a 'clean_text' working column
# (the cleaning cells below overwrite 'clean_text'; 'body' is left as loaded)
comb_tweets_amazon3d['clean_text'] = comb_tweets_amazon3d['body']
display(comb_tweets_amazon3d)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,Zacks' Bull Of The Day: Apple http://seekingal...
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,Free 5€ in account balance for first 100.000 m...
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,Free 5€ in account balance for first 100.000 m...
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,Apple: Does The Party End In 2015? http://seek...
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,The Ever-Changing World Of Apple http://seekin...
...,...,...,...,...,...,...
1820,2019-12-28,Some top money flow for Dec 27Start your free ...,532,1081,4261,Some top money flow for Dec 27Start your free ...
1821,2019-12-29,Notable activity on social media in the past w...,397,1035,3332,Notable activity on social media in the past w...
1822,2019-12-30,Siguenos en telegram https://t.me/tradingbalea...,472,1577,3928,Siguenos en telegram https://t.me/tradingbalea...
1823,2019-12-31,Some top money flow for Dec 30Start your free ...,589,1436,4437,Some top money flow for Dec 30Start your free ...


In [None]:
# cleaning the tweets

# converting all the text to lowercase (overwrites 'clean_text' in place)
comb_tweets_amazon3d['clean_text'] = comb_tweets_amazon3d['clean_text'].str.lower()

In [None]:
# running clean_tweets over every row of 'clean_text', then previewing the result
cleaned_amazon3d = comb_tweets_amazon3d['clean_text'].apply(clean_tweets)
comb_tweets_amazon3d['clean_text'] = cleaned_amazon3d
display(comb_tweets_amazon3d['clean_text'].head())

0    zacks bull of the day apple have a great weeke...
1    free in account balance for first members lnkd...
2    free in account balance for first members lnkd...
3    apple does the party end in long with successf...
4    the ever changing world of apple what dean kar...
Name: clean_text, dtype: object

### **Dataset - 3 : Last 7 Days Tweets**

In [None]:
# loading the last-7-days Amazon dataset and discarding the saved index column
comb_tweets_amazon7d = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Amazon/comb_tweets_amazon7d.csv')
    .drop(columns=['Unnamed: 0'])
)
print(comb_tweets_amazon7d)

     updated_dates  ... like_num
0       2015-01-07  ...     2876
1       2015-01-08  ...     3472
2       2015-01-09  ...     3522
3       2015-01-10  ...     3616
4       2015-01-11  ...     3519
...            ...  ...      ...
1816    2019-12-28  ...     8743
1817    2019-12-29  ...     8104
1818    2019-12-30  ...     8661
1819    2019-12-31  ...     9614
1820    2020-01-01  ...     8830

[1821 rows x 5 columns]


In [None]:
# copying the raw tweet body into a 'clean_text' working column
# (the cleaning cells below overwrite 'clean_text'; 'body' is left as loaded)
comb_tweets_amazon7d['clean_text'] = comb_tweets_amazon7d['body']
display(comb_tweets_amazon7d['clean_text'])

0       The Ever-Changing World Of Apple http://seekin...
1       One Thing Apple Should Do To Preserve Its Mark...
2       How iPhone Demand Could Impact Apple's Q2 Guid...
3       Apple: Asian Carriers Are Making iPhone 6 Chea...
4       Caturday: There is no MacBook, there is only Z...
                              ...                        
1816    Some top money flow for Dec 27Start your free ...
1817    Notable activity on social media in the past w...
1818    Siguenos en telegram https://t.me/tradingbalea...
1819    Some top money flow for Dec 30Start your free ...
1820    Popular: $AAPL, $IWM, $BTAI, $FTNT, $ALIM, $BP...
Name: clean_text, Length: 1821, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase (overwrites 'clean_text' in place)
comb_tweets_amazon7d['clean_text'] = comb_tweets_amazon7d['clean_text'].str.lower()


In [None]:
# running clean_tweets over every row of 'clean_text', then previewing the result
cleaned_amazon7d = comb_tweets_amazon7d['clean_text'].apply(clean_tweets)
comb_tweets_amazon7d['clean_text'] = cleaned_amazon7d
print(comb_tweets_amazon7d['clean_text'].head())

0    the ever changing world of apple what dean kar...
1    one thing apple should do to preserve its mark...
2    how iphone demand could impact apple s q2 guid...
3    apple asian carriers are making iphone cheaper...
4    caturday there is no macbook there is only zuu...
Name: clean_text, dtype: object


### **Dataset - 4 : Previous Day Tweets**

In [None]:
# loading the previous-day Amazon dataset and discarding the saved index column
comb_tweets_amazonprev = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Amazon/comb_tweets_amazonprev.csv')
    .drop(columns=['Unnamed: 0'])
)
print(comb_tweets_amazonprev)

     updated_dates  ... like_num
0       2015-01-01  ...       50
1       2015-01-02  ...      408
2       2015-01-03  ...      439
3       2015-01-04  ...      133
4       2015-01-05  ...      411
...            ...  ...      ...
1821    2019-12-28  ...     1058
1822    2019-12-29  ...     1460
1823    2019-12-30  ...      577
1824    2019-12-31  ...     2684
1825    2020-01-01  ...     1081

[1826 rows x 5 columns]


In [None]:
# copying the raw tweet body into a 'clean_text' working column
# (the cleaning cells below overwrite 'clean_text'; 'body' is left as loaded)
comb_tweets_amazonprev['clean_text'] = comb_tweets_amazonprev['body']
display(comb_tweets_amazonprev['clean_text'].head())

0    lx21 made $10,008  on $AAPL -Check it out! htt...
1    @davenathan I just expected something more ext...
2    The Closing Bell is out! http://paper.li/BonaC...
3    The Closing Bell is out! http://paper.li/BonaC...
4    #Apple : Price Target Raised to $125.00 at Arg...
Name: clean_text, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase (overwrites the existing 'clean_text' column in place)
comb_tweets_amazonprev['clean_text'] = comb_tweets_amazonprev['clean_text'].str.lower()


In [None]:
# running clean_tweets over every row of 'clean_text', then previewing the result
cleaned_amazonprev = comb_tweets_amazonprev['clean_text'].apply(clean_tweets)
comb_tweets_amazonprev['clean_text'] = cleaned_amazonprev
print(comb_tweets_amazonprev['clean_text'].head())

0    lx21 made on check it out learn exe watt imrs ...
1    i just expected something more extravagant lik...
2    the closing bell is out eurusd gbpusd usdjpy a...
3    the closing bell is out eurusd gbpusd usdjpy a...
4    price target raised to at argus free in accoun...
Name: clean_text, dtype: object


## **Finding Sentiment Analysis Values**

### **Dataset - 1 : Same Day Tweets**

In [None]:
# calculating TextBlob polarity and subjectivity for each row of cleaned text

# one TextBlob sentiment is computed per row and both scores are read from it,
# rather than analysing every text twice (once per column)
tb_scores1amazon = [tb(text).sentiment for text in comb_tweets_amazon1['clean_text']]
comb_tweets_amazon1['polarity'] = [score.polarity for score in tb_scores1amazon]
comb_tweets_amazon1['subjectivity'] = [score.subjectivity for score in tb_scores1amazon]

comb_tweets_amazon1.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227,lx21 made on check it out learn exe watt imrs ...,0.152047,0.469165
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562,could be a problem sitting on support that bre...,0.137389,0.445212
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192,zacks bull of the day apple have a great weeke...,0.186061,0.49581
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323,free in account balance for first members lnkd...,0.214288,0.494194
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554,free in account balance for first members lnkd...,0.171292,0.464973
5,2015-01-06,Apple: Does The Party End In 2015? http://seek...,115,228,442,apple does the party end in long with successf...,0.15294,0.461804
6,2015-01-07,The Ever-Changing World Of Apple http://seekin...,105,209,576,the ever changing world of apple what dean kar...,0.170265,0.452655
7,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,251,428,823,one thing apple should do to preserve its mark...,0.189625,0.4805
8,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,177,364,612,how iphone demand could impact apple s q2 guid...,0.186721,0.474236
9,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,50,208,286,apple asian carriers are making iphone cheaper...,0.260925,0.425825


In [None]:
# calculating VADER negative, neutral, positive and compound scores for each row

# the analyzer is created once: constructing it inside the loop repeated its
# setup work for every row
vader1amazon = SentimentIntensityAnalyzer()

# one score dict per row, in row order
sentiment1amazon = [
    vader1amazon.polarity_scores(text)
    for text in comb_tweets_amazon1['clean_text']
]

# previewing the first few score dicts only
print(sentiment1amazon[:5])

[{'neg': 0.049, 'neu': 0.795, 'pos': 0.156, 'compound': 0.9999}, {'neg': 0.055, 'neu': 0.825, 'pos': 0.12, 'compound': 0.9999}, {'neg': 0.047, 'neu': 0.772, 'pos': 0.181, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.762, 'pos': 0.193, 'compound': 1.0}, {'neg': 0.06, 'neu': 0.773, 'pos': 0.167, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.752, 'pos': 0.182, 'compound': 1.0}, {'neg': 0.055, 'neu': 0.781, 'pos': 0.164, 'compound': 1.0}, {'neg': 0.048, 'neu': 0.787, 'pos': 0.164, 'compound': 1.0}, {'neg': 0.054, 'neu': 0.786, 'pos': 0.159, 'compound': 1.0}, {'neg': 0.029, 'neu': 0.795, 'pos': 0.176, 'compound': 1.0}, {'neg': 0.036, 'neu': 0.752, 'pos': 0.212, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.795, 'pos': 0.155, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.76, 'pos': 0.188, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.78, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.075, 'neu': 0.785, 'pos': 0.14, 'compound': 1.0}, {'neg': 0.074, 'neu': 0.788, 'pos': 0.137, 'compound': 1.0}, {'neg': 0.058, 'neu': 

In [None]:
# building a dataframe (one column per VADER score key) from the list of score dicts
sentiment1amazon_df = pd.DataFrame(data=sentiment1amazon)
print(sentiment1amazon_df)

        neg    neu    pos  compound
0     0.049  0.795  0.156    0.9999
1     0.055  0.825  0.120    0.9999
2     0.047  0.772  0.181    1.0000
3     0.046  0.762  0.193    1.0000
4     0.060  0.773  0.167    1.0000
...     ...    ...    ...       ...
1822  0.044  0.820  0.136    0.9999
1823  0.047  0.811  0.142    0.9999
1824  0.051  0.776  0.173    1.0000
1825  0.052  0.808  0.140    1.0000
1826  0.074  0.784  0.143    0.9988

[1827 rows x 4 columns]


In [None]:
# copying the four VADER score columns into the original dataframe
# (pandas aligns the assigned values on the row index)
for score_col in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_amazon1[score_col] = sentiment1amazon_df[score_col]

comb_tweets_amazon1.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227,lx21 made on check it out learn exe watt imrs ...,0.152047,0.469165,0.049,0.795,0.156,0.9999
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562,could be a problem sitting on support that bre...,0.137389,0.445212,0.055,0.825,0.12,0.9999
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192,zacks bull of the day apple have a great weeke...,0.186061,0.49581,0.047,0.772,0.181,1.0
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323,free in account balance for first members lnkd...,0.214288,0.494194,0.046,0.762,0.193,1.0
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554,free in account balance for first members lnkd...,0.171292,0.464973,0.06,0.773,0.167,1.0
5,2015-01-06,Apple: Does The Party End In 2015? http://seek...,115,228,442,apple does the party end in long with successf...,0.15294,0.461804,0.066,0.752,0.182,1.0
6,2015-01-07,The Ever-Changing World Of Apple http://seekin...,105,209,576,the ever changing world of apple what dean kar...,0.170265,0.452655,0.055,0.781,0.164,1.0
7,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,251,428,823,one thing apple should do to preserve its mark...,0.189625,0.4805,0.048,0.787,0.164,1.0
8,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,177,364,612,how iphone demand could impact apple s q2 guid...,0.186721,0.474236,0.054,0.786,0.159,1.0
9,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,50,208,286,apple asian carriers are making iphone cheaper...,0.260925,0.425825,0.029,0.795,0.176,1.0


In [None]:
# calculating a sentiment label for each row

# "negative" when neg > pos, "positive" when pos > neg, "neutral" otherwise (ties);
# only the 'neg' and 'pos' scores decide the label, so the columns are walked
# directly instead of materialising every row with iterrows()
sntmnt1amazon = [
    "negative" if neg > pos else "positive" if pos > neg else "neutral"
    for neg, pos in zip(comb_tweets_amazon1['neg'], comb_tweets_amazon1['pos'])
]

In [None]:
# summarising how many rows received each label instead of dumping every label
print(pd.Series(sntmnt1amazon, dtype='object').value_counts())

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# attaching the per-row sentiment labels as a new 'sentiment' column and previewing the result
comb_tweets_amazon1['sentiment'] = sntmnt1amazon
print(comb_tweets_amazon1.head())

  updated_dates                                               body  \
0    2015-01-01  lx21 made $10,008  on $AAPL -Check it out! htt...   
1    2015-01-02  $aapl could be a problem sitting on support th...   
2    2015-01-03  Zacks' Bull Of The Day: Apple http://seekingal...   
3    2015-01-04  Free 5€ in account balance for first 100.000 m...   
4    2015-01-05  Free 5€ in account balance for first 100.000 m...   

   comment_num  retweet_num  like_num  \
0           17          968       227   
1          128         2102       562   
2           31          103       192   
3           52          328       323   
4          139          392       554   

                                          clean_text  polarity  subjectivity  \
0  lx21 made on check it out learn exe watt imrs ...  0.152047      0.469165   
1  could be a problem sitting on support that bre...  0.137389      0.445212   
2  zacks bull of the day apple have a great weeke...  0.186061      0.495810   
3  free in a

In [None]:
# writing the first dataframe (with its sentiment columns) to Google Drive as CSV
amazon1_snt_path = '/content/gdrive/MyDrive/DATA_602_Project/datasets/Amazon/comb_tweets_amazon1_snt.csv'
comb_tweets_amazon1.to_csv(amazon1_snt_path)

### Dataset - 2 : Last 3 Days Tweets

In [None]:
# calculating TextBlob polarity and subjectivity for each row

# each text is analysed once and both scores are read from the same sentiment result,
# instead of building two TextBlob objects per row (one per score)
tb_scores_3d_amazon = [tb(text).sentiment for text in comb_tweets_amazon3d['clean_text']]

comb_tweets_amazon3d['polarity'] = [score.polarity for score in tb_scores_3d_amazon]
comb_tweets_amazon3d['subjectivity'] = [score.subjectivity for score in tb_scores_3d_amazon]

comb_tweets_amazon3d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,zacks bull of the day apple have a great weeke...,0.158465,0.468734
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,free in account balance for first members lnkd...,0.175851,0.476125
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,free in account balance for first members lnkd...,0.183886,0.47845
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,apple does the party end in long with successf...,0.169583,0.468
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,the ever changing world of apple what dean kar...,0.164246,0.459994
5,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,471,865,1841,one thing apple should do to preserve its mark...,0.172146,0.466585
6,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,533,1001,2011,how iphone demand could impact apple s q2 guid...,0.183188,0.47059
7,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,478,1000,1721,apple asian carriers are making iphone cheaper...,0.197525,0.4712
8,2015-01-11,"Caturday: There is no MacBook, there is only Z...",251,663,1124,caturday there is no macbook there is only zuu...,0.217169,0.457028
9,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",141,531,866,the binary options competition win lnkd l gool...,0.220221,0.442879


In [None]:
# calculating negative, neutral, positive and compound VADER scores for each row

# the analyzer is created once and reused: constructing SentimentIntensityAnalyzer
# loads the VADER lexicon, so creating it inside the loop repeated that work for every row
vader_3d_amazon = SentimentIntensityAnalyzer()

# one dict per row with the keys 'neg', 'neu', 'pos' and 'compound'
sentiment3damazon = [
    vader_3d_amazon.polarity_scores(text)
    for text in comb_tweets_amazon3d['clean_text']
]

# previewing the first few score dicts instead of printing the whole list
print(sentiment3damazon[:5])

[{'neg': 0.059, 'neu': 0.786, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.049, 'neu': 0.776, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.053, 'neu': 0.767, 'pos': 0.18, 'compound': 1.0}, {'neg': 0.063, 'neu': 0.76, 'pos': 0.177, 'compound': 1.0}, {'neg': 0.06, 'neu': 0.766, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.058, 'neu': 0.773, 'pos': 0.169, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.784, 'pos': 0.165, 'compound': 1.0}, {'neg': 0.047, 'neu': 0.787, 'pos': 0.166, 'compound': 1.0}, {'neg': 0.048, 'neu': 0.781, 'pos': 0.171, 'compound': 1.0}, {'neg': 0.042, 'neu': 0.784, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.049, 'neu': 0.768, 'pos': 0.182, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.774, 'pos': 0.169, 'compound': 1.0}, {'neg': 0.059, 'neu': 0.77, 'pos': 0.171, 'compound': 1.0}, {'neg': 0.07, 'neu': 0.783, 'pos': 0.147, 'compound': 1.0}, {'neg': 0.073, 'neu': 0.788, 'pos': 0.139, 'compound': 1.0}, {'neg': 0.071, 'neu': 0.78, 'pos': 0.149, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.788, 

In [None]:
# turning the list of score dicts into a dataframe with one column per score
sentiment3damazon_df = pd.DataFrame.from_records(sentiment3damazon)
print(sentiment3damazon_df)

        neg    neu    pos  compound
0     0.059  0.786  0.154       1.0
1     0.049  0.776  0.174       1.0
2     0.053  0.767  0.180       1.0
3     0.063  0.760  0.177       1.0
4     0.060  0.766  0.174       1.0
...     ...    ...    ...       ...
1820  0.054  0.795  0.151       1.0
1821  0.054  0.796  0.149       1.0
1822  0.049  0.787  0.164       1.0
1823  0.053  0.792  0.155       1.0
1824  0.055  0.791  0.154       1.0

[1825 rows x 4 columns]


In [None]:
# copying the four VADER score columns into the original dataframe, one column at a time
for score_col in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_amazon3d[score_col] = sentiment3damazon_df[score_col]

comb_tweets_amazon3d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,zacks bull of the day apple have a great weeke...,0.158465,0.468734,0.059,0.786,0.154,1.0
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,free in account balance for first members lnkd...,0.175851,0.476125,0.049,0.776,0.174,1.0
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,free in account balance for first members lnkd...,0.183886,0.47845,0.053,0.767,0.18,1.0
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,apple does the party end in long with successf...,0.169583,0.468,0.063,0.76,0.177,1.0
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,the ever changing world of apple what dean kar...,0.164246,0.459994,0.06,0.766,0.174,1.0
5,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,471,865,1841,one thing apple should do to preserve its mark...,0.172146,0.466585,0.058,0.773,0.169,1.0
6,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,533,1001,2011,how iphone demand could impact apple s q2 guid...,0.183188,0.47059,0.051,0.784,0.165,1.0
7,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,478,1000,1721,apple asian carriers are making iphone cheaper...,0.197525,0.4712,0.047,0.787,0.166,1.0
8,2015-01-11,"Caturday: There is no MacBook, there is only Z...",251,663,1124,caturday there is no macbook there is only zuu...,0.217169,0.457028,0.048,0.781,0.171,1.0
9,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",141,531,866,the binary options competition win lnkd l gool...,0.220221,0.442879,0.042,0.784,0.174,1.0


In [None]:
# labelling each row as negative / positive / neutral from its VADER scores

# a row is "negative" when neg > pos, "positive" when pos > neg and "neutral" otherwise;
# the comparison is done column-wise instead of row by row with iterrows, and the
# 'neu' score is no longer read because it never took part in the decision
neg_scores_3d_amazon = comb_tweets_amazon3d['neg']
pos_scores_3d_amazon = comb_tweets_amazon3d['pos']

sntmnt3damazon = np.select(
    [neg_scores_3d_amazon > pos_scores_3d_amazon, pos_scores_3d_amazon > neg_scores_3d_amazon],
    ["negative", "positive"],
    default="neutral",
).tolist()

In [None]:
# summarising the labels as counts per class instead of printing every element of the list
print(pd.Series(sntmnt3damazon).value_counts())

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# attaching the sentiment labels as a new column, positioned on the dataframe's own index
comb_tweets_amazon3d['sentiment'] = pd.Series(sntmnt3damazon, index=comb_tweets_amazon3d.index)

In [None]:
# writing the second dataframe (with its sentiment columns) to Google Drive as CSV
amazon3d_snt_path = '/content/gdrive/MyDrive/DATA_602_Project/datasets/Amazon/comb_tweets_amazon3d_snt.csv'
comb_tweets_amazon3d.to_csv(amazon3d_snt_path)

### Dataset - 3 : Last 7 Days Tweets

In [None]:
# calculating TextBlob polarity and subjectivity for each row

# each text is analysed once and both scores are read from the same sentiment result,
# instead of building two TextBlob objects per row (one per score)
tb_scores_7d_amazon = [tb(text).sentiment for text in comb_tweets_amazon7d['clean_text']]

comb_tweets_amazon7d['polarity'] = [score.polarity for score in tb_scores_7d_amazon]
comb_tweets_amazon7d['subjectivity'] = [score.subjectivity for score in tb_scores_7d_amazon]

comb_tweets_amazon7d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-07,The Ever-Changing World Of Apple http://seekin...,587,4330,2876,the ever changing world of apple what dean kar...,0.166986,0.464993
1,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,821,3790,3472,one thing apple should do to preserve its mark...,0.172842,0.468477
2,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,870,2052,3522,how iphone demand could impact apple s q2 guid...,0.177947,0.471271
3,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,889,2157,3616,apple asian carriers are making iphone cheaper...,0.182011,0.467047
4,2015-01-11,"Caturday: There is no MacBook, there is only Z...",861,1920,3519,caturday there is no macbook there is only zuu...,0.184405,0.46369
5,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",789,1760,3319,the binary options competition win lnkd l gool...,0.185916,0.462454
6,2015-01-13,What's Driving Our $30 Billion Valuation For A...,809,1989,3399,what s driving our billion valuation for apple...,0.183448,0.457585
7,2015-01-14,The Storage Lawsuit Against Apple Is Beyond Fr...,817,2049,3272,the storage lawsuit against apple is beyond fr...,0.173973,0.455505
8,2015-01-15,Apple 2015 Outlook: Where Do We Go From Here? ...,711,1826,2812,apple outlook where do we go from here qihu ba...,0.167914,0.44767
9,2015-01-16,Apple's App Store Will Surprise To The Upside ...,690,1929,2561,apple s app store will surprise to the upside ...,0.151119,0.445665


In [None]:
# fetching the VADER lexicon that SentimentIntensityAnalyzer loads
# (nltk is already imported in the imports cell at the top of the notebook)
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [None]:
# calculating negative, neutral, positive and compound VADER scores for each row

# the analyzer is created once and reused: constructing SentimentIntensityAnalyzer
# loads the VADER lexicon, so creating it inside the loop repeated that work for every row
vader_7d_amazon = SentimentIntensityAnalyzer()

# one dict per row with the keys 'neg', 'neu', 'pos' and 'compound'
sentiment7damazon = [
    vader_7d_amazon.polarity_scores(text)
    for text in comb_tweets_amazon7d['clean_text']
]

# previewing the first few score dicts instead of printing the whole list
print(sentiment7damazon[:5])

KeyboardInterrupt: ignored

In [None]:
# turning the list of score dicts into a dataframe with one column per score
sentiment7damazon_df = pd.DataFrame.from_records(sentiment7damazon)
print(sentiment7damazon_df)

In [None]:
# copying the four VADER score columns into the original dataframe, one column at a time
for score_col in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_amazon7d[score_col] = sentiment7damazon_df[score_col]

comb_tweets_amazon7d.head(10)

In [None]:
# labelling each row as negative / positive / neutral from its VADER scores

# a row is "negative" when neg > pos, "positive" when pos > neg and "neutral" otherwise;
# the comparison is done column-wise instead of row by row with iterrows, and the
# 'neu' score is no longer read because it never took part in the decision
neg_scores_7d_amazon = comb_tweets_amazon7d['neg']
pos_scores_7d_amazon = comb_tweets_amazon7d['pos']

sntmnt7damazon = np.select(
    [neg_scores_7d_amazon > pos_scores_7d_amazon, pos_scores_7d_amazon > neg_scores_7d_amazon],
    ["negative", "positive"],
    default="neutral",
).tolist()

In [None]:
# summarising the labels as counts per class instead of printing every element of the list
print(pd.Series(sntmnt7damazon).value_counts())

In [None]:
# attaching the sentiment labels as a new column, positioned on the dataframe's own index
comb_tweets_amazon7d['sentiment'] = pd.Series(sntmnt7damazon, index=comb_tweets_amazon7d.index)

In [None]:
# writing the third dataframe (with its sentiment columns) to Google Drive as CSV
amazon7d_snt_path = '/content/gdrive/MyDrive/DATA_602_Project/datasets/Amazon/comb_tweets_amazon7d_snt.csv'
comb_tweets_amazon7d.to_csv(amazon7d_snt_path)

### Dataset - 4 : Previous Day Tweets

In [None]:
# calculating TextBlob polarity and subjectivity for each row

# each text is analysed once and both scores are read from the same sentiment result,
# instead of building two TextBlob objects per row (one per score)
tb_scores_prev_amazon = [tb(text).sentiment for text in comb_tweets_amazonprev['clean_text']]

comb_tweets_amazonprev['polarity'] = [score.polarity for score in tb_scores_prev_amazon]
comb_tweets_amazonprev['subjectivity'] = [score.subjectivity for score in tb_scores_prev_amazon]

comb_tweets_amazonprev.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",6,33,50,lx21 made on check it out learn exe watt imrs ...,0.067588,0.469103
1,2015-01-02,@davenathan I just expected something more ext...,52,1240,408,i just expected something more extravagant lik...,0.166638,0.469114
2,2015-01-03,The Closing Bell is out! http://paper.li/BonaC...,108,1858,439,the closing bell is out eurusd gbpusd usdjpy a...,0.146284,0.446422
3,2015-01-04,The Closing Bell is out! http://paper.li/BonaC...,15,62,133,the closing bell is out eurusd gbpusd usdjpy a...,0.216906,0.507264
4,2015-01-05,#Apple : Price Target Raised to $125.00 at Arg...,74,385,411,price target raised to at argus free in accoun...,0.228005,0.496103
5,2015-01-06,Stock Contest!! Pick $COP and WIN a FREE Table...,128,373,536,stock contest pick cop and win a free tablet e...,0.159218,0.460904
6,2015-01-07,Apple reports earnings on January 27 http://m....,141,254,525,apple reports earnings on january join me on t...,0.15465,0.461288
7,2015-01-08,More U.S. dollar damage $AAPL:$USD - http://sc...,91,196,516,more u s dollar damage usd here s how apple co...,0.172645,0.453962
8,2015-01-09,Those stocks? $AAPL $BABA $TSLA $VA ... If sh...,267,487,877,those stocks baba va if she had let me do i wo...,0.194843,0.482668
9,2015-01-10,"Join me on tsū, is sharing social revenues wit...",166,394,587,join me on ts is sharing social revenues with ...,0.179049,0.466481


In [None]:
# calculating negative, neutral, positive and compound VADER scores for each row

# the analyzer is created once and reused: constructing SentimentIntensityAnalyzer
# loads the VADER lexicon, so creating it inside the loop repeated that work for every row
vader_prev_amazon = SentimentIntensityAnalyzer()

# one dict per row with the keys 'neg', 'neu', 'pos' and 'compound'
sentimentprevamazon = [
    vader_prev_amazon.polarity_scores(text)
    for text in comb_tweets_amazonprev['clean_text']
]

# previewing the first few score dicts instead of printing the whole list
print(sentimentprevamazon[:5])

[{'neg': 0.041, 'neu': 0.894, 'pos': 0.065, 'compound': 0.9354}, {'neg': 0.04, 'neu': 0.835, 'pos': 0.125, 'compound': 0.9998}, {'neg': 0.058, 'neu': 0.794, 'pos': 0.148, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.754, 'pos': 0.2, 'compound': 1.0}, {'neg': 0.043, 'neu': 0.758, 'pos': 0.198, 'compound': 1.0}, {'neg': 0.067, 'neu': 0.778, 'pos': 0.155, 'compound': 1.0}, {'neg': 0.07, 'neu': 0.756, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.052, 'neu': 0.776, 'pos': 0.172, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.8, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.061, 'neu': 0.798, 'pos': 0.142, 'compound': 1.0}, {'neg': 0.022, 'neu': 0.839, 'pos': 0.139, 'compound': 0.9999}, {'neg': 0.014, 'neu': 0.845, 'pos': 0.141, 'compound': 0.9999}, {'neg': 0.049, 'neu': 0.785, 'pos': 0.166, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.772, 'pos': 0.172, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.796, 'pos': 0.138, 'compound': 1.0}, {'neg': 0.079, 'neu': 0.782, 'pos': 0.139, 'compound': 1.0}, {'neg': 0.078, 'n

In [None]:
# turning the list of score dicts into a dataframe with one column per score
sentimentprevamazon_df = pd.DataFrame.from_records(sentimentprevamazon)
print(sentimentprevamazon_df)

        neg    neu    pos  compound
0     0.041  0.894  0.065    0.9354
1     0.040  0.835  0.125    0.9998
2     0.058  0.794  0.148    1.0000
3     0.046  0.754  0.200    1.0000
4     0.043  0.758  0.198    1.0000
...     ...    ...    ...       ...
1821  0.059  0.793  0.149    1.0000
1822  0.050  0.788  0.163    0.9999
1823  0.059  0.800  0.142    0.9999
1824  0.051  0.779  0.169    1.0000
1825  0.055  0.807  0.138    1.0000

[1826 rows x 4 columns]


In [None]:
# copying the four VADER score columns into the original dataframe, one column at a time
for score_col in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_amazonprev[score_col] = sentimentprevamazon_df[score_col]

comb_tweets_amazonprev.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",6,33,50,lx21 made on check it out learn exe watt imrs ...,0.067588,0.469103,0.041,0.894,0.065,0.9354
1,2015-01-02,@davenathan I just expected something more ext...,52,1240,408,i just expected something more extravagant lik...,0.166638,0.469114,0.04,0.835,0.125,0.9998
2,2015-01-03,The Closing Bell is out! http://paper.li/BonaC...,108,1858,439,the closing bell is out eurusd gbpusd usdjpy a...,0.146284,0.446422,0.058,0.794,0.148,1.0
3,2015-01-04,The Closing Bell is out! http://paper.li/BonaC...,15,62,133,the closing bell is out eurusd gbpusd usdjpy a...,0.216906,0.507264,0.046,0.754,0.2,1.0
4,2015-01-05,#Apple : Price Target Raised to $125.00 at Arg...,74,385,411,price target raised to at argus free in accoun...,0.228005,0.496103,0.043,0.758,0.198,1.0
5,2015-01-06,Stock Contest!! Pick $COP and WIN a FREE Table...,128,373,536,stock contest pick cop and win a free tablet e...,0.159218,0.460904,0.067,0.778,0.155,1.0
6,2015-01-07,Apple reports earnings on January 27 http://m....,141,254,525,apple reports earnings on january join me on t...,0.15465,0.461288,0.07,0.756,0.174,1.0
7,2015-01-08,More U.S. dollar damage $AAPL:$USD - http://sc...,91,196,516,more u s dollar damage usd here s how apple co...,0.172645,0.453962,0.052,0.776,0.172,1.0
8,2015-01-09,Those stocks? $AAPL $BABA $TSLA $VA ... If sh...,267,487,877,those stocks baba va if she had let me do i wo...,0.194843,0.482668,0.046,0.8,0.154,1.0
9,2015-01-10,"Join me on tsū, is sharing social revenues wit...",166,394,587,join me on ts is sharing social revenues with ...,0.179049,0.466481,0.061,0.798,0.142,1.0


In [None]:
# labelling each row as negative / positive / neutral from its VADER scores

# a row is "negative" when neg > pos, "positive" when pos > neg and "neutral" otherwise;
# the comparison is done column-wise instead of row by row with iterrows, and the
# 'neu' score is no longer read because it never took part in the decision
neg_scores_prev_amazon = comb_tweets_amazonprev['neg']
pos_scores_prev_amazon = comb_tweets_amazonprev['pos']

sntmntprevamazon = np.select(
    [neg_scores_prev_amazon > pos_scores_prev_amazon, pos_scores_prev_amazon > neg_scores_prev_amazon],
    ["negative", "positive"],
    default="neutral",
).tolist()

In [None]:
# summarising the labels as counts per class instead of printing every element of the list
print(pd.Series(sntmntprevamazon).value_counts())

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# attaching the sentiment labels as a new column, positioned on the dataframe's own index
comb_tweets_amazonprev['sentiment'] = pd.Series(sntmntprevamazon, index=comb_tweets_amazonprev.index)

In [None]:
# writing the fourth dataframe (with its sentiment columns) to Google Drive as CSV
amazonprev_snt_path = '/content/gdrive/MyDrive/DATA_602_Project/datasets/Amazon/comb_tweets_amazonprev_snt.csv'
comb_tweets_amazonprev.to_csv(amazonprev_snt_path)

# **For Microsoft**

## **Dataset Cleaning**

### **Dataset 1 : Same Day Tweets**

In [None]:
# reading the CSV from Google Drive and discarding its 'Unnamed: 0' column
# (presumably the row index that was saved along with the data)
comb_tweets_microsoft1 = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Microsoft/comb_tweets_microsoft1.csv')
    .drop('Unnamed: 0', axis='columns')
)
display(comb_tweets_microsoft1)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554
...,...,...,...,...,...
1822,2019-12-28,Some top money flow for Dec 27Start your free ...,127,545,1579
1823,2019-12-29,Notable activity on social media in the past w...,74,307,528
1824,2019-12-30,Siguenos en telegram https://t.me/tradingbalea...,271,725,1821
1825,2019-12-31,Some top money flow for Dec 30Start your free ...,244,404,2088


In [None]:
# starting the 'clean_text' column from the raw tweet text held in 'body'
microsoft1_body = comb_tweets_microsoft1['body']
comb_tweets_microsoft1['clean_text'] = microsoft1_body
display(comb_tweets_microsoft1.loc[:, 'clean_text'])

0       lx21 made $10,008  on $AAPL -Check it out! htt...
1       $aapl could be a problem sitting on support th...
2       Zacks' Bull Of The Day: Apple http://seekingal...
3       Free 5€ in account balance for first 100.000 m...
4       Free 5€ in account balance for first 100.000 m...
                              ...                        
1822    Some top money flow for Dec 27Start your free ...
1823    Notable activity on social media in the past w...
1824    Siguenos en telegram https://t.me/tradingbalea...
1825    Some top money flow for Dec 30Start your free ...
1826    Popular: $AAPL, $IWM, $BTAI, $FTNT, $ALIM, $BP...
Name: clean_text, Length: 1827, dtype: object

In [None]:
# normalising the tweet text

# lowercasing every entry of 'clean_text'
microsoft1_lower = comb_tweets_microsoft1['clean_text'].str.lower()
comb_tweets_microsoft1['clean_text'] = microsoft1_lower

In [None]:
# passing every tweet through clean_tweets, then previewing the first five cleaned rows
comb_tweets_microsoft1['clean_text'] = comb_tweets_microsoft1['clean_text'].apply(clean_tweets)
display(comb_tweets_microsoft1['clean_text'].head(5))

0    lx21 made on check it out learn exe watt imrs ...
1    could be a problem sitting on support that bre...
2    zacks bull of the day apple have a great weeke...
3    free in account balance for first members lnkd...
4    free in account balance for first members lnkd...
Name: clean_text, dtype: object

### **Dataset - 2 : Last 3 Days Tweets**

In [None]:
# reading the CSV from Google Drive and discarding its 'Unnamed: 0' column
# (presumably the row index that was saved along with the data)
comb_tweets_microsoft3d = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Microsoft/comb_tweets_microsoft3d.csv')
    .drop('Unnamed: 0', axis='columns')
)
print(comb_tweets_microsoft3d)

     updated_dates  ... like_num
0       2015-01-03  ...      981
1       2015-01-04  ...     1077
2       2015-01-05  ...     1069
3       2015-01-06  ...     1319
4       2015-01-07  ...     1572
...            ...  ...      ...
1820    2019-12-28  ...     4261
1821    2019-12-29  ...     3332
1822    2019-12-30  ...     3928
1823    2019-12-31  ...     4437
1824    2020-01-01  ...     4041

[1825 rows x 5 columns]


In [None]:
# starting the 'clean_text' column from the raw tweet text held in 'body'
microsoft3d_body = comb_tweets_microsoft3d['body']
comb_tweets_microsoft3d['clean_text'] = microsoft3d_body
display(comb_tweets_microsoft3d)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,Zacks' Bull Of The Day: Apple http://seekingal...
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,Free 5€ in account balance for first 100.000 m...
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,Free 5€ in account balance for first 100.000 m...
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,Apple: Does The Party End In 2015? http://seek...
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,The Ever-Changing World Of Apple http://seekin...
...,...,...,...,...,...,...
1820,2019-12-28,Some top money flow for Dec 27Start your free ...,532,1081,4261,Some top money flow for Dec 27Start your free ...
1821,2019-12-29,Notable activity on social media in the past w...,397,1035,3332,Notable activity on social media in the past w...
1822,2019-12-30,Siguenos en telegram https://t.me/tradingbalea...,472,1577,3928,Siguenos en telegram https://t.me/tradingbalea...
1823,2019-12-31,Some top money flow for Dec 30Start your free ...,589,1436,4437,Some top money flow for Dec 30Start your free ...


In [None]:
# normalising the tweet text

# lowercasing every entry of 'clean_text'
microsoft3d_lower = comb_tweets_microsoft3d['clean_text'].str.lower()
comb_tweets_microsoft3d['clean_text'] = microsoft3d_lower

In [None]:
# passing every tweet through clean_tweets, then previewing the first five cleaned rows
comb_tweets_microsoft3d['clean_text'] = comb_tweets_microsoft3d['clean_text'].apply(clean_tweets)
display(comb_tweets_microsoft3d['clean_text'].head(5))

0    zacks bull of the day apple have a great weeke...
1    free in account balance for first members lnkd...
2    free in account balance for first members lnkd...
3    apple does the party end in long with successf...
4    the ever changing world of apple what dean kar...
Name: clean_text, dtype: object

### **Dataset - 3 : Last 7 Days Tweets**

In [None]:
# importing the dataset
# the folder name is spelled 'Microsoft', matching the other Microsoft dataset cells,
# so the path does not depend on the mounted drive ignoring letter case
comb_tweets_microsoft7d = pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Microsoft/comb_tweets_microsoft7d.csv')
# discarding the 'Unnamed: 0' column (presumably the row index saved along with the data)
comb_tweets_microsoft7d = comb_tweets_microsoft7d.drop(columns=['Unnamed: 0'])
print(comb_tweets_microsoft7d)

     updated_dates  ... like_num
0       2015-01-07  ...     2876
1       2015-01-08  ...     3472
2       2015-01-09  ...     3522
3       2015-01-10  ...     3616
4       2015-01-11  ...     3519
...            ...  ...      ...
1816    2019-12-28  ...     8743
1817    2019-12-29  ...     8104
1818    2019-12-30  ...     8661
1819    2019-12-31  ...     9614
1820    2020-01-01  ...     8830

[1821 rows x 5 columns]


In [None]:
# starting the 'clean_text' column from the raw tweet text held in 'body'
microsoft7d_body = comb_tweets_microsoft7d['body']
comb_tweets_microsoft7d['clean_text'] = microsoft7d_body
display(comb_tweets_microsoft7d.loc[:, 'clean_text'])

0       The Ever-Changing World Of Apple http://seekin...
1       One Thing Apple Should Do To Preserve Its Mark...
2       How iPhone Demand Could Impact Apple's Q2 Guid...
3       Apple: Asian Carriers Are Making iPhone 6 Chea...
4       Caturday: There is no MacBook, there is only Z...
                              ...                        
1816    Some top money flow for Dec 27Start your free ...
1817    Notable activity on social media in the past w...
1818    Siguenos en telegram https://t.me/tradingbalea...
1819    Some top money flow for Dec 30Start your free ...
1820    Popular: $AAPL, $IWM, $BTAI, $FTNT, $ALIM, $BP...
Name: clean_text, Length: 1821, dtype: object

In [None]:
# normalising the tweet text

# lowercasing every entry of 'clean_text'
microsoft7d_lower = comb_tweets_microsoft7d['clean_text'].str.lower()
comb_tweets_microsoft7d['clean_text'] = microsoft7d_lower


In [None]:
# passing every tweet through clean_tweets, then previewing the first five cleaned rows
comb_tweets_microsoft7d['clean_text'] = comb_tweets_microsoft7d['clean_text'].apply(clean_tweets)
print(comb_tweets_microsoft7d['clean_text'].head(5))

0    the ever changing world of apple what dean kar...
1    one thing apple should do to preserve its mark...
2    how iphone demand could impact apple s q2 guid...
3    apple asian carriers are making iphone cheaper...
4    caturday there is no macbook there is only zuu...
Name: clean_text, dtype: object


### **Dataset - 4 : Previous Day Tweets**

In [None]:
# reading the CSV from Google Drive and discarding its 'Unnamed: 0' column
# (presumably the row index that was saved along with the data)
comb_tweets_microsoftprev = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Microsoft/comb_tweets_microsoftprev.csv')
    .drop('Unnamed: 0', axis='columns')
)
print(comb_tweets_microsoftprev)

     updated_dates  ... like_num
0       2015-01-01  ...       50
1       2015-01-02  ...      408
2       2015-01-03  ...      439
3       2015-01-04  ...      133
4       2015-01-05  ...      411
...            ...  ...      ...
1821    2019-12-28  ...     1058
1822    2019-12-29  ...     1460
1823    2019-12-30  ...      577
1824    2019-12-31  ...     2684
1825    2020-01-01  ...     1081

[1826 rows x 5 columns]


In [None]:
# starting the 'clean_text' column from the raw tweet text held in 'body'
microsoftprev_body = comb_tweets_microsoftprev['body']
comb_tweets_microsoftprev['clean_text'] = microsoftprev_body
display(comb_tweets_microsoftprev['clean_text'].head(5))

0    lx21 made $10,008  on $AAPL -Check it out! htt...
1    @davenathan I just expected something more ext...
2    The Closing Bell is out! http://paper.li/BonaC...
3    The Closing Bell is out! http://paper.li/BonaC...
4    #Apple : Price Target Raised to $125.00 at Arg...
Name: clean_text, dtype: object

In [None]:
# normalising the tweet text

# lowercasing every entry of 'clean_text' and writing the result back to the same column
microsoftprev_lower = comb_tweets_microsoftprev['clean_text'].str.lower()
comb_tweets_microsoftprev['clean_text'] = microsoftprev_lower


In [None]:
# passing every tweet through clean_tweets, then previewing the first five cleaned rows
comb_tweets_microsoftprev['clean_text'] = comb_tweets_microsoftprev['clean_text'].apply(clean_tweets)
print(comb_tweets_microsoftprev['clean_text'].head(5))

0    lx21 made on check it out learn exe watt imrs ...
1    i just expected something more extravagant lik...
2    the closing bell is out eurusd gbpusd usdjpy a...
3    the closing bell is out eurusd gbpusd usdjpy a...
4    price target raised to at argus free in accoun...
Name: clean_text, dtype: object


## **Finding Sentiment Analysis Values**

### **Dataset - 1 : Same Day Tweets**

In [None]:
# calculating TextBlob polarity and subjectivity for each row

# each text is analysed once and both scores are read from the same sentiment result,
# instead of building two TextBlob objects per row (one per score)
tb_scores_1_microsoft = [tb(text).sentiment for text in comb_tweets_microsoft1['clean_text']]

comb_tweets_microsoft1['polarity'] = [score.polarity for score in tb_scores_1_microsoft]
comb_tweets_microsoft1['subjectivity'] = [score.subjectivity for score in tb_scores_1_microsoft]

comb_tweets_microsoft1.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227,lx21 made on check it out learn exe watt imrs ...,0.152047,0.469165
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562,could be a problem sitting on support that bre...,0.137389,0.445212
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192,zacks bull of the day apple have a great weeke...,0.186061,0.49581
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323,free in account balance for first members lnkd...,0.214288,0.494194
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554,free in account balance for first members lnkd...,0.171292,0.464973
5,2015-01-06,Apple: Does The Party End In 2015? http://seek...,115,228,442,apple does the party end in long with successf...,0.15294,0.461804
6,2015-01-07,The Ever-Changing World Of Apple http://seekin...,105,209,576,the ever changing world of apple what dean kar...,0.170265,0.452655
7,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,251,428,823,one thing apple should do to preserve its mark...,0.189625,0.4805
8,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,177,364,612,how iphone demand could impact apple s q2 guid...,0.186721,0.474236
9,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,50,208,286,apple asian carriers are making iphone cheaper...,0.260925,0.425825


In [None]:
# calculating negative, neutral, positive and compound VADER scores for each row

# the analyzer is created once and reused: constructing SentimentIntensityAnalyzer
# loads the VADER lexicon, so creating it inside the loop repeated that work for every row
vader_1_microsoft = SentimentIntensityAnalyzer()

# one dict per row with the keys 'neg', 'neu', 'pos' and 'compound'
sentiment1microsoft = [
    vader_1_microsoft.polarity_scores(text)
    for text in comb_tweets_microsoft1['clean_text']
]

# previewing the first few score dicts instead of printing the whole list
print(sentiment1microsoft[:5])

[{'neg': 0.049, 'neu': 0.795, 'pos': 0.156, 'compound': 0.9999}, {'neg': 0.055, 'neu': 0.825, 'pos': 0.12, 'compound': 0.9999}, {'neg': 0.047, 'neu': 0.772, 'pos': 0.181, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.762, 'pos': 0.193, 'compound': 1.0}, {'neg': 0.06, 'neu': 0.773, 'pos': 0.167, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.752, 'pos': 0.182, 'compound': 1.0}, {'neg': 0.055, 'neu': 0.781, 'pos': 0.164, 'compound': 1.0}, {'neg': 0.048, 'neu': 0.787, 'pos': 0.164, 'compound': 1.0}, {'neg': 0.054, 'neu': 0.786, 'pos': 0.159, 'compound': 1.0}, {'neg': 0.029, 'neu': 0.795, 'pos': 0.176, 'compound': 1.0}, {'neg': 0.036, 'neu': 0.752, 'pos': 0.212, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.795, 'pos': 0.155, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.76, 'pos': 0.188, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.78, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.075, 'neu': 0.785, 'pos': 0.14, 'compound': 1.0}, {'neg': 0.074, 'neu': 0.788, 'pos': 0.137, 'compound': 1.0}, {'neg': 0.058, 'neu': 

In [None]:
# turning the list of VADER score dictionaries into a dataframe
# (one row per tweet row, one column per score)
sentiment1microsoft_df = pd.DataFrame(data=sentiment1microsoft)
print(sentiment1microsoft_df)

        neg    neu    pos  compound
0     0.049  0.795  0.156    0.9999
1     0.055  0.825  0.120    0.9999
2     0.047  0.772  0.181    1.0000
3     0.046  0.762  0.193    1.0000
4     0.060  0.773  0.167    1.0000
...     ...    ...    ...       ...
1822  0.044  0.820  0.136    0.9999
1823  0.047  0.811  0.142    0.9999
1824  0.051  0.776  0.173    1.0000
1825  0.052  0.808  0.140    1.0000
1826  0.074  0.784  0.143    0.9988

[1827 rows x 4 columns]


In [None]:
# copying each VADER score column from the scores dataframe into the original dataframe
# (pandas matches the values on the row index)
comb_tweets_microsoft1['neg'] = sentiment1microsoft_df['neg']
comb_tweets_microsoft1['neu'] = sentiment1microsoft_df['neu']
comb_tweets_microsoft1['pos'] = sentiment1microsoft_df['pos']
comb_tweets_microsoft1['compound'] = sentiment1microsoft_df['compound']

comb_tweets_microsoft1.head(n=10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227,lx21 made on check it out learn exe watt imrs ...,0.152047,0.469165,0.049,0.795,0.156,0.9999
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562,could be a problem sitting on support that bre...,0.137389,0.445212,0.055,0.825,0.12,0.9999
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192,zacks bull of the day apple have a great weeke...,0.186061,0.49581,0.047,0.772,0.181,1.0
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323,free in account balance for first members lnkd...,0.214288,0.494194,0.046,0.762,0.193,1.0
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554,free in account balance for first members lnkd...,0.171292,0.464973,0.06,0.773,0.167,1.0
5,2015-01-06,Apple: Does The Party End In 2015? http://seek...,115,228,442,apple does the party end in long with successf...,0.15294,0.461804,0.066,0.752,0.182,1.0
6,2015-01-07,The Ever-Changing World Of Apple http://seekin...,105,209,576,the ever changing world of apple what dean kar...,0.170265,0.452655,0.055,0.781,0.164,1.0
7,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,251,428,823,one thing apple should do to preserve its mark...,0.189625,0.4805,0.048,0.787,0.164,1.0
8,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,177,364,612,how iphone demand could impact apple s q2 guid...,0.186721,0.474236,0.054,0.786,0.159,1.0
9,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,50,208,286,apple asian carriers are making iphone cheaper...,0.260925,0.425825,0.029,0.795,0.176,1.0


In [None]:
# labelling each row as "negative", "positive" or "neutral" from its VADER scores

# the labels are collected in their own list (sntmnt1microsoft), which is the name
# the following cells print and add to the dataframe; the list of score
# dictionaries (sentiment1microsoft) is left untouched
sntmnt1microsoft = []

for index, row in comb_tweets_microsoft1.iterrows():
    neg = row['neg']
    pos = row['pos']
    # the larger of the negative / positive scores decides the label; a tie is neutral
    if neg > pos:
        snt = "negative"
    elif pos > neg:
        snt = "positive"
    else:
        snt = "neutral"
    sntmnt1microsoft.append(snt)

In [None]:
# viewing the sentiment label of every row
print(sntmnt1microsoft)

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# adding the sentiment labels as a new 'sentiment' column and viewing the first rows
comb_tweets_microsoft1['sentiment'] = sntmnt1microsoft
print(comb_tweets_microsoft1.head())

  updated_dates                                               body  \
0    2015-01-01  lx21 made $10,008  on $AAPL -Check it out! htt...   
1    2015-01-02  $aapl could be a problem sitting on support th...   
2    2015-01-03  Zacks' Bull Of The Day: Apple http://seekingal...   
3    2015-01-04  Free 5€ in account balance for first 100.000 m...   
4    2015-01-05  Free 5€ in account balance for first 100.000 m...   

   comment_num  retweet_num  like_num  \
0           17          968       227   
1          128         2102       562   
2           31          103       192   
3           52          328       323   
4          139          392       554   

                                          clean_text  polarity  subjectivity  \
0  lx21 made on check it out learn exe watt imrs ...  0.152047      0.469165   
1  could be a problem sitting on support that bre...  0.137389      0.445212   
2  zacks bull of the day apple have a great weeke...  0.186061      0.495810   
3  free in a

In [None]:
# exporting the first dataframe, now including the sentiment columns, to the mounted drive
comb_tweets_microsoft1.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Microsoft/comb_tweets_microsoft1_snt.csv')

### Dataset - 2 : Last 3 Days Tweets

In [None]:
# calculating the TextBlob polarity and subjectivity of each row's cleaned text

# analysing every text once and reading both values from the same sentiment result,
# instead of building a separate TextBlob for polarity and for subjectivity
textblob_sentiments = [tb(text).sentiment for text in comb_tweets_microsoft3d['clean_text']]

comb_tweets_microsoft3d['polarity'] = [s.polarity for s in textblob_sentiments]
comb_tweets_microsoft3d['subjectivity'] = [s.subjectivity for s in textblob_sentiments]

comb_tweets_microsoft3d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,zacks bull of the day apple have a great weeke...,0.158465,0.468734
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,free in account balance for first members lnkd...,0.175851,0.476125
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,free in account balance for first members lnkd...,0.183886,0.47845
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,apple does the party end in long with successf...,0.169583,0.468
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,the ever changing world of apple what dean kar...,0.164246,0.459994
5,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,471,865,1841,one thing apple should do to preserve its mark...,0.172146,0.466585
6,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,533,1001,2011,how iphone demand could impact apple s q2 guid...,0.183188,0.47059
7,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,478,1000,1721,apple asian carriers are making iphone cheaper...,0.197525,0.4712
8,2015-01-11,"Caturday: There is no MacBook, there is only Z...",251,663,1124,caturday there is no macbook there is only zuu...,0.217169,0.457028
9,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",141,531,866,the binary options competition win lnkd l gool...,0.220221,0.442879


In [None]:
# calculating the VADER negative, neutral, positive and compound scores for each row

# building the analyzer once and reusing it for every row: SentimentIntensityAnalyzer()
# loads the VADER lexicon when it is constructed, so creating it inside the loop
# repeats that work for each row without changing any score
vader_analyzer = SentimentIntensityAnalyzer()

# one {'neg', 'neu', 'pos', 'compound'} dictionary per row of clean_text
sentiment3dmicrosoft = [
    vader_analyzer.polarity_scores(text)
    for text in comb_tweets_microsoft3d['clean_text']
]

print(sentiment3dmicrosoft)

[{'neg': 0.059, 'neu': 0.786, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.049, 'neu': 0.776, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.053, 'neu': 0.767, 'pos': 0.18, 'compound': 1.0}, {'neg': 0.063, 'neu': 0.76, 'pos': 0.177, 'compound': 1.0}, {'neg': 0.06, 'neu': 0.766, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.058, 'neu': 0.773, 'pos': 0.169, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.784, 'pos': 0.165, 'compound': 1.0}, {'neg': 0.047, 'neu': 0.787, 'pos': 0.166, 'compound': 1.0}, {'neg': 0.048, 'neu': 0.781, 'pos': 0.171, 'compound': 1.0}, {'neg': 0.042, 'neu': 0.784, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.049, 'neu': 0.768, 'pos': 0.182, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.774, 'pos': 0.169, 'compound': 1.0}, {'neg': 0.059, 'neu': 0.77, 'pos': 0.171, 'compound': 1.0}, {'neg': 0.07, 'neu': 0.783, 'pos': 0.147, 'compound': 1.0}, {'neg': 0.073, 'neu': 0.788, 'pos': 0.139, 'compound': 1.0}, {'neg': 0.071, 'neu': 0.78, 'pos': 0.149, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.788, 

In [None]:
# turning the list of VADER score dictionaries into a dataframe
# (one row per tweet row, one column per score)
sentiment3dmicrosoft_df = pd.DataFrame(data=sentiment3dmicrosoft)
print(sentiment3dmicrosoft_df)

        neg    neu    pos  compound
0     0.059  0.786  0.154       1.0
1     0.049  0.776  0.174       1.0
2     0.053  0.767  0.180       1.0
3     0.063  0.760  0.177       1.0
4     0.060  0.766  0.174       1.0
...     ...    ...    ...       ...
1820  0.054  0.795  0.151       1.0
1821  0.054  0.796  0.149       1.0
1822  0.049  0.787  0.164       1.0
1823  0.053  0.792  0.155       1.0
1824  0.055  0.791  0.154       1.0

[1825 rows x 4 columns]


In [None]:
# copying each VADER score column from the scores dataframe into the original dataframe
# (pandas matches the values on the row index)
comb_tweets_microsoft3d['neg'] = sentiment3dmicrosoft_df['neg']
comb_tweets_microsoft3d['neu'] = sentiment3dmicrosoft_df['neu']
comb_tweets_microsoft3d['pos'] = sentiment3dmicrosoft_df['pos']
comb_tweets_microsoft3d['compound'] = sentiment3dmicrosoft_df['compound']

comb_tweets_microsoft3d.head(n=10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,zacks bull of the day apple have a great weeke...,0.158465,0.468734,0.059,0.786,0.154,1.0
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,free in account balance for first members lnkd...,0.175851,0.476125,0.049,0.776,0.174,1.0
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,free in account balance for first members lnkd...,0.183886,0.47845,0.053,0.767,0.18,1.0
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,apple does the party end in long with successf...,0.169583,0.468,0.063,0.76,0.177,1.0
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,the ever changing world of apple what dean kar...,0.164246,0.459994,0.06,0.766,0.174,1.0
5,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,471,865,1841,one thing apple should do to preserve its mark...,0.172146,0.466585,0.058,0.773,0.169,1.0
6,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,533,1001,2011,how iphone demand could impact apple s q2 guid...,0.183188,0.47059,0.051,0.784,0.165,1.0
7,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,478,1000,1721,apple asian carriers are making iphone cheaper...,0.197525,0.4712,0.047,0.787,0.166,1.0
8,2015-01-11,"Caturday: There is no MacBook, there is only Z...",251,663,1124,caturday there is no macbook there is only zuu...,0.217169,0.457028,0.048,0.781,0.171,1.0
9,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",141,531,866,the binary options competition win lnkd l gool...,0.220221,0.442879,0.042,0.784,0.174,1.0


In [None]:
# labelling each row by comparing its negative and positive VADER scores:
# larger 'neg' -> "negative", larger 'pos' -> "positive", otherwise "neutral"

sntmnt3dmicrosoft = [
    "negative" if neg_score > pos_score
    else "positive" if pos_score > neg_score
    else "neutral"
    for neg_score, pos_score in zip(comb_tweets_microsoft3d['neg'], comb_tweets_microsoft3d['pos'])
]

In [None]:
# viewing the sentiment label of every row
print(sntmnt3dmicrosoft)

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# adding the sentiment labels as a new 'sentiment' column
comb_tweets_microsoft3d['sentiment'] = sntmnt3dmicrosoft

In [None]:
# exporting the second dataframe, now including the sentiment columns, to the mounted drive
comb_tweets_microsoft3d.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Microsoft/comb_tweets_microsoft3d_snt.csv')

### Dataset - 3 : Last 7 Days Tweets

In [None]:
# calculating the TextBlob polarity and subjectivity of each row's cleaned text

# analysing every text once and reading both values from the same sentiment result,
# instead of building a separate TextBlob for polarity and for subjectivity
textblob_sentiments = [tb(text).sentiment for text in comb_tweets_microsoft7d['clean_text']]

comb_tweets_microsoft7d['polarity'] = [s.polarity for s in textblob_sentiments]
comb_tweets_microsoft7d['subjectivity'] = [s.subjectivity for s in textblob_sentiments]

comb_tweets_microsoft7d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-07,The Ever-Changing World Of Apple http://seekin...,587,4330,2876,the ever changing world of apple what dean kar...,0.166986,0.464993
1,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,821,3790,3472,one thing apple should do to preserve its mark...,0.172842,0.468477
2,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,870,2052,3522,how iphone demand could impact apple s q2 guid...,0.177947,0.471271
3,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,889,2157,3616,apple asian carriers are making iphone cheaper...,0.182011,0.467047
4,2015-01-11,"Caturday: There is no MacBook, there is only Z...",861,1920,3519,caturday there is no macbook there is only zuu...,0.184405,0.46369
5,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",789,1760,3319,the binary options competition win lnkd l gool...,0.185916,0.462454
6,2015-01-13,What's Driving Our $30 Billion Valuation For A...,809,1989,3399,what s driving our billion valuation for apple...,0.183448,0.457585
7,2015-01-14,The Storage Lawsuit Against Apple Is Beyond Fr...,817,2049,3272,the storage lawsuit against apple is beyond fr...,0.173973,0.455505
8,2015-01-15,Apple 2015 Outlook: Where Do We Go From Here? ...,711,1826,2812,apple outlook where do we go from here qihu ba...,0.167914,0.44767
9,2015-01-16,Apple's App Store Will Surprise To The Upside ...,690,1929,2561,apple s app store will surprise to the upside ...,0.151119,0.445665


In [None]:
# downloading the VADER lexicon used by SentimentIntensityAnalyzer
import nltk

nltk.download(info_or_id='vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [None]:
# calculating the VADER negative, neutral, positive and compound scores for each row

# building the analyzer once and reusing it for every row: SentimentIntensityAnalyzer()
# loads the VADER lexicon when it is constructed, so creating it inside the loop
# repeats that work for each row without changing any score
vader_analyzer = SentimentIntensityAnalyzer()

# one {'neg', 'neu', 'pos', 'compound'} dictionary per row of clean_text
sentiment7dmicrosoft = [
    vader_analyzer.polarity_scores(text)
    for text in comb_tweets_microsoft7d['clean_text']
]

print(sentiment7dmicrosoft)

KeyboardInterrupt: ignored

In [None]:
# turning the list of VADER score dictionaries into a dataframe
# (one row per tweet row, one column per score)
sentiment7dmicrosoft_df = pd.DataFrame(data=sentiment7dmicrosoft)
print(sentiment7dmicrosoft_df)

In [None]:
# copying each VADER score column from the scores dataframe into the original dataframe
# (pandas matches the values on the row index)
comb_tweets_microsoft7d['neg'] = sentiment7dmicrosoft_df['neg']
comb_tweets_microsoft7d['neu'] = sentiment7dmicrosoft_df['neu']
comb_tweets_microsoft7d['pos'] = sentiment7dmicrosoft_df['pos']
comb_tweets_microsoft7d['compound'] = sentiment7dmicrosoft_df['compound']

comb_tweets_microsoft7d.head(n=10)

In [None]:
# labelling each row by comparing its negative and positive VADER scores:
# larger 'neg' -> "negative", larger 'pos' -> "positive", otherwise "neutral"

sntmnt7dmicrosoft = [
    "negative" if neg_score > pos_score
    else "positive" if pos_score > neg_score
    else "neutral"
    for neg_score, pos_score in zip(comb_tweets_microsoft7d['neg'], comb_tweets_microsoft7d['pos'])
]

In [None]:
# viewing the sentiment label of every row
print(sntmnt7dmicrosoft)

In [None]:
# adding the sentiment labels as a new 'sentiment' column
comb_tweets_microsoft7d['sentiment'] = sntmnt7dmicrosoft

In [None]:
# exporting the third dataframe, now including the sentiment columns, to the mounted drive
comb_tweets_microsoft7d.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Microsoft/comb_tweets_microsoft7d_snt.csv')

### Dataset - 4 : Previous Day Tweets

In [None]:
# calculating the TextBlob polarity and subjectivity of each row's cleaned text

# analysing every text once and reading both values from the same sentiment result,
# instead of building a separate TextBlob for polarity and for subjectivity
textblob_sentiments = [tb(text).sentiment for text in comb_tweets_microsoftprev['clean_text']]

comb_tweets_microsoftprev['polarity'] = [s.polarity for s in textblob_sentiments]
comb_tweets_microsoftprev['subjectivity'] = [s.subjectivity for s in textblob_sentiments]

comb_tweets_microsoftprev.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",6,33,50,lx21 made on check it out learn exe watt imrs ...,0.067588,0.469103
1,2015-01-02,@davenathan I just expected something more ext...,52,1240,408,i just expected something more extravagant lik...,0.166638,0.469114
2,2015-01-03,The Closing Bell is out! http://paper.li/BonaC...,108,1858,439,the closing bell is out eurusd gbpusd usdjpy a...,0.146284,0.446422
3,2015-01-04,The Closing Bell is out! http://paper.li/BonaC...,15,62,133,the closing bell is out eurusd gbpusd usdjpy a...,0.216906,0.507264
4,2015-01-05,#Apple : Price Target Raised to $125.00 at Arg...,74,385,411,price target raised to at argus free in accoun...,0.228005,0.496103
5,2015-01-06,Stock Contest!! Pick $COP and WIN a FREE Table...,128,373,536,stock contest pick cop and win a free tablet e...,0.159218,0.460904
6,2015-01-07,Apple reports earnings on January 27 http://m....,141,254,525,apple reports earnings on january join me on t...,0.15465,0.461288
7,2015-01-08,More U.S. dollar damage $AAPL:$USD - http://sc...,91,196,516,more u s dollar damage usd here s how apple co...,0.172645,0.453962
8,2015-01-09,Those stocks? $AAPL $BABA $TSLA $VA ... If sh...,267,487,877,those stocks baba va if she had let me do i wo...,0.194843,0.482668
9,2015-01-10,"Join me on tsū, is sharing social revenues wit...",166,394,587,join me on ts is sharing social revenues with ...,0.179049,0.466481


In [None]:
# calculating the VADER negative, neutral, positive and compound scores for each row

# building the analyzer once and reusing it for every row: SentimentIntensityAnalyzer()
# loads the VADER lexicon when it is constructed, so creating it inside the loop
# repeats that work for each row without changing any score
vader_analyzer = SentimentIntensityAnalyzer()

# one {'neg', 'neu', 'pos', 'compound'} dictionary per row of clean_text
sentimentprevmicrosoft = [
    vader_analyzer.polarity_scores(text)
    for text in comb_tweets_microsoftprev['clean_text']
]

print(sentimentprevmicrosoft)

[{'neg': 0.041, 'neu': 0.894, 'pos': 0.065, 'compound': 0.9354}, {'neg': 0.04, 'neu': 0.835, 'pos': 0.125, 'compound': 0.9998}, {'neg': 0.058, 'neu': 0.794, 'pos': 0.148, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.754, 'pos': 0.2, 'compound': 1.0}, {'neg': 0.043, 'neu': 0.758, 'pos': 0.198, 'compound': 1.0}, {'neg': 0.067, 'neu': 0.778, 'pos': 0.155, 'compound': 1.0}, {'neg': 0.07, 'neu': 0.756, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.052, 'neu': 0.776, 'pos': 0.172, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.8, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.061, 'neu': 0.798, 'pos': 0.142, 'compound': 1.0}, {'neg': 0.022, 'neu': 0.839, 'pos': 0.139, 'compound': 0.9999}, {'neg': 0.014, 'neu': 0.845, 'pos': 0.141, 'compound': 0.9999}, {'neg': 0.049, 'neu': 0.785, 'pos': 0.166, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.772, 'pos': 0.172, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.796, 'pos': 0.138, 'compound': 1.0}, {'neg': 0.079, 'neu': 0.782, 'pos': 0.139, 'compound': 1.0}, {'neg': 0.078, 'n

In [None]:
# turning the list of VADER score dictionaries into a dataframe
# (one row per tweet row, one column per score)
sentimentprevmicrosoft_df = pd.DataFrame(data=sentimentprevmicrosoft)
print(sentimentprevmicrosoft_df)

        neg    neu    pos  compound
0     0.041  0.894  0.065    0.9354
1     0.040  0.835  0.125    0.9998
2     0.058  0.794  0.148    1.0000
3     0.046  0.754  0.200    1.0000
4     0.043  0.758  0.198    1.0000
...     ...    ...    ...       ...
1821  0.059  0.793  0.149    1.0000
1822  0.050  0.788  0.163    0.9999
1823  0.059  0.800  0.142    0.9999
1824  0.051  0.779  0.169    1.0000
1825  0.055  0.807  0.138    1.0000

[1826 rows x 4 columns]


In [None]:
# copying each VADER score column from the scores dataframe into the original dataframe
# (pandas matches the values on the row index)
comb_tweets_microsoftprev['neg'] = sentimentprevmicrosoft_df['neg']
comb_tweets_microsoftprev['neu'] = sentimentprevmicrosoft_df['neu']
comb_tweets_microsoftprev['pos'] = sentimentprevmicrosoft_df['pos']
comb_tweets_microsoftprev['compound'] = sentimentprevmicrosoft_df['compound']

comb_tweets_microsoftprev.head(n=10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",6,33,50,lx21 made on check it out learn exe watt imrs ...,0.067588,0.469103,0.041,0.894,0.065,0.9354
1,2015-01-02,@davenathan I just expected something more ext...,52,1240,408,i just expected something more extravagant lik...,0.166638,0.469114,0.04,0.835,0.125,0.9998
2,2015-01-03,The Closing Bell is out! http://paper.li/BonaC...,108,1858,439,the closing bell is out eurusd gbpusd usdjpy a...,0.146284,0.446422,0.058,0.794,0.148,1.0
3,2015-01-04,The Closing Bell is out! http://paper.li/BonaC...,15,62,133,the closing bell is out eurusd gbpusd usdjpy a...,0.216906,0.507264,0.046,0.754,0.2,1.0
4,2015-01-05,#Apple : Price Target Raised to $125.00 at Arg...,74,385,411,price target raised to at argus free in accoun...,0.228005,0.496103,0.043,0.758,0.198,1.0
5,2015-01-06,Stock Contest!! Pick $COP and WIN a FREE Table...,128,373,536,stock contest pick cop and win a free tablet e...,0.159218,0.460904,0.067,0.778,0.155,1.0
6,2015-01-07,Apple reports earnings on January 27 http://m....,141,254,525,apple reports earnings on january join me on t...,0.15465,0.461288,0.07,0.756,0.174,1.0
7,2015-01-08,More U.S. dollar damage $AAPL:$USD - http://sc...,91,196,516,more u s dollar damage usd here s how apple co...,0.172645,0.453962,0.052,0.776,0.172,1.0
8,2015-01-09,Those stocks? $AAPL $BABA $TSLA $VA ... If sh...,267,487,877,those stocks baba va if she had let me do i wo...,0.194843,0.482668,0.046,0.8,0.154,1.0
9,2015-01-10,"Join me on tsū, is sharing social revenues wit...",166,394,587,join me on ts is sharing social revenues with ...,0.179049,0.466481,0.061,0.798,0.142,1.0


In [None]:
# labelling each row by comparing its negative and positive VADER scores:
# larger 'neg' -> "negative", larger 'pos' -> "positive", otherwise "neutral"

sntmntprevmicrosoft = [
    "negative" if neg_score > pos_score
    else "positive" if pos_score > neg_score
    else "neutral"
    for neg_score, pos_score in zip(comb_tweets_microsoftprev['neg'], comb_tweets_microsoftprev['pos'])
]

In [None]:
# viewing the sentiment label of every row
print(sntmntprevmicrosoft)

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# adding the sentiment labels as a new 'sentiment' column
comb_tweets_microsoftprev['sentiment'] = sntmntprevmicrosoft

In [None]:
# exporting the fourth dataframe, now including the sentiment columns, to the mounted drive
comb_tweets_microsoftprev.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Microsoft/comb_tweets_microsoftprev_snt.csv')

# **For Goog**

## **Dataset Cleaning**

### **Dataset 1 : Same Day Tweets**

In [None]:
# loading the first Goog dataset and discarding its 'Unnamed: 0' column
# (presumably the row index that was saved along with the CSV)
comb_tweets_goog1 = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Goog/comb_tweets_goog1.csv')
    .drop(columns=['Unnamed: 0'])
)
display(comb_tweets_goog1)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554
...,...,...,...,...,...
1822,2019-12-28,Some top money flow for Dec 27Start your free ...,127,545,1579
1823,2019-12-29,Notable activity on social media in the past w...,74,307,528
1824,2019-12-30,Siguenos en telegram https://t.me/tradingbalea...,271,725,1821
1825,2019-12-31,Some top money flow for Dec 30Start your free ...,244,404,2088


In [None]:
# copying the tweet body into a 'clean_text' column, so that 'body' keeps the original text
comb_tweets_goog1['clean_text'] = comb_tweets_goog1['body']
display(comb_tweets_goog1['clean_text'])

0       lx21 made $10,008  on $AAPL -Check it out! htt...
1       $aapl could be a problem sitting on support th...
2       Zacks' Bull Of The Day: Apple http://seekingal...
3       Free 5€ in account balance for first 100.000 m...
4       Free 5€ in account balance for first 100.000 m...
                              ...                        
1822    Some top money flow for Dec 27Start your free ...
1823    Notable activity on social media in the past w...
1824    Siguenos en telegram https://t.me/tradingbalea...
1825    Some top money flow for Dec 30Start your free ...
1826    Popular: $AAPL, $IWM, $BTAI, $FTNT, $ALIM, $BP...
Name: clean_text, Length: 1827, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase
comb_tweets_goog1['clean_text'] = comb_tweets_goog1['clean_text'].str.lower()

In [None]:
# running clean_tweets on the text of every row, then viewing the first cleaned rows
comb_tweets_goog1['clean_text'] = comb_tweets_goog1['clean_text'].apply(clean_tweets)
display(comb_tweets_goog1['clean_text'].head())

0    lx21 made on check it out learn exe watt imrs ...
1    could be a problem sitting on support that bre...
2    zacks bull of the day apple have a great weeke...
3    free in account balance for first members lnkd...
4    free in account balance for first members lnkd...
Name: clean_text, dtype: object

### **Dataset - 2 : Last 3 Days Tweets**

In [None]:
# loading the second Goog dataset and discarding its 'Unnamed: 0' column
# (presumably the row index that was saved along with the CSV)
comb_tweets_goog3d = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Goog/comb_tweets_goog3d.csv')
    .drop(columns=['Unnamed: 0'])
)
print(comb_tweets_goog3d)

     updated_dates  ... like_num
0       2015-01-03  ...      981
1       2015-01-04  ...     1077
2       2015-01-05  ...     1069
3       2015-01-06  ...     1319
4       2015-01-07  ...     1572
...            ...  ...      ...
1820    2019-12-28  ...     4261
1821    2019-12-29  ...     3332
1822    2019-12-30  ...     3928
1823    2019-12-31  ...     4437
1824    2020-01-01  ...     4041

[1825 rows x 5 columns]


In [None]:
# copying the tweet body into a 'clean_text' column, so that 'body' keeps the original text
comb_tweets_goog3d['clean_text'] = comb_tweets_goog3d['body']
display(comb_tweets_goog3d)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,Zacks' Bull Of The Day: Apple http://seekingal...
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,Free 5€ in account balance for first 100.000 m...
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,Free 5€ in account balance for first 100.000 m...
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,Apple: Does The Party End In 2015? http://seek...
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,The Ever-Changing World Of Apple http://seekin...
...,...,...,...,...,...,...
1820,2019-12-28,Some top money flow for Dec 27Start your free ...,532,1081,4261,Some top money flow for Dec 27Start your free ...
1821,2019-12-29,Notable activity on social media in the past w...,397,1035,3332,Notable activity on social media in the past w...
1822,2019-12-30,Siguenos en telegram https://t.me/tradingbalea...,472,1577,3928,Siguenos en telegram https://t.me/tradingbalea...
1823,2019-12-31,Some top money flow for Dec 30Start your free ...,589,1436,4437,Some top money flow for Dec 30Start your free ...


In [None]:
# cleaning the tweets

# converting all the text to lowercase
comb_tweets_goog3d['clean_text'] = comb_tweets_goog3d['clean_text'].str.lower()

In [None]:
# running clean_tweets on the text of every row, then viewing the first cleaned rows
comb_tweets_goog3d['clean_text'] = comb_tweets_goog3d['clean_text'].apply(clean_tweets)
display(comb_tweets_goog3d['clean_text'].head())

0    zacks bull of the day apple have a great weeke...
1    free in account balance for first members lnkd...
2    free in account balance for first members lnkd...
3    apple does the party end in long with successf...
4    the ever changing world of apple what dean kar...
Name: clean_text, dtype: object

### **Dataset - 3 : Last 7 Days Tweets**

In [None]:
# loading the third Goog dataset and discarding its 'Unnamed: 0' column
# (presumably the row index that was saved along with the CSV)
comb_tweets_goog7d = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Goog/comb_tweets_goog7d.csv')
    .drop(columns=['Unnamed: 0'])
)
print(comb_tweets_goog7d)

     updated_dates  ... like_num
0       2015-01-07  ...     2876
1       2015-01-08  ...     3472
2       2015-01-09  ...     3522
3       2015-01-10  ...     3616
4       2015-01-11  ...     3519
...            ...  ...      ...
1816    2019-12-28  ...     8743
1817    2019-12-29  ...     8104
1818    2019-12-30  ...     8661
1819    2019-12-31  ...     9614
1820    2020-01-01  ...     8830

[1821 rows x 5 columns]


In [None]:
# copying the tweet body into a 'clean_text' column, so that 'body' keeps the original text
comb_tweets_goog7d['clean_text'] = comb_tweets_goog7d['body']
display(comb_tweets_goog7d['clean_text'])

0       The Ever-Changing World Of Apple http://seekin...
1       One Thing Apple Should Do To Preserve Its Mark...
2       How iPhone Demand Could Impact Apple's Q2 Guid...
3       Apple: Asian Carriers Are Making iPhone 6 Chea...
4       Caturday: There is no MacBook, there is only Z...
                              ...                        
1816    Some top money flow for Dec 27Start your free ...
1817    Notable activity on social media in the past w...
1818    Siguenos en telegram https://t.me/tradingbalea...
1819    Some top money flow for Dec 30Start your free ...
1820    Popular: $AAPL, $IWM, $BTAI, $FTNT, $ALIM, $BP...
Name: clean_text, Length: 1821, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase
comb_tweets_goog7d['clean_text'] = comb_tweets_goog7d['clean_text'].str.lower()


In [None]:
# running clean_tweets on the text of every row, then viewing the first cleaned rows
comb_tweets_goog7d['clean_text'] = comb_tweets_goog7d['clean_text'].apply(clean_tweets)
print(comb_tweets_goog7d['clean_text'].head())

0    the ever changing world of apple what dean kar...
1    one thing apple should do to preserve its mark...
2    how iphone demand could impact apple s q2 guid...
3    apple asian carriers are making iphone cheaper...
4    caturday there is no macbook there is only zuu...
Name: clean_text, dtype: object


### **Dataset - 4 : Previous Days Tweets**

In [None]:
# read the Goog "Previous Days Tweets" CSV and drop its 'Unnamed: 0' column
# (presumably the row index saved by an earlier to_csv call - confirm)
# NOTE(review): the outputs stored in this notebook show Apple / $AAPL tweet text in this
# file's 'body' column - confirm this is the intended data for this company
comb_tweets_googprev = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Goog/comb_tweets_googprev.csv')
    .drop(columns='Unnamed: 0')
)
print(comb_tweets_googprev)

     updated_dates  ... like_num
0       2015-01-01  ...       50
1       2015-01-02  ...      408
2       2015-01-03  ...      439
3       2015-01-04  ...      133
4       2015-01-05  ...      411
...            ...  ...      ...
1821    2019-12-28  ...     1058
1822    2019-12-29  ...     1460
1823    2019-12-30  ...      577
1824    2019-12-31  ...     2684
1825    2020-01-01  ...     1081

[1826 rows x 5 columns]


In [None]:
# start the 'clean_text' column as a copy of the raw tweet text in 'body';
# the following cells overwrite 'clean_text' with the cleaned version
comb_tweets_googprev['clean_text'] = comb_tweets_googprev['body']
# preview the first rows of the new column
display(comb_tweets_googprev['clean_text'].head())

0    lx21 made $10,008  on $AAPL -Check it out! htt...
1    @davenathan I just expected something more ext...
2    The Closing Bell is out! http://paper.li/BonaC...
3    The Closing Bell is out! http://paper.li/BonaC...
4    #Apple : Price Target Raised to $125.00 at Arg...
Name: clean_text, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase (overwrites 'clean_text' in place; no new column is added)
comb_tweets_googprev['clean_text'] = comb_tweets_googprev['clean_text'].str.lower()


In [None]:
# run every lower-cased tweet text through clean_tweets and preview the first rows
# (clean_tweets is not defined in this cell; presumably it comes from an earlier cell)
comb_tweets_googprev['clean_text'] = comb_tweets_googprev['clean_text'].apply(clean_tweets)
print(comb_tweets_googprev['clean_text'].head())

0    lx21 made on check it out learn exe watt imrs ...
1    i just expected something more extravagant lik...
2    the closing bell is out eurusd gbpusd usdjpy a...
3    the closing bell is out eurusd gbpusd usdjpy a...
4    price target raised to at argus free in accoun...
Name: clean_text, dtype: object


## **Finding Sentiment Analysis Values**

### **Dataset - 1 : Same Day Tweets**

In [None]:
# calculating TextBlob polarity and subjectivity for each row's cleaned text
# (the VADER neg / neu / pos / compound values are computed in the cells that follow)

# score each text once with TextBlob and reuse the result for both columns,
# rather than building and scoring a separate TextBlob per column
goog1_tb_sentiments = [tb(text).sentiment for text in comb_tweets_goog1['clean_text']]
comb_tweets_goog1['polarity'] = [s.polarity for s in goog1_tb_sentiments]
comb_tweets_goog1['subjectivity'] = [s.subjectivity for s in goog1_tb_sentiments]

comb_tweets_goog1.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227,lx21 made on check it out learn exe watt imrs ...,0.152047,0.469165
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562,could be a problem sitting on support that bre...,0.137389,0.445212
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192,zacks bull of the day apple have a great weeke...,0.186061,0.49581
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323,free in account balance for first members lnkd...,0.214288,0.494194
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554,free in account balance for first members lnkd...,0.171292,0.464973
5,2015-01-06,Apple: Does The Party End In 2015? http://seek...,115,228,442,apple does the party end in long with successf...,0.15294,0.461804
6,2015-01-07,The Ever-Changing World Of Apple http://seekin...,105,209,576,the ever changing world of apple what dean kar...,0.170265,0.452655
7,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,251,428,823,one thing apple should do to preserve its mark...,0.189625,0.4805
8,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,177,364,612,how iphone demand could impact apple s q2 guid...,0.186721,0.474236
9,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,50,208,286,apple asian carriers are making iphone cheaper...,0.260925,0.425825


In [None]:
# calculating VADER negative, neutral, positive and compound values for each row's cleaned text

# constructing SentimentIntensityAnalyzer loads the VADER lexicon, so build it once
# and reuse it for every row instead of re-creating it inside the loop
vader_analyzer = SentimentIntensityAnalyzer()

# one dict of scores (keys 'neg', 'neu', 'pos', 'compound') per row, in row order
sentiment1goog = [
    vader_analyzer.polarity_scores(text) for text in comb_tweets_goog1['clean_text']
]

print(sentiment1goog)

[{'neg': 0.049, 'neu': 0.795, 'pos': 0.156, 'compound': 0.9999}, {'neg': 0.055, 'neu': 0.825, 'pos': 0.12, 'compound': 0.9999}, {'neg': 0.047, 'neu': 0.772, 'pos': 0.181, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.762, 'pos': 0.193, 'compound': 1.0}, {'neg': 0.06, 'neu': 0.773, 'pos': 0.167, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.752, 'pos': 0.182, 'compound': 1.0}, {'neg': 0.055, 'neu': 0.781, 'pos': 0.164, 'compound': 1.0}, {'neg': 0.048, 'neu': 0.787, 'pos': 0.164, 'compound': 1.0}, {'neg': 0.054, 'neu': 0.786, 'pos': 0.159, 'compound': 1.0}, {'neg': 0.029, 'neu': 0.795, 'pos': 0.176, 'compound': 1.0}, {'neg': 0.036, 'neu': 0.752, 'pos': 0.212, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.795, 'pos': 0.155, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.76, 'pos': 0.188, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.78, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.075, 'neu': 0.785, 'pos': 0.14, 'compound': 1.0}, {'neg': 0.074, 'neu': 0.788, 'pos': 0.137, 'compound': 1.0}, {'neg': 0.058, 'neu': 

In [None]:
# converting the list of VADER score dicts to a dataframe
# (one row per dict; columns neg, neu, pos, compound as shown in the printed output)
sentiment1goog_df = pd.DataFrame(sentiment1goog)
print(sentiment1goog_df)

        neg    neu    pos  compound
0     0.049  0.795  0.156    0.9999
1     0.055  0.825  0.120    0.9999
2     0.047  0.772  0.181    1.0000
3     0.046  0.762  0.193    1.0000
4     0.060  0.773  0.167    1.0000
...     ...    ...    ...       ...
1822  0.044  0.820  0.136    0.9999
1823  0.047  0.811  0.142    0.9999
1824  0.051  0.776  0.173    1.0000
1825  0.052  0.808  0.140    1.0000
1826  0.074  0.784  0.143    0.9988

[1827 rows x 4 columns]


In [None]:
# adding the sentiment values to the original dataframe, one score column at a time
# (rows are matched on index labels)
for score_column in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_goog1[score_column] = sentiment1goog_df[score_column]

comb_tweets_goog1.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227,lx21 made on check it out learn exe watt imrs ...,0.152047,0.469165,0.049,0.795,0.156,0.9999
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562,could be a problem sitting on support that bre...,0.137389,0.445212,0.055,0.825,0.12,0.9999
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192,zacks bull of the day apple have a great weeke...,0.186061,0.49581,0.047,0.772,0.181,1.0
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323,free in account balance for first members lnkd...,0.214288,0.494194,0.046,0.762,0.193,1.0
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554,free in account balance for first members lnkd...,0.171292,0.464973,0.06,0.773,0.167,1.0
5,2015-01-06,Apple: Does The Party End In 2015? http://seek...,115,228,442,apple does the party end in long with successf...,0.15294,0.461804,0.066,0.752,0.182,1.0
6,2015-01-07,The Ever-Changing World Of Apple http://seekin...,105,209,576,the ever changing world of apple what dean kar...,0.170265,0.452655,0.055,0.781,0.164,1.0
7,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,251,428,823,one thing apple should do to preserve its mark...,0.189625,0.4805,0.048,0.787,0.164,1.0
8,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,177,364,612,how iphone demand could impact apple s q2 guid...,0.186721,0.474236,0.054,0.786,0.159,1.0
9,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,50,208,286,apple asian carriers are making iphone cheaper...,0.260925,0.425825,0.029,0.795,0.176,1.0


In [None]:
# calculating the sentiment label for each row from its VADER scores

# "negative" when neg > pos, "positive" when pos > neg, otherwise "neutral";
# only the 'neg' and 'pos' columns decide the label ('neu' is not used)
sntmnt1goog = [
    "negative" if neg > pos else ("positive" if pos > neg else "neutral")
    for neg, pos in zip(comb_tweets_goog1['neg'], comb_tweets_goog1['pos'])
]

In [None]:
# show the full list of per-row sentiment labels built in the previous cell
print(sntmnt1goog)

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# attach the per-row labels as the 'sentiment' column and preview the first rows
comb_tweets_goog1['sentiment'] = sntmnt1goog
print(comb_tweets_goog1.head())

  updated_dates                                               body  \
0    2015-01-01  lx21 made $10,008  on $AAPL -Check it out! htt...   
1    2015-01-02  $aapl could be a problem sitting on support th...   
2    2015-01-03  Zacks' Bull Of The Day: Apple http://seekingal...   
3    2015-01-04  Free 5€ in account balance for first 100.000 m...   
4    2015-01-05  Free 5€ in account balance for first 100.000 m...   

   comment_num  retweet_num  like_num  \
0           17          968       227   
1          128         2102       562   
2           31          103       192   
3           52          328       323   
4          139          392       554   

                                          clean_text  polarity  subjectivity  \
0  lx21 made on check it out learn exe watt imrs ...  0.152047      0.469165   
1  could be a problem sitting on support that bre...  0.137389      0.445212   
2  zacks bull of the day apple have a great weeke...  0.186061      0.495810   
3  free in a

In [None]:
# exporting the first dataframe (same-day tweets with sentiment columns)
# to_csv is called without index=False, so the row index is written as an extra leading column
comb_tweets_goog1.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Goog/comb_tweets_goog1_snt.csv')

### **Dataset - 2 : Last 3 Days Tweets**

In [None]:
# calculating TextBlob polarity and subjectivity for each row's cleaned text
# (the VADER neg / neu / pos / compound values are computed in the cells that follow)

# score each text once with TextBlob and reuse the result for both columns,
# rather than building and scoring a separate TextBlob per column
goog3d_tb_sentiments = [tb(text).sentiment for text in comb_tweets_goog3d['clean_text']]
comb_tweets_goog3d['polarity'] = [s.polarity for s in goog3d_tb_sentiments]
comb_tweets_goog3d['subjectivity'] = [s.subjectivity for s in goog3d_tb_sentiments]

comb_tweets_goog3d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,zacks bull of the day apple have a great weeke...,0.158465,0.468734
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,free in account balance for first members lnkd...,0.175851,0.476125
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,free in account balance for first members lnkd...,0.183886,0.47845
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,apple does the party end in long with successf...,0.169583,0.468
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,the ever changing world of apple what dean kar...,0.164246,0.459994
5,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,471,865,1841,one thing apple should do to preserve its mark...,0.172146,0.466585
6,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,533,1001,2011,how iphone demand could impact apple s q2 guid...,0.183188,0.47059
7,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,478,1000,1721,apple asian carriers are making iphone cheaper...,0.197525,0.4712
8,2015-01-11,"Caturday: There is no MacBook, there is only Z...",251,663,1124,caturday there is no macbook there is only zuu...,0.217169,0.457028
9,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",141,531,866,the binary options competition win lnkd l gool...,0.220221,0.442879


In [None]:
# calculating VADER negative, neutral, positive and compound values for each row's cleaned text

# constructing SentimentIntensityAnalyzer loads the VADER lexicon, so build it once
# and reuse it for every row instead of re-creating it inside the loop
vader_analyzer = SentimentIntensityAnalyzer()

# one dict of scores (keys 'neg', 'neu', 'pos', 'compound') per row, in row order
sentiment3dgoog = [
    vader_analyzer.polarity_scores(text) for text in comb_tweets_goog3d['clean_text']
]

print(sentiment3dgoog)

[{'neg': 0.059, 'neu': 0.786, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.049, 'neu': 0.776, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.053, 'neu': 0.767, 'pos': 0.18, 'compound': 1.0}, {'neg': 0.063, 'neu': 0.76, 'pos': 0.177, 'compound': 1.0}, {'neg': 0.06, 'neu': 0.766, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.058, 'neu': 0.773, 'pos': 0.169, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.784, 'pos': 0.165, 'compound': 1.0}, {'neg': 0.047, 'neu': 0.787, 'pos': 0.166, 'compound': 1.0}, {'neg': 0.048, 'neu': 0.781, 'pos': 0.171, 'compound': 1.0}, {'neg': 0.042, 'neu': 0.784, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.049, 'neu': 0.768, 'pos': 0.182, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.774, 'pos': 0.169, 'compound': 1.0}, {'neg': 0.059, 'neu': 0.77, 'pos': 0.171, 'compound': 1.0}, {'neg': 0.07, 'neu': 0.783, 'pos': 0.147, 'compound': 1.0}, {'neg': 0.073, 'neu': 0.788, 'pos': 0.139, 'compound': 1.0}, {'neg': 0.071, 'neu': 0.78, 'pos': 0.149, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.788, 

In [None]:
# converting the list of VADER score dicts to a dataframe
# (one row per dict; columns neg, neu, pos, compound as shown in the printed output)
sentiment3dgoog_df = pd.DataFrame(sentiment3dgoog)
print(sentiment3dgoog_df)

        neg    neu    pos  compound
0     0.059  0.786  0.154       1.0
1     0.049  0.776  0.174       1.0
2     0.053  0.767  0.180       1.0
3     0.063  0.760  0.177       1.0
4     0.060  0.766  0.174       1.0
...     ...    ...    ...       ...
1820  0.054  0.795  0.151       1.0
1821  0.054  0.796  0.149       1.0
1822  0.049  0.787  0.164       1.0
1823  0.053  0.792  0.155       1.0
1824  0.055  0.791  0.154       1.0

[1825 rows x 4 columns]


In [None]:
# adding the sentiment values to the original dataframe, one score column at a time
# (rows are matched on index labels)
for score_column in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_goog3d[score_column] = sentiment3dgoog_df[score_column]

comb_tweets_goog3d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,zacks bull of the day apple have a great weeke...,0.158465,0.468734,0.059,0.786,0.154,1.0
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,free in account balance for first members lnkd...,0.175851,0.476125,0.049,0.776,0.174,1.0
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,free in account balance for first members lnkd...,0.183886,0.47845,0.053,0.767,0.18,1.0
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,apple does the party end in long with successf...,0.169583,0.468,0.063,0.76,0.177,1.0
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,the ever changing world of apple what dean kar...,0.164246,0.459994,0.06,0.766,0.174,1.0
5,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,471,865,1841,one thing apple should do to preserve its mark...,0.172146,0.466585,0.058,0.773,0.169,1.0
6,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,533,1001,2011,how iphone demand could impact apple s q2 guid...,0.183188,0.47059,0.051,0.784,0.165,1.0
7,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,478,1000,1721,apple asian carriers are making iphone cheaper...,0.197525,0.4712,0.047,0.787,0.166,1.0
8,2015-01-11,"Caturday: There is no MacBook, there is only Z...",251,663,1124,caturday there is no macbook there is only zuu...,0.217169,0.457028,0.048,0.781,0.171,1.0
9,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",141,531,866,the binary options competition win lnkd l gool...,0.220221,0.442879,0.042,0.784,0.174,1.0


In [None]:
# calculating the sentiment label for each row from its VADER scores

# "negative" when neg > pos, "positive" when pos > neg, otherwise "neutral";
# only the 'neg' and 'pos' columns decide the label ('neu' is not used)
sntmnt3dgoog = [
    "negative" if neg > pos else ("positive" if pos > neg else "neutral")
    for neg, pos in zip(comb_tweets_goog3d['neg'], comb_tweets_goog3d['pos'])
]

In [None]:
# show the full list of per-row sentiment labels built in the previous cell
print(sntmnt3dgoog)

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# attach the per-row labels as the 'sentiment' column
comb_tweets_goog3d['sentiment'] = sntmnt3dgoog

In [None]:
# exporting the second dataframe (last-3-days tweets with sentiment columns)
# to_csv is called without index=False, so the row index is written as an extra leading column
comb_tweets_goog3d.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Goog/comb_tweets_goog3d_snt.csv')

### **Dataset - 3 : Last 7 Days Tweets**

In [None]:
# calculating TextBlob polarity and subjectivity for each row's cleaned text
# (the VADER neg / neu / pos / compound values are computed in the cells that follow)

# score each text once with TextBlob and reuse the result for both columns,
# rather than building and scoring a separate TextBlob per column
goog7d_tb_sentiments = [tb(text).sentiment for text in comb_tweets_goog7d['clean_text']]
comb_tweets_goog7d['polarity'] = [s.polarity for s in goog7d_tb_sentiments]
comb_tweets_goog7d['subjectivity'] = [s.subjectivity for s in goog7d_tb_sentiments]

comb_tweets_goog7d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-07,The Ever-Changing World Of Apple http://seekin...,587,4330,2876,the ever changing world of apple what dean kar...,0.166986,0.464993
1,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,821,3790,3472,one thing apple should do to preserve its mark...,0.172842,0.468477
2,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,870,2052,3522,how iphone demand could impact apple s q2 guid...,0.177947,0.471271
3,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,889,2157,3616,apple asian carriers are making iphone cheaper...,0.182011,0.467047
4,2015-01-11,"Caturday: There is no MacBook, there is only Z...",861,1920,3519,caturday there is no macbook there is only zuu...,0.184405,0.46369
5,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",789,1760,3319,the binary options competition win lnkd l gool...,0.185916,0.462454
6,2015-01-13,What's Driving Our $30 Billion Valuation For A...,809,1989,3399,what s driving our billion valuation for apple...,0.183448,0.457585
7,2015-01-14,The Storage Lawsuit Against Apple Is Beyond Fr...,817,2049,3272,the storage lawsuit against apple is beyond fr...,0.173973,0.455505
8,2015-01-15,Apple 2015 Outlook: Where Do We Go From Here? ...,711,1826,2812,apple outlook where do we go from here qihu ba...,0.167914,0.44767
9,2015-01-16,Apple's App Store Will Surprise To The Upside ...,690,1929,2561,apple s app store will surprise to the upside ...,0.151119,0.445665


In [None]:
# download the 'vader_lexicon' NLTK data package
# NOTE(review): nltk is already imported in the imports cell at the top of the notebook, and
# SentimentIntensityAnalyzer is already used in cells above this one - confirm the lexicon
# is available before its first use when running on a fresh kernel
import nltk
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [None]:
# calculating VADER negative, neutral, positive and compound values for each row's cleaned text

# constructing SentimentIntensityAnalyzer loads the VADER lexicon, so build it once
# and reuse it for every row instead of re-creating it inside the loop
vader_analyzer = SentimentIntensityAnalyzer()

# one dict of scores (keys 'neg', 'neu', 'pos', 'compound') per row, in row order
sentiment7dgoog = [
    vader_analyzer.polarity_scores(text) for text in comb_tweets_goog7d['clean_text']
]

print(sentiment7dgoog)

KeyboardInterrupt: ignored

In [None]:
# converting the list of VADER score dicts to a dataframe (one row per dict)
sentiment7dgoog_df = pd.DataFrame(sentiment7dgoog)
print(sentiment7dgoog_df)

In [None]:
# adding the sentiment values to the original dataframe, one score column at a time
# (rows are matched on index labels)
for score_column in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_goog7d[score_column] = sentiment7dgoog_df[score_column]

comb_tweets_goog7d.head(10)

In [None]:
# calculating the sentiment label for each row from its VADER scores

# "negative" when neg > pos, "positive" when pos > neg, otherwise "neutral";
# only the 'neg' and 'pos' columns decide the label ('neu' is not used)
sntmnt7dgoog = [
    "negative" if neg > pos else ("positive" if pos > neg else "neutral")
    for neg, pos in zip(comb_tweets_goog7d['neg'], comb_tweets_goog7d['pos'])
]

In [None]:
# show the full list of per-row sentiment labels built in the previous cell
print(sntmnt7dgoog)

In [None]:
# attach the per-row labels as the 'sentiment' column
comb_tweets_goog7d['sentiment'] = sntmnt7dgoog

In [None]:
# exporting the third dataframe (last-7-days tweets with sentiment columns)
# to_csv is called without index=False, so the row index is written as an extra leading column
comb_tweets_goog7d.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Goog/comb_tweets_goog7d_snt.csv')

### **Dataset - 4 : Previous Day Tweets**

In [None]:
# calculating TextBlob polarity and subjectivity for each row's cleaned text
# (the VADER neg / neu / pos / compound values are computed in the cells that follow)

# score each text once with TextBlob and reuse the result for both columns,
# rather than building and scoring a separate TextBlob per column
googprev_tb_sentiments = [tb(text).sentiment for text in comb_tweets_googprev['clean_text']]
comb_tweets_googprev['polarity'] = [s.polarity for s in googprev_tb_sentiments]
comb_tweets_googprev['subjectivity'] = [s.subjectivity for s in googprev_tb_sentiments]

comb_tweets_googprev.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",6,33,50,lx21 made on check it out learn exe watt imrs ...,0.067588,0.469103
1,2015-01-02,@davenathan I just expected something more ext...,52,1240,408,i just expected something more extravagant lik...,0.166638,0.469114
2,2015-01-03,The Closing Bell is out! http://paper.li/BonaC...,108,1858,439,the closing bell is out eurusd gbpusd usdjpy a...,0.146284,0.446422
3,2015-01-04,The Closing Bell is out! http://paper.li/BonaC...,15,62,133,the closing bell is out eurusd gbpusd usdjpy a...,0.216906,0.507264
4,2015-01-05,#Apple : Price Target Raised to $125.00 at Arg...,74,385,411,price target raised to at argus free in accoun...,0.228005,0.496103
5,2015-01-06,Stock Contest!! Pick $COP and WIN a FREE Table...,128,373,536,stock contest pick cop and win a free tablet e...,0.159218,0.460904
6,2015-01-07,Apple reports earnings on January 27 http://m....,141,254,525,apple reports earnings on january join me on t...,0.15465,0.461288
7,2015-01-08,More U.S. dollar damage $AAPL:$USD - http://sc...,91,196,516,more u s dollar damage usd here s how apple co...,0.172645,0.453962
8,2015-01-09,Those stocks? $AAPL $BABA $TSLA $VA ... If sh...,267,487,877,those stocks baba va if she had let me do i wo...,0.194843,0.482668
9,2015-01-10,"Join me on tsū, is sharing social revenues wit...",166,394,587,join me on ts is sharing social revenues with ...,0.179049,0.466481


In [None]:
# calculating VADER negative, neutral, positive and compound values for each row's cleaned text

# constructing SentimentIntensityAnalyzer loads the VADER lexicon, so build it once
# and reuse it for every row instead of re-creating it inside the loop
vader_analyzer = SentimentIntensityAnalyzer()

# one dict of scores (keys 'neg', 'neu', 'pos', 'compound') per row, in row order
sentimentprevgoog = [
    vader_analyzer.polarity_scores(text) for text in comb_tweets_googprev['clean_text']
]

print(sentimentprevgoog)

[{'neg': 0.041, 'neu': 0.894, 'pos': 0.065, 'compound': 0.9354}, {'neg': 0.04, 'neu': 0.835, 'pos': 0.125, 'compound': 0.9998}, {'neg': 0.058, 'neu': 0.794, 'pos': 0.148, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.754, 'pos': 0.2, 'compound': 1.0}, {'neg': 0.043, 'neu': 0.758, 'pos': 0.198, 'compound': 1.0}, {'neg': 0.067, 'neu': 0.778, 'pos': 0.155, 'compound': 1.0}, {'neg': 0.07, 'neu': 0.756, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.052, 'neu': 0.776, 'pos': 0.172, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.8, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.061, 'neu': 0.798, 'pos': 0.142, 'compound': 1.0}, {'neg': 0.022, 'neu': 0.839, 'pos': 0.139, 'compound': 0.9999}, {'neg': 0.014, 'neu': 0.845, 'pos': 0.141, 'compound': 0.9999}, {'neg': 0.049, 'neu': 0.785, 'pos': 0.166, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.772, 'pos': 0.172, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.796, 'pos': 0.138, 'compound': 1.0}, {'neg': 0.079, 'neu': 0.782, 'pos': 0.139, 'compound': 1.0}, {'neg': 0.078, 'n

In [None]:
# converting the list of VADER score dicts to a dataframe
# (one row per dict; columns neg, neu, pos, compound as shown in the printed output)
sentimentprevgoog_df = pd.DataFrame(sentimentprevgoog)
print(sentimentprevgoog_df)

        neg    neu    pos  compound
0     0.041  0.894  0.065    0.9354
1     0.040  0.835  0.125    0.9998
2     0.058  0.794  0.148    1.0000
3     0.046  0.754  0.200    1.0000
4     0.043  0.758  0.198    1.0000
...     ...    ...    ...       ...
1821  0.059  0.793  0.149    1.0000
1822  0.050  0.788  0.163    0.9999
1823  0.059  0.800  0.142    0.9999
1824  0.051  0.779  0.169    1.0000
1825  0.055  0.807  0.138    1.0000

[1826 rows x 4 columns]


In [None]:
# adding the sentiment values to the original dataframe, one score column at a time
# (rows are matched on index labels)
for score_column in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_googprev[score_column] = sentimentprevgoog_df[score_column]

comb_tweets_googprev.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",6,33,50,lx21 made on check it out learn exe watt imrs ...,0.067588,0.469103,0.041,0.894,0.065,0.9354
1,2015-01-02,@davenathan I just expected something more ext...,52,1240,408,i just expected something more extravagant lik...,0.166638,0.469114,0.04,0.835,0.125,0.9998
2,2015-01-03,The Closing Bell is out! http://paper.li/BonaC...,108,1858,439,the closing bell is out eurusd gbpusd usdjpy a...,0.146284,0.446422,0.058,0.794,0.148,1.0
3,2015-01-04,The Closing Bell is out! http://paper.li/BonaC...,15,62,133,the closing bell is out eurusd gbpusd usdjpy a...,0.216906,0.507264,0.046,0.754,0.2,1.0
4,2015-01-05,#Apple : Price Target Raised to $125.00 at Arg...,74,385,411,price target raised to at argus free in accoun...,0.228005,0.496103,0.043,0.758,0.198,1.0
5,2015-01-06,Stock Contest!! Pick $COP and WIN a FREE Table...,128,373,536,stock contest pick cop and win a free tablet e...,0.159218,0.460904,0.067,0.778,0.155,1.0
6,2015-01-07,Apple reports earnings on January 27 http://m....,141,254,525,apple reports earnings on january join me on t...,0.15465,0.461288,0.07,0.756,0.174,1.0
7,2015-01-08,More U.S. dollar damage $AAPL:$USD - http://sc...,91,196,516,more u s dollar damage usd here s how apple co...,0.172645,0.453962,0.052,0.776,0.172,1.0
8,2015-01-09,Those stocks? $AAPL $BABA $TSLA $VA ... If sh...,267,487,877,those stocks baba va if she had let me do i wo...,0.194843,0.482668,0.046,0.8,0.154,1.0
9,2015-01-10,"Join me on tsū, is sharing social revenues wit...",166,394,587,join me on ts is sharing social revenues with ...,0.179049,0.466481,0.061,0.798,0.142,1.0


In [None]:
# calculating the sentiment label for each row from its VADER scores

# "negative" when neg > pos, "positive" when pos > neg, otherwise "neutral";
# only the 'neg' and 'pos' columns decide the label ('neu' is not used)
sntmntprevgoog = [
    "negative" if neg > pos else ("positive" if pos > neg else "neutral")
    for neg, pos in zip(comb_tweets_googprev['neg'], comb_tweets_googprev['pos'])
]

In [None]:
# show the full list of per-row sentiment labels built in the previous cell
print(sntmntprevgoog)

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# attach the per-row labels as the 'sentiment' column
comb_tweets_googprev['sentiment'] = sntmntprevgoog

In [None]:
# exporting the fourth dataframe (previous-day tweets with sentiment columns)
# to_csv is called without index=False, so the row index is written as an extra leading column
comb_tweets_googprev.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Goog/comb_tweets_googprev_snt.csv')

# **For Googl**

## **Dataset Cleaning**

### **Dataset 1 : Same Day Tweets**

In [None]:
# read the Googl "Same Day Tweets" CSV and drop its 'Unnamed: 0' column
# (presumably the row index saved by an earlier to_csv call - confirm)
# NOTE(review): the outputs stored in this notebook show Apple / $AAPL tweet text in this
# file's 'body' column - confirm this is the intended data for this company
comb_tweets_googl1 = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Googl/comb_tweets_googl1.csv')
    .drop(columns='Unnamed: 0')
)
display(comb_tweets_googl1)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554
...,...,...,...,...,...
1822,2019-12-28,Some top money flow for Dec 27Start your free ...,127,545,1579
1823,2019-12-29,Notable activity on social media in the past w...,74,307,528
1824,2019-12-30,Siguenos en telegram https://t.me/tradingbalea...,271,725,1821
1825,2019-12-31,Some top money flow for Dec 30Start your free ...,244,404,2088


In [None]:
# start the 'clean_text' column as a copy of the raw tweet text in 'body';
# the following cells overwrite 'clean_text' with the cleaned version
comb_tweets_googl1['clean_text'] = comb_tweets_googl1['body']
# preview the new column
display(comb_tweets_googl1['clean_text'])

0       lx21 made $10,008  on $AAPL -Check it out! htt...
1       $aapl could be a problem sitting on support th...
2       Zacks' Bull Of The Day: Apple http://seekingal...
3       Free 5€ in account balance for first 100.000 m...
4       Free 5€ in account balance for first 100.000 m...
                              ...                        
1822    Some top money flow for Dec 27Start your free ...
1823    Notable activity on social media in the past w...
1824    Siguenos en telegram https://t.me/tradingbalea...
1825    Some top money flow for Dec 30Start your free ...
1826    Popular: $AAPL, $IWM, $BTAI, $FTNT, $ALIM, $BP...
Name: clean_text, Length: 1827, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase (overwrites 'clean_text' in place)
comb_tweets_googl1['clean_text'] = comb_tweets_googl1['clean_text'].str.lower()

In [None]:
# run every lower-cased tweet text through clean_tweets, then view the first cleaned rows
# (clean_tweets is not defined in this cell; presumably it comes from an earlier cell)
comb_tweets_googl1['clean_text'] = comb_tweets_googl1['clean_text'].apply(clean_tweets)
display(comb_tweets_googl1['clean_text'].head())

0    lx21 made on check it out learn exe watt imrs ...
1    could be a problem sitting on support that bre...
2    zacks bull of the day apple have a great weeke...
3    free in account balance for first members lnkd...
4    free in account balance for first members lnkd...
Name: clean_text, dtype: object

### **Dataset - 2 : Last 3 Days Tweets**

In [None]:
# importing the dataset and dropping the leftover 'Unnamed: 0' column
comb_tweets_googl3d = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Googl/comb_tweets_googl3d.csv')
    .drop(columns=['Unnamed: 0'])
)
# rich preview of the first rows: print() on the whole frame elides the middle columns
display(comb_tweets_googl3d.head())

     updated_dates  ... like_num
0       2015-01-03  ...      981
1       2015-01-04  ...     1077
2       2015-01-05  ...     1069
3       2015-01-06  ...     1319
4       2015-01-07  ...     1572
...            ...  ...      ...
1820    2019-12-28  ...     4261
1821    2019-12-29  ...     3332
1822    2019-12-30  ...     3928
1823    2019-12-31  ...     4437
1824    2020-01-01  ...     4041

[1825 rows x 5 columns]


In [None]:
# copying the raw tweet text into a working column ('clean_text'); the cells
# below lowercase and clean this column while 'body' keeps the original text
comb_tweets_googl3d['clean_text'] = comb_tweets_googl3d['body']
# previewing only the first rows, consistent with the other dataset cells
display(comb_tweets_googl3d.head())

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,Zacks' Bull Of The Day: Apple http://seekingal...
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,Free 5€ in account balance for first 100.000 m...
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,Free 5€ in account balance for first 100.000 m...
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,Apple: Does The Party End In 2015? http://seek...
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,The Ever-Changing World Of Apple http://seekin...
...,...,...,...,...,...,...
1820,2019-12-28,Some top money flow for Dec 27Start your free ...,532,1081,4261,Some top money flow for Dec 27Start your free ...
1821,2019-12-29,Notable activity on social media in the past w...,397,1035,3332,Notable activity on social media in the past w...
1822,2019-12-30,Siguenos en telegram https://t.me/tradingbalea...,472,1577,3928,Siguenos en telegram https://t.me/tradingbalea...
1823,2019-12-31,Some top money flow for Dec 30Start your free ...,589,1436,4437,Some top money flow for Dec 30Start your free ...


In [None]:
# cleaning the tweets

# converting all the text to lowercase (the 'clean_text' column is overwritten in place)
comb_tweets_googl3d['clean_text'] = comb_tweets_googl3d['clean_text'].str.lower()

In [None]:
# cleaning every lowercased text with clean_tweets and previewing the result
# NOTE(review): clean_tweets is not defined in this part of the notebook; it must already exist in the kernel
comb_tweets_googl3d['clean_text'] = comb_tweets_googl3d['clean_text'].apply(clean_tweets)
display(comb_tweets_googl3d['clean_text'].head())

0    zacks bull of the day apple have a great weeke...
1    free in account balance for first members lnkd...
2    free in account balance for first members lnkd...
3    apple does the party end in long with successf...
4    the ever changing world of apple what dean kar...
Name: clean_text, dtype: object

### **Dataset - 3 : Last 7 Days Tweets**

In [None]:
# importing the dataset and dropping the leftover 'Unnamed: 0' column
comb_tweets_googl7d = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Googl/comb_tweets_googl7d.csv')
    .drop(columns=['Unnamed: 0'])
)
# rich preview of the first rows: print() on the whole frame elides the middle columns
display(comb_tweets_googl7d.head())

     updated_dates  ... like_num
0       2015-01-07  ...     2876
1       2015-01-08  ...     3472
2       2015-01-09  ...     3522
3       2015-01-10  ...     3616
4       2015-01-11  ...     3519
...            ...  ...      ...
1816    2019-12-28  ...     8743
1817    2019-12-29  ...     8104
1818    2019-12-30  ...     8661
1819    2019-12-31  ...     9614
1820    2020-01-01  ...     8830

[1821 rows x 5 columns]


In [None]:
# copying the raw tweet text into a working column ('clean_text'); the cells
# below lowercase and clean this column while 'body' keeps the original text
comb_tweets_googl7d['clean_text'] = comb_tweets_googl7d['body']
# previewing only the first rows, consistent with the other dataset cells
display(comb_tweets_googl7d['clean_text'].head())

0       The Ever-Changing World Of Apple http://seekin...
1       One Thing Apple Should Do To Preserve Its Mark...
2       How iPhone Demand Could Impact Apple's Q2 Guid...
3       Apple: Asian Carriers Are Making iPhone 6 Chea...
4       Caturday: There is no MacBook, there is only Z...
                              ...                        
1816    Some top money flow for Dec 27Start your free ...
1817    Notable activity on social media in the past w...
1818    Siguenos en telegram https://t.me/tradingbalea...
1819    Some top money flow for Dec 30Start your free ...
1820    Popular: $AAPL, $IWM, $BTAI, $FTNT, $ALIM, $BP...
Name: clean_text, Length: 1821, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase (the 'clean_text' column is overwritten in place)
comb_tweets_googl7d['clean_text'] = comb_tweets_googl7d['clean_text'].str.lower()


In [None]:
# cleaning every lowercased text with clean_tweets and previewing the result
# NOTE(review): clean_tweets is not defined in this part of the notebook; it must already exist in the kernel
comb_tweets_googl7d['clean_text'] = comb_tweets_googl7d['clean_text'].apply(clean_tweets)
print(comb_tweets_googl7d['clean_text'].head())

0    the ever changing world of apple what dean kar...
1    one thing apple should do to preserve its mark...
2    how iphone demand could impact apple s q2 guid...
3    apple asian carriers are making iphone cheaper...
4    caturday there is no macbook there is only zuu...
Name: clean_text, dtype: object


### **Dataset - 4 : Previous Days Tweets**

In [None]:
# importing the dataset and dropping the leftover 'Unnamed: 0' column
comb_tweets_googlprev = (
    pd.read_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Googl/comb_tweets_googlprev.csv')
    .drop(columns=['Unnamed: 0'])
)
# rich preview of the first rows: print() on the whole frame elides the middle columns
display(comb_tweets_googlprev.head())

     updated_dates  ... like_num
0       2015-01-01  ...       50
1       2015-01-02  ...      408
2       2015-01-03  ...      439
3       2015-01-04  ...      133
4       2015-01-05  ...      411
...            ...  ...      ...
1821    2019-12-28  ...     1058
1822    2019-12-29  ...     1460
1823    2019-12-30  ...      577
1824    2019-12-31  ...     2684
1825    2020-01-01  ...     1081

[1826 rows x 5 columns]


In [None]:
# copying the raw tweet text into a working column ('clean_text'); the cells
# below lowercase and clean this column while 'body' keeps the original text
comb_tweets_googlprev['clean_text'] = comb_tweets_googlprev['body']
display(comb_tweets_googlprev['clean_text'].head())

0    lx21 made $10,008  on $AAPL -Check it out! htt...
1    @davenathan I just expected something more ext...
2    The Closing Bell is out! http://paper.li/BonaC...
3    The Closing Bell is out! http://paper.li/BonaC...
4    #Apple : Price Target Raised to $125.00 at Arg...
Name: clean_text, dtype: object

In [None]:
# cleaning the tweets

# converting all the text to lowercase (the 'clean_text' column is overwritten in place, no new column is created)
comb_tweets_googlprev['clean_text'] = comb_tweets_googlprev['clean_text'].str.lower()


In [None]:
# cleaning every lowercased text with clean_tweets and previewing the result
# NOTE(review): clean_tweets is not defined in this part of the notebook; it must already exist in the kernel
comb_tweets_googlprev['clean_text'] = comb_tweets_googlprev['clean_text'].apply(clean_tweets)
print(comb_tweets_googlprev['clean_text'].head())

0    lx21 made on check it out learn exe watt imrs ...
1    i just expected something more extravagant lik...
2    the closing bell is out eurusd gbpusd usdjpy a...
3    the closing bell is out eurusd gbpusd usdjpy a...
4    price target raised to at argus free in accoun...
Name: clean_text, dtype: object


## **Finding Sentiment Analysis Values**

### **Dataset - 1 : Same Day Tweets**

In [None]:
# calculating the TextBlob polarity and subjectivity of each row's cleaned text

# each text is analysed once and both scores are taken from that single result,
# instead of building and scoring a separate TextBlob for each of the two columns
blob_scores_googl1 = pd.DataFrame(
    [tuple(tb(text).sentiment) for text in comb_tweets_googl1['clean_text']],
    columns=['polarity', 'subjectivity'],
    index=comb_tweets_googl1.index,
)
comb_tweets_googl1['polarity'] = blob_scores_googl1['polarity']
comb_tweets_googl1['subjectivity'] = blob_scores_googl1['subjectivity']

comb_tweets_googl1.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227,lx21 made on check it out learn exe watt imrs ...,0.152047,0.469165
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562,could be a problem sitting on support that bre...,0.137389,0.445212
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192,zacks bull of the day apple have a great weeke...,0.186061,0.49581
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323,free in account balance for first members lnkd...,0.214288,0.494194
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554,free in account balance for first members lnkd...,0.171292,0.464973
5,2015-01-06,Apple: Does The Party End In 2015? http://seek...,115,228,442,apple does the party end in long with successf...,0.15294,0.461804
6,2015-01-07,The Ever-Changing World Of Apple http://seekin...,105,209,576,the ever changing world of apple what dean kar...,0.170265,0.452655
7,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,251,428,823,one thing apple should do to preserve its mark...,0.189625,0.4805
8,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,177,364,612,how iphone demand could impact apple s q2 guid...,0.186721,0.474236
9,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,50,208,286,apple asian carriers are making iphone cheaper...,0.260925,0.425825


In [None]:
# calculating the VADER negative, neutral, positive and compound scores for each row
# (needs the 'vader_lexicon' NLTK resource: nltk.download('vader_lexicon'))

# the analyzer is created once and reused for every row; constructing it inside
# the loop reloads the lexicon on each iteration, which is pure overhead
vader_analyzer = SentimentIntensityAnalyzer()
sentiment1googl = [
    vader_analyzer.polarity_scores(text)
    for text in comb_tweets_googl1['clean_text']
]

# showing only the first few score dicts instead of printing all of them
print(sentiment1googl[:5])

[{'neg': 0.049, 'neu': 0.795, 'pos': 0.156, 'compound': 0.9999}, {'neg': 0.055, 'neu': 0.825, 'pos': 0.12, 'compound': 0.9999}, {'neg': 0.047, 'neu': 0.772, 'pos': 0.181, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.762, 'pos': 0.193, 'compound': 1.0}, {'neg': 0.06, 'neu': 0.773, 'pos': 0.167, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.752, 'pos': 0.182, 'compound': 1.0}, {'neg': 0.055, 'neu': 0.781, 'pos': 0.164, 'compound': 1.0}, {'neg': 0.048, 'neu': 0.787, 'pos': 0.164, 'compound': 1.0}, {'neg': 0.054, 'neu': 0.786, 'pos': 0.159, 'compound': 1.0}, {'neg': 0.029, 'neu': 0.795, 'pos': 0.176, 'compound': 1.0}, {'neg': 0.036, 'neu': 0.752, 'pos': 0.212, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.795, 'pos': 0.155, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.76, 'pos': 0.188, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.78, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.075, 'neu': 0.785, 'pos': 0.14, 'compound': 1.0}, {'neg': 0.074, 'neu': 0.788, 'pos': 0.137, 'compound': 1.0}, {'neg': 0.058, 'neu': 

In [None]:
# converting the list of per-row VADER score dicts to a dataframe
# (one row per tweet row, one column per score: neg, neu, pos, compound)
sentiment1googl_df = pd.DataFrame(sentiment1googl)
print(sentiment1googl_df)

        neg    neu    pos  compound
0     0.049  0.795  0.156    0.9999
1     0.055  0.825  0.120    0.9999
2     0.047  0.772  0.181    1.0000
3     0.046  0.762  0.193    1.0000
4     0.060  0.773  0.167    1.0000
...     ...    ...    ...       ...
1822  0.044  0.820  0.136    0.9999
1823  0.047  0.811  0.142    0.9999
1824  0.051  0.776  0.173    1.0000
1825  0.052  0.808  0.140    1.0000
1826  0.074  0.784  0.143    0.9988

[1827 rows x 4 columns]


In [None]:
# adding the four VADER score columns to the original dataframe, one column at a time
for score_col in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_googl1[score_col] = sentiment1googl_df[score_col]

comb_tweets_googl1.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",17,968,227,lx21 made on check it out learn exe watt imrs ...,0.152047,0.469165,0.049,0.795,0.156,0.9999
1,2015-01-02,$aapl could be a problem sitting on support th...,128,2102,562,could be a problem sitting on support that bre...,0.137389,0.445212,0.055,0.825,0.12,0.9999
2,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,31,103,192,zacks bull of the day apple have a great weeke...,0.186061,0.49581,0.047,0.772,0.181,1.0
3,2015-01-04,Free 5€ in account balance for first 100.000 m...,52,328,323,free in account balance for first members lnkd...,0.214288,0.494194,0.046,0.762,0.193,1.0
4,2015-01-05,Free 5€ in account balance for first 100.000 m...,139,392,554,free in account balance for first members lnkd...,0.171292,0.464973,0.06,0.773,0.167,1.0
5,2015-01-06,Apple: Does The Party End In 2015? http://seek...,115,228,442,apple does the party end in long with successf...,0.15294,0.461804,0.066,0.752,0.182,1.0
6,2015-01-07,The Ever-Changing World Of Apple http://seekin...,105,209,576,the ever changing world of apple what dean kar...,0.170265,0.452655,0.055,0.781,0.164,1.0
7,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,251,428,823,one thing apple should do to preserve its mark...,0.189625,0.4805,0.048,0.787,0.164,1.0
8,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,177,364,612,how iphone demand could impact apple s q2 guid...,0.186721,0.474236,0.054,0.786,0.159,1.0
9,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,50,208,286,apple asian carriers are making iphone cheaper...,0.260925,0.425825,0.029,0.795,0.176,1.0


In [None]:
# labelling each row from its VADER scores:
# 'negative' if neg > pos, 'positive' if pos > neg, otherwise 'neutral'
# (the 'neu' score plays no part in the label)

# vectorised comparison of the two columns; .tolist() keeps the result a plain list of str
sntmnt1googl = np.select(
    [
        comb_tweets_googl1['neg'] > comb_tweets_googl1['pos'],
        comb_tweets_googl1['pos'] > comb_tweets_googl1['neg'],
    ],
    ['negative', 'positive'],
    default='neutral',
).tolist()

In [None]:
# summarising how many rows received each label instead of printing the whole list
print(pd.Series(sntmnt1googl, dtype='object').value_counts())

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# storing the per-row sentiment labels as a new 'sentiment' column and previewing the result
comb_tweets_googl1['sentiment'] = sntmnt1googl
print(comb_tweets_googl1.head())

  updated_dates                                               body  \
0    2015-01-01  lx21 made $10,008  on $AAPL -Check it out! htt...   
1    2015-01-02  $aapl could be a problem sitting on support th...   
2    2015-01-03  Zacks' Bull Of The Day: Apple http://seekingal...   
3    2015-01-04  Free 5€ in account balance for first 100.000 m...   
4    2015-01-05  Free 5€ in account balance for first 100.000 m...   

   comment_num  retweet_num  like_num  \
0           17          968       227   
1          128         2102       562   
2           31          103       192   
3           52          328       323   
4          139          392       554   

                                          clean_text  polarity  subjectivity  \
0  lx21 made on check it out learn exe watt imrs ...  0.152047      0.469165   
1  could be a problem sitting on support that bre...  0.137389      0.445212   
2  zacks bull of the day apple have a great weeke...  0.186061      0.495810   
3  free in a

In [None]:
# exporting the first dataframe (now including the sentiment columns) to the mounted Google Drive
comb_tweets_googl1.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Googl/comb_tweets_googl1_snt.csv')

### **Dataset - 2 : Last 3 Days Tweets**

In [None]:
# calculating the TextBlob polarity and subjectivity of each row's cleaned text

# each text is analysed once and both scores are taken from that single result,
# instead of building and scoring a separate TextBlob for each of the two columns
blob_scores_googl3d = pd.DataFrame(
    [tuple(tb(text).sentiment) for text in comb_tweets_googl3d['clean_text']],
    columns=['polarity', 'subjectivity'],
    index=comb_tweets_googl3d.index,
)
comb_tweets_googl3d['polarity'] = blob_scores_googl3d['polarity']
comb_tweets_googl3d['subjectivity'] = blob_scores_googl3d['subjectivity']

comb_tweets_googl3d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,zacks bull of the day apple have a great weeke...,0.158465,0.468734
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,free in account balance for first members lnkd...,0.175851,0.476125
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,free in account balance for first members lnkd...,0.183886,0.47845
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,apple does the party end in long with successf...,0.169583,0.468
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,the ever changing world of apple what dean kar...,0.164246,0.459994
5,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,471,865,1841,one thing apple should do to preserve its mark...,0.172146,0.466585
6,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,533,1001,2011,how iphone demand could impact apple s q2 guid...,0.183188,0.47059
7,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,478,1000,1721,apple asian carriers are making iphone cheaper...,0.197525,0.4712
8,2015-01-11,"Caturday: There is no MacBook, there is only Z...",251,663,1124,caturday there is no macbook there is only zuu...,0.217169,0.457028
9,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",141,531,866,the binary options competition win lnkd l gool...,0.220221,0.442879


In [None]:
# calculating the VADER negative, neutral, positive and compound scores for each row
# (needs the 'vader_lexicon' NLTK resource: nltk.download('vader_lexicon'))

# the analyzer is created once and reused for every row; constructing it inside
# the loop reloads the lexicon on each iteration, which is pure overhead
vader_analyzer = SentimentIntensityAnalyzer()
sentiment3dgoogl = [
    vader_analyzer.polarity_scores(text)
    for text in comb_tweets_googl3d['clean_text']
]

# showing only the first few score dicts instead of printing all of them
print(sentiment3dgoogl[:5])

[{'neg': 0.059, 'neu': 0.786, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.049, 'neu': 0.776, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.053, 'neu': 0.767, 'pos': 0.18, 'compound': 1.0}, {'neg': 0.063, 'neu': 0.76, 'pos': 0.177, 'compound': 1.0}, {'neg': 0.06, 'neu': 0.766, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.058, 'neu': 0.773, 'pos': 0.169, 'compound': 1.0}, {'neg': 0.051, 'neu': 0.784, 'pos': 0.165, 'compound': 1.0}, {'neg': 0.047, 'neu': 0.787, 'pos': 0.166, 'compound': 1.0}, {'neg': 0.048, 'neu': 0.781, 'pos': 0.171, 'compound': 1.0}, {'neg': 0.042, 'neu': 0.784, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.049, 'neu': 0.768, 'pos': 0.182, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.774, 'pos': 0.169, 'compound': 1.0}, {'neg': 0.059, 'neu': 0.77, 'pos': 0.171, 'compound': 1.0}, {'neg': 0.07, 'neu': 0.783, 'pos': 0.147, 'compound': 1.0}, {'neg': 0.073, 'neu': 0.788, 'pos': 0.139, 'compound': 1.0}, {'neg': 0.071, 'neu': 0.78, 'pos': 0.149, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.788, 

In [None]:
# converting the list of per-row VADER score dicts to a dataframe
# (one row per tweet row, one column per score: neg, neu, pos, compound)
sentiment3dgoogl_df = pd.DataFrame(sentiment3dgoogl)
print(sentiment3dgoogl_df)

        neg    neu    pos  compound
0     0.059  0.786  0.154       1.0
1     0.049  0.776  0.174       1.0
2     0.053  0.767  0.180       1.0
3     0.063  0.760  0.177       1.0
4     0.060  0.766  0.174       1.0
...     ...    ...    ...       ...
1820  0.054  0.795  0.151       1.0
1821  0.054  0.796  0.149       1.0
1822  0.049  0.787  0.164       1.0
1823  0.053  0.792  0.155       1.0
1824  0.055  0.791  0.154       1.0

[1825 rows x 4 columns]


In [None]:
# adding the four VADER score columns to the original dataframe, one column at a time
for score_col in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_googl3d[score_col] = sentiment3dgoogl_df[score_col]

comb_tweets_googl3d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-03,Zacks' Bull Of The Day: Apple http://seekingal...,176,3173,981,zacks bull of the day apple have a great weeke...,0.158465,0.468734,0.059,0.786,0.154,1.0
1,2015-01-04,Free 5€ in account balance for first 100.000 m...,211,2533,1077,free in account balance for first members lnkd...,0.175851,0.476125,0.049,0.776,0.174,1.0
2,2015-01-05,Free 5€ in account balance for first 100.000 m...,222,823,1069,free in account balance for first members lnkd...,0.183886,0.47845,0.053,0.767,0.18,1.0
3,2015-01-06,Apple: Does The Party End In 2015? http://seek...,306,948,1319,apple does the party end in long with successf...,0.169583,0.468,0.063,0.76,0.177,1.0
4,2015-01-07,The Ever-Changing World Of Apple http://seekin...,359,829,1572,the ever changing world of apple what dean kar...,0.164246,0.459994,0.06,0.766,0.174,1.0
5,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,471,865,1841,one thing apple should do to preserve its mark...,0.172146,0.466585,0.058,0.773,0.169,1.0
6,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,533,1001,2011,how iphone demand could impact apple s q2 guid...,0.183188,0.47059,0.051,0.784,0.165,1.0
7,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,478,1000,1721,apple asian carriers are making iphone cheaper...,0.197525,0.4712,0.047,0.787,0.166,1.0
8,2015-01-11,"Caturday: There is no MacBook, there is only Z...",251,663,1124,caturday there is no macbook there is only zuu...,0.217169,0.457028,0.048,0.781,0.171,1.0
9,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",141,531,866,the binary options competition win lnkd l gool...,0.220221,0.442879,0.042,0.784,0.174,1.0


In [None]:
# labelling each row from its VADER scores:
# 'negative' if neg > pos, 'positive' if pos > neg, otherwise 'neutral'
# (the 'neu' score plays no part in the label)

# vectorised comparison of the two columns; .tolist() keeps the result a plain list of str
sntmnt3dgoogl = np.select(
    [
        comb_tweets_googl3d['neg'] > comb_tweets_googl3d['pos'],
        comb_tweets_googl3d['pos'] > comb_tweets_googl3d['neg'],
    ],
    ['negative', 'positive'],
    default='neutral',
).tolist()

In [None]:
# summarising how many rows received each label instead of printing the whole list
print(pd.Series(sntmnt3dgoogl, dtype='object').value_counts())

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# storing the per-row sentiment labels as a new 'sentiment' column
comb_tweets_googl3d['sentiment'] = sntmnt3dgoogl

In [None]:
# exporting the second dataframe (now including the sentiment columns) to the mounted Google Drive
comb_tweets_googl3d.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Googl/comb_tweets_googl3d_snt.csv')

### **Dataset - 3 : Last 7 Days Tweets**

In [None]:
# calculating the TextBlob polarity and subjectivity of each row's cleaned text

# each text is analysed once and both scores are taken from that single result,
# instead of building and scoring a separate TextBlob for each of the two columns
blob_scores_googl7d = pd.DataFrame(
    [tuple(tb(text).sentiment) for text in comb_tweets_googl7d['clean_text']],
    columns=['polarity', 'subjectivity'],
    index=comb_tweets_googl7d.index,
)
comb_tweets_googl7d['polarity'] = blob_scores_googl7d['polarity']
comb_tweets_googl7d['subjectivity'] = blob_scores_googl7d['subjectivity']

comb_tweets_googl7d.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-07,The Ever-Changing World Of Apple http://seekin...,587,4330,2876,the ever changing world of apple what dean kar...,0.166986,0.464993
1,2015-01-08,One Thing Apple Should Do To Preserve Its Mark...,821,3790,3472,one thing apple should do to preserve its mark...,0.172842,0.468477
2,2015-01-09,How iPhone Demand Could Impact Apple's Q2 Guid...,870,2052,3522,how iphone demand could impact apple s q2 guid...,0.177947,0.471271
3,2015-01-10,Apple: Asian Carriers Are Making iPhone 6 Chea...,889,2157,3616,apple asian carriers are making iphone cheaper...,0.182011,0.467047
4,2015-01-11,"Caturday: There is no MacBook, there is only Z...",861,1920,3519,caturday there is no macbook there is only zuu...,0.184405,0.46369
5,2015-01-12,"THE BINARY OPTIONS COMPETITION WIN $100,000 ht...",789,1760,3319,the binary options competition win lnkd l gool...,0.185916,0.462454
6,2015-01-13,What's Driving Our $30 Billion Valuation For A...,809,1989,3399,what s driving our billion valuation for apple...,0.183448,0.457585
7,2015-01-14,The Storage Lawsuit Against Apple Is Beyond Fr...,817,2049,3272,the storage lawsuit against apple is beyond fr...,0.173973,0.455505
8,2015-01-15,Apple 2015 Outlook: Where Do We Go From Here? ...,711,1826,2812,apple outlook where do we go from here qihu ba...,0.167914,0.44767
9,2015-01-16,Apple's App Store Will Surprise To The Upside ...,690,1929,2561,apple s app store will surprise to the upside ...,0.151119,0.445665


In [None]:
# downloading the VADER lexicon that SentimentIntensityAnalyzer needs
# (nltk is already imported in the imports cell at the top of the notebook)
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [None]:
# calculating the VADER negative, neutral, positive and compound scores for each row
# (needs the 'vader_lexicon' NLTK resource: nltk.download('vader_lexicon'))

# the analyzer is created once and reused for every row; constructing it inside
# the loop reloads the lexicon on each iteration, which is pure overhead
vader_analyzer = SentimentIntensityAnalyzer()
sentiment7dgoogl = [
    vader_analyzer.polarity_scores(text)
    for text in comb_tweets_googl7d['clean_text']
]

# showing only the first few score dicts instead of printing all of them
print(sentiment7dgoogl[:5])

KeyboardInterrupt: ignored

In [None]:
# converting the list of per-row VADER score dicts to a dataframe
# (one row per tweet row, one column per score: neg, neu, pos, compound)
sentiment7dgoogl_df = pd.DataFrame(sentiment7dgoogl)
print(sentiment7dgoogl_df)

In [None]:
# adding the four VADER score columns to the original dataframe, one column at a time
for score_col in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_googl7d[score_col] = sentiment7dgoogl_df[score_col]

comb_tweets_googl7d.head(10)

In [None]:
# labelling each row from its VADER scores:
# 'negative' if neg > pos, 'positive' if pos > neg, otherwise 'neutral'
# (the 'neu' score plays no part in the label)

# vectorised comparison of the two columns; .tolist() keeps the result a plain list of str
sntmnt7dgoogl = np.select(
    [
        comb_tweets_googl7d['neg'] > comb_tweets_googl7d['pos'],
        comb_tweets_googl7d['pos'] > comb_tweets_googl7d['neg'],
    ],
    ['negative', 'positive'],
    default='neutral',
).tolist()

In [None]:
# summarising how many rows received each label instead of printing the whole list
print(pd.Series(sntmnt7dgoogl, dtype='object').value_counts())

In [None]:
# storing the per-row sentiment labels as a new 'sentiment' column
comb_tweets_googl7d['sentiment'] = sntmnt7dgoogl

In [None]:
# exporting the third dataframe (now including the sentiment columns) to the mounted Google Drive
comb_tweets_googl7d.to_csv('/content/gdrive/MyDrive/DATA_602_Project/datasets/Googl/comb_tweets_googl7d_snt.csv')

### **Dataset - 4 : Previous Day Tweets**

In [None]:
# calculating the TextBlob polarity and subjectivity of each row's cleaned text

# each text is analysed once and both scores are taken from that single result,
# instead of building and scoring a separate TextBlob for each of the two columns
blob_scores_googlprev = pd.DataFrame(
    [tuple(tb(text).sentiment) for text in comb_tweets_googlprev['clean_text']],
    columns=['polarity', 'subjectivity'],
    index=comb_tweets_googlprev.index,
)
comb_tweets_googlprev['polarity'] = blob_scores_googlprev['polarity']
comb_tweets_googlprev['subjectivity'] = blob_scores_googlprev['subjectivity']

comb_tweets_googlprev.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",6,33,50,lx21 made on check it out learn exe watt imrs ...,0.067588,0.469103
1,2015-01-02,@davenathan I just expected something more ext...,52,1240,408,i just expected something more extravagant lik...,0.166638,0.469114
2,2015-01-03,The Closing Bell is out! http://paper.li/BonaC...,108,1858,439,the closing bell is out eurusd gbpusd usdjpy a...,0.146284,0.446422
3,2015-01-04,The Closing Bell is out! http://paper.li/BonaC...,15,62,133,the closing bell is out eurusd gbpusd usdjpy a...,0.216906,0.507264
4,2015-01-05,#Apple : Price Target Raised to $125.00 at Arg...,74,385,411,price target raised to at argus free in accoun...,0.228005,0.496103
5,2015-01-06,Stock Contest!! Pick $COP and WIN a FREE Table...,128,373,536,stock contest pick cop and win a free tablet e...,0.159218,0.460904
6,2015-01-07,Apple reports earnings on January 27 http://m....,141,254,525,apple reports earnings on january join me on t...,0.15465,0.461288
7,2015-01-08,More U.S. dollar damage $AAPL:$USD - http://sc...,91,196,516,more u s dollar damage usd here s how apple co...,0.172645,0.453962
8,2015-01-09,Those stocks? $AAPL $BABA $TSLA $VA ... If sh...,267,487,877,those stocks baba va if she had let me do i wo...,0.194843,0.482668
9,2015-01-10,"Join me on tsū, is sharing social revenues wit...",166,394,587,join me on ts is sharing social revenues with ...,0.179049,0.466481


In [None]:
# calculating the VADER negative, neutral, positive and compound scores for each row
# (needs the 'vader_lexicon' NLTK resource: nltk.download('vader_lexicon'))

# the analyzer is created once and reused for every row; constructing it inside
# the loop reloads the lexicon on each iteration, which is pure overhead
vader_analyzer = SentimentIntensityAnalyzer()
sentimentprevgoogl = [
    vader_analyzer.polarity_scores(text)
    for text in comb_tweets_googlprev['clean_text']
]

# showing only the first few score dicts instead of printing all of them
print(sentimentprevgoogl[:5])

[{'neg': 0.041, 'neu': 0.894, 'pos': 0.065, 'compound': 0.9354}, {'neg': 0.04, 'neu': 0.835, 'pos': 0.125, 'compound': 0.9998}, {'neg': 0.058, 'neu': 0.794, 'pos': 0.148, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.754, 'pos': 0.2, 'compound': 1.0}, {'neg': 0.043, 'neu': 0.758, 'pos': 0.198, 'compound': 1.0}, {'neg': 0.067, 'neu': 0.778, 'pos': 0.155, 'compound': 1.0}, {'neg': 0.07, 'neu': 0.756, 'pos': 0.174, 'compound': 1.0}, {'neg': 0.052, 'neu': 0.776, 'pos': 0.172, 'compound': 1.0}, {'neg': 0.046, 'neu': 0.8, 'pos': 0.154, 'compound': 1.0}, {'neg': 0.061, 'neu': 0.798, 'pos': 0.142, 'compound': 1.0}, {'neg': 0.022, 'neu': 0.839, 'pos': 0.139, 'compound': 0.9999}, {'neg': 0.014, 'neu': 0.845, 'pos': 0.141, 'compound': 0.9999}, {'neg': 0.049, 'neu': 0.785, 'pos': 0.166, 'compound': 1.0}, {'neg': 0.057, 'neu': 0.772, 'pos': 0.172, 'compound': 1.0}, {'neg': 0.066, 'neu': 0.796, 'pos': 0.138, 'compound': 1.0}, {'neg': 0.079, 'neu': 0.782, 'pos': 0.139, 'compound': 1.0}, {'neg': 0.078, 'n

In [None]:
# converting the list of per-row VADER score dicts to a dataframe
# (one row per tweet row, one column per score: neg, neu, pos, compound)
sentimentprevgoogl_df = pd.DataFrame(sentimentprevgoogl)
print(sentimentprevgoogl_df)

        neg    neu    pos  compound
0     0.041  0.894  0.065    0.9354
1     0.040  0.835  0.125    0.9998
2     0.058  0.794  0.148    1.0000
3     0.046  0.754  0.200    1.0000
4     0.043  0.758  0.198    1.0000
...     ...    ...    ...       ...
1821  0.059  0.793  0.149    1.0000
1822  0.050  0.788  0.163    0.9999
1823  0.059  0.800  0.142    0.9999
1824  0.051  0.779  0.169    1.0000
1825  0.055  0.807  0.138    1.0000

[1826 rows x 4 columns]


In [None]:
# adding the four VADER score columns to the original dataframe, one column at a time
for score_col in ('neg', 'neu', 'pos', 'compound'):
    comb_tweets_googlprev[score_col] = sentimentprevgoogl_df[score_col]

comb_tweets_googlprev.head(10)

Unnamed: 0,updated_dates,body,comment_num,retweet_num,like_num,clean_text,polarity,subjectivity,neg,neu,pos,compound
0,2015-01-01,"lx21 made $10,008 on $AAPL -Check it out! htt...",6,33,50,lx21 made on check it out learn exe watt imrs ...,0.067588,0.469103,0.041,0.894,0.065,0.9354
1,2015-01-02,@davenathan I just expected something more ext...,52,1240,408,i just expected something more extravagant lik...,0.166638,0.469114,0.04,0.835,0.125,0.9998
2,2015-01-03,The Closing Bell is out! http://paper.li/BonaC...,108,1858,439,the closing bell is out eurusd gbpusd usdjpy a...,0.146284,0.446422,0.058,0.794,0.148,1.0
3,2015-01-04,The Closing Bell is out! http://paper.li/BonaC...,15,62,133,the closing bell is out eurusd gbpusd usdjpy a...,0.216906,0.507264,0.046,0.754,0.2,1.0
4,2015-01-05,#Apple : Price Target Raised to $125.00 at Arg...,74,385,411,price target raised to at argus free in accoun...,0.228005,0.496103,0.043,0.758,0.198,1.0
5,2015-01-06,Stock Contest!! Pick $COP and WIN a FREE Table...,128,373,536,stock contest pick cop and win a free tablet e...,0.159218,0.460904,0.067,0.778,0.155,1.0
6,2015-01-07,Apple reports earnings on January 27 http://m....,141,254,525,apple reports earnings on january join me on t...,0.15465,0.461288,0.07,0.756,0.174,1.0
7,2015-01-08,More U.S. dollar damage $AAPL:$USD - http://sc...,91,196,516,more u s dollar damage usd here s how apple co...,0.172645,0.453962,0.052,0.776,0.172,1.0
8,2015-01-09,Those stocks? $AAPL $BABA $TSLA $VA ... If sh...,267,487,877,those stocks baba va if she had let me do i wo...,0.194843,0.482668,0.046,0.8,0.154,1.0
9,2015-01-10,"Join me on tsū, is sharing social revenues wit...",166,394,587,join me on ts is sharing social revenues with ...,0.179049,0.466481,0.061,0.798,0.142,1.0


In [None]:
# calculating sentiment for each row

# the label is decided by comparing the 'neg' and 'pos' scores of a row:
#   neg > pos  -> "negative"
#   pos > neg  -> "positive"
#   otherwise  -> "neutral"  (equal scores, or a comparison involving NaN)
# the 'neu' score takes no part in the decision.
# the comparison is done on whole columns at once instead of looping over
# the dataframe with iterrows(), which builds a Series object for every row.
neg_scores = comb_tweets_googlprev['neg'].to_numpy()
pos_scores = comb_tweets_googlprev['pos'].to_numpy()

# .tolist() keeps the result a plain python list of strings, one per row
sntmntprevgoogl = np.select(
    [neg_scores > pos_scores, pos_scores > neg_scores],
    ["negative", "positive"],
    default="neutral",
).tolist()

In [None]:
# checking the computed sentiment labels
# NOTE(review): this prints the complete list (one label per dataframe row);
# a slice such as the first few entries would keep the cell output short
print(sntmntprevgoogl)

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'po

In [None]:
# adding the sentiment labels to the dataframe as a 'sentiment' column
# (the list was built from this same dataframe's rows, one label per row)
comb_tweets_googlprev['sentiment'] = sntmntprevgoogl

In [None]:
# saving the 'googlprev' dataframe, now carrying the score and sentiment
# columns, as a csv file on the mounted drive
# NOTE(review): to_csv writes the row index by default, so the file gets one
# more unnamed index column next to the existing 'Unnamed: 0' - confirm that
# the notebooks reading this file expect that
googlprev_snt_path = '/content/gdrive/MyDrive/DATA_602_Project/datasets/Googl/comb_tweets_googlprev_snt.csv'
comb_tweets_googlprev.to_csv(googlprev_snt_path)