In [1]:
#Import required packages
import pandas as pd
import time
import requests
# Wall-clock timestamp taken when this cell runs; no cell shown here reads it back.
time_counter = time.time()
import datetime as dt
import pendulum

In [2]:
# Load the reddit comments. Later cells read the 'body' and 'created_utc'
# columns of this frame.
data = pd.read_csv('reddit.csv')
# First column of the file as a plain Python list (not referenced by the cells shown here).
doc = data.iloc[:,0].to_list()

In [3]:
#Perform sentiment analysis on the reddit comments
from textblob import TextBlob

def sentiment(x):
    '''Return the TextBlob polarity of ``x``, or NaN when ``x`` is not text.

    Parameters
    ----------
    x : str
        Text to score. Any non-string value (for example the float NaN that
        pandas uses for an empty CSV field) is treated as missing.

    Returns
    -------
    float
        ``TextBlob(x).sentiment.polarity`` for strings, ``float('nan')``
        for anything else.
    '''
    # TextBlob rejects non-string input with an exception, which would abort
    # an entire ``Series.apply`` because of a single missing value. NaN keeps
    # the row but leaves it out of means and threshold comparisons.
    if not isinstance(x, str):
        return float('nan')
    blob = TextBlob(x)
    return blob.sentiment.polarity

def timezone(utc, tz='EST'):
    '''Format a Unix timestamp as a date-time string in timezone ``tz``.

    Parameters
    ----------
    utc : number
        Timestamp in the form accepted by ``pendulum.from_timestamp``.
    tz : str, optional
        Timezone name handed to ``pendulum.from_timestamp``. Defaults to
        ``'EST'``, the value that used to be hard-coded here.

    Returns
    -------
    str
        Whatever ``to_datetime_string()`` yields for the converted instant.
    '''
    # NOTE(review): 'EST' appears to be a fixed UTC-5 zone without daylight
    # saving -- confirm whether 'America/New_York' is what the analysis needs.
    return pendulum.from_timestamp(utc, tz).to_datetime_string()

In [4]:
#Dataframe cleaning
# Polarity score for every comment body.
data['sentiment'] = data['body'].apply(sentiment)
# timezone() returns the output of to_datetime_string(), which carries a
# time-of-day part, so the parse format has to include it; a date-only format
# leaves unconverted text behind and is rejected by strict format parsing.
# Only the calendar date is kept, since the daily aggregation groups on it.
data['date'] = pd.to_datetime(
    data['created_utc'].apply(timezone),
    format='%Y-%m-%d %H:%M:%S',
).dt.date

In [5]:
#Categorize positive, negative and neutral comments based on the polarity
# The three flags are mutually exclusive: the +/-0.05 boundaries belong to
# positive / negative, and neutral is the open interval between them. With
# inclusive bounds on both sides a polarity of exactly 0.05 or -0.05 would be
# counted twice in the daily sums. A NaN polarity gets 0 in all three columns.
data['positive'] = (data['sentiment'] >= 0.05).astype(int)
data['negative'] = (data['sentiment'] <= -0.05).astype(int)
data['neutral'] = ((data['sentiment'] > -0.05) & (data['sentiment'] < 0.05)).astype(int)

In [6]:
#Count the number of positive, negative and neutral comments for each day
# One row per date: mean polarity, number of comment bodies, and the sum of
# each 0/1 category flag.
daily_comment_aggs = {
    'sentiment': 'mean',
    'body': 'count',
    'positive': 'sum',
    'negative': 'sum',
    'neutral': 'sum',
}
comments = data.groupby('date').agg(daily_comment_aggs).copy().reset_index()

In [7]:
#Rename columns
comment_column_labels = {
    "sentiment": "comments sentiment",
    "body": "number of comments",
    "positive": "positive comments",
    "negative": "negative comments",
    "neutral": "neutral comments",
}
comments = comments.rename(columns=comment_column_labels)
# Discard the first daily row, then renumber. reset_index() without drop=True
# keeps the old labels as an extra 'index' column.
# NOTE(review): the reason for discarding the first day is not visible here -- confirm.
# NOTE(review): running this cell twice removes a second row and then fails,
# because the 'index' column already exists.
first_row_label = comments.index[0]
comments = comments.drop(first_row_label).reset_index()

In [8]:
# Load the news data. Later cells read the 'Headline' and 'Date' columns of this frame.
news = pd.read_csv('news.csv')

In [9]:
#Dataframe cleaning
#Perform sentiment analysis on the news headlines
headline_polarity = news['Headline'].apply(sentiment)
news['news sentiment'] = headline_polarity
# Parse 'Date' and keep only the calendar date.
news['Date'] = pd.to_datetime(news['Date']).dt.date

In [10]:
#Categorize positive, negative and neutral news based on the polarity
# The three flags are mutually exclusive: the +/-0.05 boundaries belong to
# positive / negative, and neutral is the open interval between them. With
# inclusive bounds on both sides a polarity of exactly 0.05 or -0.05 would be
# counted twice in the daily sums. A NaN polarity gets 0 in all three columns.
news['positive'] = (news['news sentiment'] >= 0.05).astype(int)
news['negative'] = (news['news sentiment'] <= -0.05).astype(int)
news['neutral'] = ((news['news sentiment'] > -0.05) & (news['news sentiment'] < 0.05)).astype(int)

In [11]:
#Count the number of positive, negative and neutral news for each day
# One row per Date: mean headline polarity, number of headlines, and the sum
# of each 0/1 category flag; the generic column names are then relabelled.
daily_news_aggs = {
    'news sentiment': 'mean',
    'Headline': 'count',
    'positive': 'sum',
    'negative': 'sum',
    'neutral': 'sum',
}
news_column_labels = {
    "Headline": "number of news",
    "positive": "positive news",
    "negative": "negative news",
    "neutral": "neutral news",
}
news_m = (
    news.groupby('Date')
    .agg(daily_news_aggs)
    .copy()
    .reset_index()
    .rename(columns=news_column_labels)
)

In [12]:
#Make a consolidated dataframe
# Join the daily comment statistics to the daily news statistics on the
# calendar date. A positional concat(axis=1) pairs rows by index label, so a
# day missing from either source would silently shift every later row onto the
# wrong date; merging on the key cannot misalign. Columns are addressed by
# name rather than by position for the same reason.
comment_stats = (
    comments
    .drop(columns=['index'], errors='ignore')  # helper column left behind by reset_index()
    .rename(columns={'date': 'Date'})
)
# Outer join keeps days that appear in only one of the two sources.
final = news_m.merge(comment_stats, on='Date', how='outer')

In [13]:
#Get data for the change in stock price and volume traded
stock = pd.read_csv('stock.csv')
# Replace the raw 'date' column with a 'Date' column holding calendar dates,
# the key the consolidated frame is merged on.
stock['Date'] = pd.to_datetime(stock['date']).dt.date
stock = stock.drop(columns=['date'])

In [14]:
#Make a consolidated dataframe
# Left join: every row of `final` is kept, and rows whose Date has no match in
# `stock` get NaN in the stock columns.
final = pd.merge(final, stock, how='left', on='Date')

In [15]:
# Save the final output to final.csv, omitting the row index.
final.to_csv('final.csv',index=False)