In [None]:
#### Part 3. Sentiment Analysis
# Guide for code: https://www.youtube.com/watch?v=DFtP1THE8fE

In [None]:
# Flair runs on top of PyTorch. Install PyTorch first (https://pytorch.org/get-started/locally/),
# then install Flair itself (e.g. `pip install flair`).
# If you have a dedicated graphics card, you might be able to leverage the CUDA cores
import flair
import pandas as pd
# Show full column contents (no truncation of long post titles) when displaying DataFrames
pd.set_option('display.max_colwidth', None)

In [None]:
# Load Flair's pre-trained English sentiment classifier ('en-sentiment').
# Might require downloading the model the first time this cell is run.
sentiment_model = flair.models.TextClassifier.load('en-sentiment')

In [None]:
# Load the scraped Reddit posts into a DataFrame (comma-separated file).
# Point this at a different CSV to analyse another dataset.
df = pd.read_csv('RedditData.csv')
df.head()

In [None]:
# Removes duplicates from data being analyzed [title of /r/Bitcoin post] for normalization.
# The index is rebuilt afterwards so row labels are again 0..n-1: a later cell looks rows up
# with df.loc[<position>, ...], and gaps left by the dropped rows would make those lookups
# hit the wrong row or raise KeyError.
# Not done in place, so re-running the cell is safe.
df = df.drop_duplicates(subset='title', keep='first').reset_index(drop=True)

In [None]:
# Tokenize each post title and predict its sentiment with the Flair classifier
# This might take some time to complete
#
# `sentiment` and `confidence` end up with exactly one entry per row of df['title'].
# Titles that are blank or not text (e.g. NaN from an empty CSV field) get "" in both
# lists and are never sent to the model.

sentiment = []
confidence = []

for sentence in df['title']:
    # Check for unusable titles first: a non-string has no .strip(), and there is
    # no point running the model on empty text.
    if not isinstance(sentence, str) or sentence.strip() == "":
        sentiment.append("")
        confidence.append("")
        continue

    # Tokenizing sentence
    sample = flair.data.Sentence(sentence)
    # Make prediction with tokenized sentence
    sentiment_model.predict(sample)

    if sample.labels:
        # The first label carries the predicted class and its confidence score
        sentiment.append(sample.labels[0].value)
        confidence.append(sample.labels[0].score)
    else:
        # The model attached no label; keep the lists aligned with the DataFrame rows
        sentiment.append("")
        confidence.append("")

In [None]:
# Store the predicted labels and their confidence scores as two new columns,
# aligned row-by-row with the titles they were computed from

df = df.assign(sentiment=sentiment, confidence=confidence)

In [None]:
# Save the posts together with their sentiment and confidence columns.
# The DataFrame index is written too (to_csv default), as the first column.
df.to_csv('SentimentAnalysis.csv')

In [None]:
# Getting Data ready for Visualization
# Load the Bitcoin price data; later cells read its 'time', 'low', 'high',
# 'open', 'close' and 'volume' columns.

bitdata = pd.read_csv('BitcoinData.csv')

In [None]:
# Parse both timestamp columns into pandas datetimes so later cells can read
# calendar fields (day, hour) from each value when matching posts to price rows.
df['date'] = pd.to_datetime(df['date'])
bitdata['time'] = pd.to_datetime(bitdata["time"])

In [None]:
# Creating a new Dataframe to plot the Bitcoin Price vs. highest Sentiment Value in the hour
#
# Posts are counted per hour in a single pass and then looked up for every price row,
# instead of re-scanning every post for each price row.
# Posts are paired with their labels by position (zip), so the result does not depend
# on the DataFrame's index labels.


def _hour_key(timestamp):
    """Return the (year, day-of-year, hour) bucket that `timestamp` falls in.

    The year is part of the key so that the same calendar hour in two different
    years is never treated as one bucket.
    """
    return (timestamp.year, timestamp.dayofyear, timestamp.hour)


def _count_labels_per_hour(timestamps, labels):
    """Count NEGATIVE and POSITIVE labels per hour bucket.

    Args:
        timestamps: iterable of pandas Timestamps; missing values (NaT) are skipped.
        labels: iterable of sentiment labels, paired with `timestamps` by position.

    Returns:
        (negative_counts, positive_counts): two dicts mapping an `_hour_key`
        tuple to the number of posts with that label in that hour. Any other
        label (such as the "" placeholder stored for blank titles) is ignored.
    """
    negative_counts = {}
    positive_counts = {}
    for posted_at, label in zip(timestamps, labels):
        if pd.isna(posted_at):
            continue
        if label == 'NEGATIVE':
            counts = negative_counts
        elif label == 'POSITIVE':
            counts = positive_counts
        else:
            continue
        key = _hour_key(posted_at)
        counts[key] = counts.get(key, 0) + 1
    return negative_counts, positive_counts


negative_per_hour, positive_per_hour = _count_labels_per_hour(df['date'], df['sentiment'])

# One entry per price row; these lists are turned into a DataFrame by the next cell
sentiment = []
score = []
dates = []
low = []
high = []
openv = []
closev = []
volume = []

price_rows = zip(bitdata['time'], bitdata['low'], bitdata['high'],
                 bitdata['open'], bitdata['close'], bitdata['volume'])

for subpost, row_low, row_high, row_open, row_close, row_volume in price_rows:
    # A price row without a timestamp cannot be matched to any hour
    if pd.isna(subpost):
        continue

    key = _hour_key(subpost)
    nTotal = negative_per_hour.get(key, 0)
    pTotal = positive_per_hour.get(key, 0)

    # The dominant label wins and the score is its share of the hour's labelled posts
    if nTotal > pTotal:
        hour_sentiment = "NEGATIVE"
        hour_score = nTotal / (nTotal + pTotal)
    elif pTotal > nTotal:
        hour_sentiment = "POSITIVE"
        hour_score = pTotal / (nTotal + pTotal)
    # If both Pos and Neg are equal (including no posts at all), the sentiment is Neutral.
    # Set score to 0
    else:
        hour_sentiment = "NEUTRAL"
        hour_score = 0

    sentiment.append(hour_sentiment)
    score.append(hour_score)
    dates.append(subpost)
    low.append(row_low)
    high.append(row_high)
    openv.append(row_open)
    closev.append(row_close)
    volume.append(row_volume)

In [None]:
# Assemble the per-hour lists into one DataFrame, one column per list

sent_time_df = pd.DataFrame({
    'date': dates,
    'sentiment': sentiment,
    'score': score,
    'low': low,
    'high': high,
    'open': openv,
    'close': closev,
    'volume': volume,
})

In [None]:
# Save the hourly sentiment/price table to CSV so it can be kept under version control.
# The DataFrame index is written too (to_csv default), as the first column.

sent_time_df.to_csv('SentimentOverTime.csv')

In [None]:
# Bitcoin Price vs Sentiment Value Data Visualization
# Goal: see whether the price of Bitcoin is correlated with the sentiment on
#    /r/Bitcoin

import plotly
import plotly.graph_objects as go
from datetime import datetime
import plotly.io as pio
from plotly.subplots import make_subplots
from plotly.offline import plot, iplot, init_notebook_mode
# Prepare the notebook for Plotly output. Per the Plotly docs, connected=True loads
# plotly.js from the online CDN instead of embedding it, so viewing needs internet access.
init_notebook_mode(connected=True)
import plotly.express as px

In [None]:
# Build one bar colour per row of sent_time_df from its sentiment label:
# red when the hour's dominant sentiment is NEGATIVE, green for every other label

red = 'rgb(222,0,0)'
green = 'rgb(0,222,0)'
colors = [red if label == 'NEGATIVE' else green for label in sent_time_df['sentiment']]

sent_time_df['color'] = colors

In [None]:
# Dual-axis Plotly chart: Bitcoin candlesticks together with hourly sentiment bars
# The figure is shown with the "notebook" renderer, so it may not appear outside Jupyter Notebook

# A single subplot with a second y-axis, so price and sentiment keep separate scales
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Bitcoin price as candlesticks
trace1 = go.Candlestick(
    name="Bitcoin Price",
    x=sent_time_df['date'],
    open=sent_time_df['open'],
    high=sent_time_df['high'],
    low=sent_time_df['low'],
    close=sent_time_df['close'],
)

# Sentiment score as bars, coloured by the per-row 'color' column
trace2 = go.Bar(
    name="Sentiment",
    x=sent_time_df['date'],
    y=sent_time_df['score'],
    marker=dict(color=sent_time_df['color']),
)

# Price goes on the secondary y-axis, sentiment on the primary one
fig.add_trace(trace1, secondary_y=True)
fig.add_trace(trace2, secondary_y=False)

# Hide the grid lines of the secondary (price) axis
fig.update_layout(yaxis2=dict(showgrid=False))
fig.show(renderer="notebook")