In [1]:
# Initial imports
import pandas as pd
import numpy as np
from pathlib import Path
import tensorflow as tf

%matplotlib inline

In [2]:
# Set the random seeds for reproducibility.
# Use the `np` / `tf` modules already imported in the first cell instead of
# `from tensorflow import random`, which binds a global named `random` that
# is easily mistaken for Python's standard-library `random` module.
np.random.seed(1)

tf.random.set_seed(2)

In [3]:
# Load the dataset, using the "timestamp" column as a parsed datetime index.
# `infer_datetime_format` is intentionally omitted: it is deprecated since
# pandas 2.0, where format inference is already the default behaviour.
file_path = Path("../Resources/reddit_wsb.csv")
df_wsb = pd.read_csv(file_path, parse_dates=True, index_col="timestamp")

# Order the posts chronologically (explicit reassignment instead of inplace=True,
# so the lineage of `df_wsb` is visible in the cell).
df_wsb = df_wsb.sort_index()

In [4]:
# Preview the first 10 rows of the loaded frame.
df_wsb.head(10)

Unnamed: 0_level_0,title,score,id,url,comms_num,created,body
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-09-29 03:46:56,$CBAT $SUNW $SPI $OXBR Y'all are missing the p...,4,j1fmmo,https://www.reddit.com/r/wallstreetbets/commen...,11,1601340000.0,$CBAT is going to be a great play further yet:...
2021-01-28 09:08:16,An autists journey. $150 to $68k in one month....,50,l6h58v,https://www.reddit.com/gallery/l6h58v,8,1611818000.0,
2021-01-28 09:08:17,You all have made Melvin live up to its name,70,l6h59b,https://i.redd.it/59sk10r7iyd61.png,7,1611818000.0,
2021-01-28 09:08:18,I got in late on GME but I believe in the caus...,75,l6h5a2,https://www.reddit.com/r/wallstreetbets/commen...,14,1611818000.0,You guys are amazing. Thank you for sending GM...
2021-01-28 09:08:19,Highest SI % of Float End of Business Jan 27th...,26,l6h5am,https://www.reddit.com/r/wallstreetbets/commen...,22,1611818000.0,Will Update Daily At Market Close\n\nTop 5 sec...
2021-01-28 09:08:21,What are you doing to AMC?,67,l6h5b8,https://www.reddit.com/r/wallstreetbets/commen...,55,1611818000.0,Hey retards what the hell is going on with AMC...
2021-01-28 09:08:21,Discord still down,0,l6h5bt,https://www.reddit.com/r/wallstreetbets/commen...,6,1611818000.0,i can't see how to talk to to any anti bot mea...
2021-01-28 09:08:40,HOW CAN I BUY STOCK????!!!,0,l6h5km,https://www.reddit.com/r/wallstreetbets/commen...,12,1611818000.0,I'm from canada and want to know the best trad...
2021-01-28 09:08:41,"Buy games at GameStop, donate to charity (not ...",50,l6h5l4,https://www.reddit.com/r/wallstreetbets/commen...,9,1611818000.0,"Value goes up, kids play video games"
2021-01-28 09:08:47,Hmmmmmm turns out BlackRock is making use of o...,66,l6h5ov,https://i.redd.it/6wka0oehiyd61.jpg,8,1611818000.0,


In [5]:
# The post titles are the text that both sentiment models below are run on.
# NOTE(review): assumes no title is missing (NaN) — TODO confirm.
wsb_X = df_wsb['title']

In [6]:
# Display the title Series as a sanity check.
wsb_X

timestamp
2020-09-29 03:46:56    $CBAT $SUNW $SPI $OXBR Y'all are missing the p...
2021-01-28 09:08:16    An autists journey. $150 to $68k in one month....
2021-01-28 09:08:17         You all have made Melvin live up to its name
2021-01-28 09:08:18    I got in late on GME but I believe in the caus...
2021-01-28 09:08:19    Highest SI % of Float End of Business Jan 27th...
                                             ...                        
2021-04-05 15:22:56    Tesla finally gets an outperform rating and an...
2021-04-05 16:26:32    Still my PR(%), looking to break it soon here....
2021-04-05 16:28:19                          WSB discussion this weekend
2021-04-05 16:35:03    Cane to show my almost identical trade to some...
2021-04-05 16:57:11                   Biggest dating app in China - Momo
Name: title, Length: 44270, dtype: object

In [7]:
# Restore the previously saved tokenizer from the working directory.
# NOTE(review): joblib files are pickles — only load artifacts from a trusted source.
import joblib

tokenizer = joblib.load('tokenizer.joblib')

In [8]:
# Convert every title to a sequence using the loaded tokenizer.
X_seq = tokenizer.texts_to_sequences(wsb_X)

In [9]:
# Load the saved Keras model from its HDF5 file via the `tf` namespace
# imported in the first cell.
model = tf.keras.models.load_model('rnn_model.h5')

In [10]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Set the pad size: every title sequence is brought to this fixed length.
# NOTE(review): presumably this must equal the sequence length the loaded model
# was trained with — TODO confirm against the training notebook.
max_words = 16

# Pad the sequences using the pad_sequences() method; padding="post" appends
# the padding after the tokens.
X_pad = pad_sequences(X_seq, maxlen=max_words, padding="post")

In [11]:
# `Sequential.predict_classes` was removed in TensorFlow 2.6, so derive the class
# labels from `predict` exactly as it used to: argmax over a multi-unit output,
# or a 0.5 threshold on a single-unit output.
# NOTE(review): batch_size=1 is kept from the original call so results are
# unchanged, but it runs one forward pass per title; a larger batch should be
# much faster if the model is not stateful — TODO confirm.
wsb_proba = model.predict(X_pad, batch_size=1)
if wsb_proba.shape[-1] > 1:
    wsb_pred = wsb_proba.argmax(axis=-1)
else:
    wsb_pred = (wsb_proba > 0.5).astype("int32")



In [12]:
# Flatten before assigning: the predictions may be shaped (n, 1) rather than
# (n,), and a column assignment should receive an explicit 1-D array instead
# of relying on pandas to coerce a 2-D one.
df_wsb['sentiment_rnn'] = np.ravel(wsb_pred)

In [13]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Download the 'vader_lexicon' NLTK package (reported as already up-to-date
# when it is present locally).
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Victor\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [14]:
# Create the VADER analyzer used to score each title.
analyzer = SentimentIntensityAnalyzer()

In [15]:
# Score every title with VADER.
#   y_vader_prob: the "pos" score of each title
#   y_vader_pred: 1 when the "compound" score is >= 0.1, otherwise -1
y_vader_pred = []
y_vader_prob = []

for comment in wsb_X:
    # Run the analyzer once per title and reuse the result for both values
    # (the original called polarity_scores twice on the same text).
    scores = analyzer.polarity_scores(comment)
    y_vader_prob.append(scores["pos"])
    y_vader_pred.append(1 if scores["compound"] >= 0.1 else -1)

In [16]:
# Attach the VADER labels (1 / -1) as a new column.
df_wsb['sentiment_vader'] = y_vader_pred
# Disabled: would also store the VADER "pos" scores collected in y_vader_prob.
#df_wsb['score_vader'] = y_vader_prob

# Floor each index timestamp to the minute; this column is the grouping key
# for the per-minute aggregation later in the notebook.
df_wsb['minute'] = df_wsb.index.floor('min')

In [None]:
# NOTE(review): `signals_df` and `short_window` are not defined in any visible
# cell of this notebook and this cell has no execution count. It looks like a
# leftover from a moving-average crossover analysis; as written it raises
# NameError on Restart & Run All. Confirm whether it belongs here.
#
# Set "Signal" to 1.0 where SMA50 is above SMA100 and 0.0 otherwise, from row
# `short_window` onward. The write goes through one positional indexer
# instead of chained indexing (`df[col][rows] = ...`), which pandas does not
# guarantee to write back into the frame.
signal_col = signals_df.columns.get_loc("Signal")
signals_df.iloc[short_window:, signal_col] = np.where(
    signals_df["SMA50"].iloc[short_window:] > signals_df["SMA100"].iloc[short_window:],
    1.0,
    0.0,
)

In [18]:
# Map the RNN class labels onto the same {-1, 1} scale as `sentiment_vader`.
# Compare with `<= 0` rather than `== 0` so that re-running this cell is
# harmless: with `== 0`, a second run would turn the already-mapped -1 values
# into 1. For the first run on non-negative labels the result is identical.
df_wsb['sentiment_rnn'] = np.where(df_wsb['sentiment_rnn'] <= 0, -1, 1)

In [19]:
# Display the frame, now including the sentiment_rnn, sentiment_vader and minute columns.
df_wsb

Unnamed: 0_level_0,title,score,id,url,comms_num,created,body,sentiment_rnn,sentiment_vader,minute
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-09-29 03:46:56,$CBAT $SUNW $SPI $OXBR Y'all are missing the p...,4,j1fmmo,https://www.reddit.com/r/wallstreetbets/commen...,11,1.601340e+09,$CBAT is going to be a great play further yet:...,1,-1,2020-09-29 03:46:00
2021-01-28 09:08:16,An autists journey. $150 to $68k in one month....,50,l6h58v,https://www.reddit.com/gallery/l6h58v,8,1.611818e+09,,1,-1,2021-01-28 09:08:00
2021-01-28 09:08:17,You all have made Melvin live up to its name,70,l6h59b,https://i.redd.it/59sk10r7iyd61.png,7,1.611818e+09,,1,-1,2021-01-28 09:08:00
2021-01-28 09:08:18,I got in late on GME but I believe in the caus...,75,l6h5a2,https://www.reddit.com/r/wallstreetbets/commen...,14,1.611818e+09,You guys are amazing. Thank you for sending GM...,1,-1,2021-01-28 09:08:00
2021-01-28 09:08:19,Highest SI % of Float End of Business Jan 27th...,26,l6h5am,https://www.reddit.com/r/wallstreetbets/commen...,22,1.611818e+09,Will Update Daily At Market Close\n\nTop 5 sec...,-1,-1,2021-01-28 09:08:00
...,...,...,...,...,...,...,...,...,...,...
2021-04-05 15:22:56,Tesla finally gets an outperform rating and an...,156,mkcier,https://www.streetinsider.com/dr/news.php?id=1...,65,1.617625e+09,,1,1,2021-04-05 15:22:00
2021-04-05 16:26:32,"Still my PR(%), looking to break it soon here....",13,mkdgy8,https://i.redd.it/8gcvap1yiar61.jpg,9,1.617629e+09,,1,-1,2021-04-05 16:26:00
2021-04-05 16:28:19,WSB discussion this weekend,37,mkdhtl,https://v.redd.it/3dbe3la7jar61,17,1.617629e+09,,-1,-1,2021-04-05 16:28:00
2021-04-05 16:35:03,Cane to show my almost identical trade to some...,24,mkdl7d,https://i.redd.it/gvejuqxgkar61.jpg,7,1.617630e+09,,-1,-1,2021-04-05 16:35:00


In [20]:
# Average both sentiment signals per minute. The two numeric columns are
# selected *before* aggregating: since pandas 2.0, GroupBy.mean() no longer
# silently drops non-numeric columns (title, id, url, body) and raises
# TypeError instead. The result is the same as the original on older pandas.
sentiment_df = df_wsb.groupby("minute")[['sentiment_vader', 'sentiment_rnn']].mean()

In [21]:
# Write the per-minute sentiment averages to CSV in the Resources folder.
sentiment_df.to_csv("../Resources/sentiment.csv")