# Sentiment Analysis on /r/Bitcoin titles and self text

> In this notebook, I will be using the VADER Sentiment Intensity Analyzer to create scores for the scraped bitcoin data

---

## Imports and Data Reading

In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# easier to see full text with a bigger maxwidth:
pd.options.display.max_colwidth = 200

In [3]:
import nltk
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zoenawar/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [4]:
df = pd.read_csv('/Users/zoenawar/DSI/RNN_LSTM_Cryptocurrency_Project/datasets/rbitcoinscrape.csv', index_col=0)

  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
df.head(2)

Unnamed: 0,all_text,easy_time
0,did coinbase just die?Coinbase?,2019-05-13 12:16:55
1,"Hey guys,\n\n&amp;#x200B;\n\nI'm looking for the absolute cheapest way to buy/sell bitcoin (using fiat) and trade with other cryptos. \n\n&amp;#x200B;\n\nMost options for buying have huge fees att...",2019-05-13 12:16:28


In [6]:
df.shape

(701260, 2)

---

## VADER

In [7]:
corpus = list(df['all_text'])
corpus[0]

'did coinbase just die?Coinbase?'

In [8]:
sia = SentimentIntensityAnalyzer()
sia.polarity_scores(corpus[0])

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

## Making a dictionary of scores

In [None]:
dicts = []

for text in corpus:
    try:
        scores = sia.polarity_scores(text)
        scores['all_text'] = text
        dicts.append(scores)
    except AttributeError:
        scores['all_text'] = 'ERROR'
        dicts.append(scores)

dict_df = pd.DataFrame(dicts)
dict_df.head()

In [None]:
dict_df.loc[dict_df['all_text'] == 'ERROR'] # I encountered some errors, which for the purposes of time I've simply removed


In [None]:
dict_df.shape

In [30]:
dict_df = dict_df[dict_df.all_text != 'ERROR']

In [32]:
scores_6year_alltext = dict_df.merge(df, left_on='all_text', right_on='all_text')

In [36]:
scores_6year_alltext.tail()

Unnamed: 0,all_text,compound,neg,neu,pos,easy_time
799212,http://www.dailydot.com/business/bitcoin-offshore-tax-haven/Bitcoin is the offshore tax haven of the future,0.0,0.0,1.0,0.0,2013-10-12 00:38:45
799213,free bitcoin1381566853.0,0.5106,0.0,0.233,0.767,2013-10-12 00:34:13
799214,"Hey Everyone,\n\nI run www.primedice.com , unfortunately right now when you google primedice the first result is a google ad for a clone called\n[PLEASE DO NOT VISIT THIS SITE]: (Primedicebot.com)...",0.8935,0.019,0.855,0.126,2013-10-12 00:31:10
799215,Why the Demise of Silk Road Means Bitcoins Are Here to Stay - tradersnetwork.biz - list trade smile :)1381565277.0,0.3612,0.0,0.865,0.135,2013-10-12 00:07:57
799216,free bitcoin in 30minutes....1381564874.0,0.5106,0.0,0.476,0.524,2013-10-12 00:01:14


In [37]:
scores_6year_alltext.shape

(799217, 6)

In [41]:
scores_6year_alltext.head()

Unnamed: 0,all_text,compound,neg,neu,pos,easy_time
0,did coinbase just die?Coinbase?,0.0,0.0,1.0,0.0,2019-05-13 12:16:55
1,"Hey guys,\n\n&amp;#x200B;\n\nI'm looking for the absolute cheapest way to buy/sell bitcoin (using fiat) and trade with other cryptos. \n\n&amp;#x200B;\n\nMost options for buying have huge fees att...",-0.168,0.042,0.926,0.032,2019-05-13 12:16:28
2,[removed]What is an OpenSource Winemaking?,0.0,0.0,1.0,0.0,2019-05-13 12:15:53
3,BITCOIN ADOPTION IS HERE!! New HTC Smartphone Will Run a Full Node,0.0,0.0,1.0,0.0,2019-05-13 12:15:25
4,😂🤣😂🤣🤑🤑,0.0,0.0,1.0,0.0,2019-05-13 12:14:25


# Now that I've got the scores, I can save them to a csv

In [43]:
scores_6year_alltext.to_csv('/Users/zoenawar/DSI/RNN_LSTM_Cryptocurrency_Project/datasets/rbitcoin_sentiment_scores.csv')
