# Import dependencies

In [1]:
# Dataset used: https://www.kaggle.com/datasets/gpreda/elon-musk-tweets

In [2]:
import pandas as pd

### NTLK

In [3]:
import nltk

# Download the lexicon
nltk.download("vader_lexicon")

# Import the lexicon 
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# SentimentIntensityAnalyzer.polarity_score()function provides the polarity of the text rendering the dictionary format
# of 4 keys neg, neu, pos and compound
# neg, neu, and pos should add to 1
# Compound is overall and is between -1 and 1
# Create an instance of SentimentIntensityAnalyzer
sent_analyzer = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/jennifershulyak/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [4]:
# Example! 
sentence = "VADER is pretty good at identifying the underlying sentiment of a text!"
print(sent_analyzer.polarity_scores(sentence))

{'neg': 0.0, 'neu': 0.585, 'pos': 0.415, 'compound': 0.75}


### Import Musk File and Explore!

In [5]:
# Read in and look at twitter data
pd.set_option('display.max_colwidth', 100)
df = pd.read_csv("elon_musk_tweets.csv")
df.head()

Unnamed: 0,id,user_name,user_location,user_description,user_created,user_followers,user_friends,user_favourites,user_verified,date,text,hashtags,source,retweets,favorites,is_retweet
0,1544379368478212100,Elon Musk,,"Mars & Cars, Chips & Dips",2009-06-02 20:12:29+00:00,101240855,115,13503,True,2022-07-05 17:55:09+00:00,@BillyM2k I find the gold toe sock – inevitably off kilter &amp; washed out – a little troubling...,,Twitter for iPhone,335,6542,False
1,1544377493263720450,Elon Musk,,"Mars & Cars, Chips & Dips",2009-06-02 20:12:29+00:00,101240806,115,13503,True,2022-07-05 17:47:42+00:00,"Sock Con, the conference for socks",,Twitter for iPhone,1451,30753,False
2,1544377130590552064,Elon Musk,,"Mars & Cars, Chips & Dips",2009-06-02 20:12:29+00:00,101240806,115,13503,True,2022-07-05 17:46:15+00:00,Always something new for the magazine cover and the articles practically write themselves,,Twitter for iPhone,1284,28610,False
3,1544375575724400645,Elon Musk,,"Mars & Cars, Chips & Dips",2009-06-02 20:12:29+00:00,101240806,115,13503,True,2022-07-05 17:40:05+00:00,@ExplainThisBob This guy gets it,,Twitter for iPhone,131,3640,False
4,1544375148605853699,Elon Musk,,"Mars & Cars, Chips & Dips",2009-06-02 20:12:29+00:00,101240806,115,13503,True,2022-07-05 17:38:23+00:00,Sock tech is so advanced that you can get pretty much anything in sock form these days!,,Twitter for iPhone,1191,23790,False


In [6]:
# cut df to just be the "date" and "text" columns
df_justtweets = df.loc[:, ["date", "text"]]
print("The number of tweets in this dataset: {}".format(len(df_justtweets)))
df_justtweets.head()


The number of tweets in this dataset: 4154


Unnamed: 0,date,text
0,2022-07-05 17:55:09+00:00,@BillyM2k I find the gold toe sock – inevitably off kilter &amp; washed out – a little troubling...
1,2022-07-05 17:47:42+00:00,"Sock Con, the conference for socks"
2,2022-07-05 17:46:15+00:00,Always something new for the magazine cover and the articles practically write themselves
3,2022-07-05 17:40:05+00:00,@ExplainThisBob This guy gets it
4,2022-07-05 17:38:23+00:00,Sock tech is so advanced that you can get pretty much anything in sock form these days!


# Identify Tweets with Crypto buzzwords

In [7]:
# Buzzwords 
buzzwords = ["bitcoin", "doge", "crypto"]
def check_for_crypto(tweet):
    return tweet

# Vader

In [8]:
# Function to gives a polarity rather than numerical output
def format_output(output_dict):
  
  polarity = "neutral"

  if(output_dict['compound']>= 0.05):
    polarity = "positive"

  elif(output_dict['compound']<= -0.05):
    polarity = "negative"

  return polarity

In [9]:
# Function to apply each row of the df to the sent_analyzer
def predict_sentiment(text):
  
  output_dict =  sent_analyzer.polarity_scores(text)
  return format_output(output_dict)

In [10]:
# Run the predictions
df_justtweets["vader_prediction"] = df_justtweets["text"].apply(predict_sentiment)
df_justtweets

Unnamed: 0,date,text,vader_prediction
0,2022-07-05 17:55:09+00:00,@BillyM2k I find the gold toe sock – inevitably off kilter &amp; washed out – a little troubling...,negative
1,2022-07-05 17:47:42+00:00,"Sock Con, the conference for socks",neutral
2,2022-07-05 17:46:15+00:00,Always something new for the magazine cover and the articles practically write themselves,neutral
3,2022-07-05 17:40:05+00:00,@ExplainThisBob This guy gets it,neutral
4,2022-07-05 17:38:23+00:00,Sock tech is so advanced that you can get pretty much anything in sock form these days!,positive
...,...,...,...
4149,2023-03-27 10:28:54+00:00,@CatherinScience That we extend consciousness to the stars and understand the Universe,positive
4150,2023-03-27 10:24:39+00:00,@cb_doge Twitter is the news,neutral
4151,2023-03-27 10:23:57+00:00,@Rainmaker1973 That’s what people who have the woke mind virus look like to me,positive
4152,2023-03-27 10:17:47+00:00,@Rainmaker1973 Reminds me of The Marker from Dead Space,negative


# Keep just the crypto tweets

In [11]:
# Take just the tweets about cryt
df_justtweets[df_justtweets["text"].str.contains('doge|Doge|crypto|Crypto|bitcoin|Bitcoin')]

Unnamed: 0,date,text,vader_prediction
9,2022-07-06 18:02:11+00:00,@mishaboar @boringcompany Supporting Doge wherever possible,positive
23,2022-07-07 20:58:09+00:00,@cb_doge Not bad! \n\nIt is high time that the future looked like the future.\n\nSame goes for f...,positive
56,2022-07-12 05:49:03+00:00,@cb_doge But Fifth Element was great,positive
75,2022-07-13 06:17:07+00:00,@cb_doge Ancient times,neutral
105,2022-07-15 21:15:56+00:00,@teslaownersSV @cb_doge It’s a flat trade,neutral
...,...,...,...
4112,2023-03-28 02:53:27+00:00,@cb_doge Trying my best for the humans,positive
4139,2023-03-27 17:33:12+00:00,@cb_doge That was wild,neutral
4140,2023-03-27 17:32:54+00:00,@HumbleBitcoiner US foreign policy now dropping below bronze tier,neutral
4150,2023-03-27 10:24:39+00:00,@cb_doge Twitter is the news,neutral


In [12]:
df_just_crypto_tweet = df_justtweets.loc[:, ["date", "vader_prediction"]]
df_just_crypto_tweet

Unnamed: 0,date,vader_prediction
0,2022-07-05 17:55:09+00:00,negative
1,2022-07-05 17:47:42+00:00,neutral
2,2022-07-05 17:46:15+00:00,neutral
3,2022-07-05 17:40:05+00:00,neutral
4,2022-07-05 17:38:23+00:00,positive
...,...,...
4149,2023-03-27 10:28:54+00:00,positive
4150,2023-03-27 10:24:39+00:00,neutral
4151,2023-03-27 10:23:57+00:00,positive
4152,2023-03-27 10:17:47+00:00,negative


In [13]:
df_just_crypto_tweet.to_csv("../Sentiments/EMusk_tweets.csv")