In [1]:
#import dependencies
import pandas as pd
from datetime import datetime
import json

In [2]:
# Read the raw API export and discard its "id" column in a single chained step
api_tweets_df = pd.read_csv("API_data.csv").drop(columns="id")
# Preview the first rows of the loaded frame
api_tweets_df.head()

Unnamed: 0,created_at,tweet
0,2021-11-01 00:59:56+00:00,@lj3252 @Teslarati @ResidentSponge Cars since ...
1,2021-11-01 00:59:53+00:00,@elonmusk @garycruz @truth_tesla @zshahan3 @ga...
2,2021-11-01 00:59:46+00:00,@AgnesOfTheDogs @Tesla @telsa Indeed! I have n...
3,2021-11-01 00:59:41+00:00,@elonmusk @garycruz @truth_tesla @zshahan3 @ga...
4,2021-11-01 00:59:37+00:00,Elon Musk says he’ll sell Tesla stock if UN of...


In [3]:
# Parse the raw timestamps. The values carry a UTC offset (e.g. "+00:00"), so the
# format has to include %z; a format without it does not describe the full string
# and is rejected by strict format parsing in recent pandas releases.
created_ts = pd.to_datetime(
    api_tweets_df['created_at'], format='%Y-%m-%d %H:%M:%S%z', utc=True
)

# Derive the calendar fields from the parsed timestamps (all as strings):
#   created_date -> 'YYYY-MM-DD', weekday -> day name, hours -> zero-padded hour
# then keep only the columns used downstream, in display order.
# Nothing is renamed here; the selection only reorders the columns.
api_tweets_df = api_tweets_df.assign(
    created_date=created_ts.dt.strftime('%Y-%m-%d'),
    weekday=created_ts.dt.day_name(),
    hours=created_ts.dt.strftime('%H'),
)[['created_at','created_date','weekday','hours','tweet']]
api_tweets_df.head()

Unnamed: 0,created_at,created_date,weekday,hours,tweet
0,2021-11-01 00:59:56+00:00,2021-11-01,Monday,0,@lj3252 @Teslarati @ResidentSponge Cars since ...
1,2021-11-01 00:59:53+00:00,2021-11-01,Monday,0,@elonmusk @garycruz @truth_tesla @zshahan3 @ga...
2,2021-11-01 00:59:46+00:00,2021-11-01,Monday,0,@AgnesOfTheDogs @Tesla @telsa Indeed! I have n...
3,2021-11-01 00:59:41+00:00,2021-11-01,Monday,0,@elonmusk @garycruz @truth_tesla @zshahan3 @ga...
4,2021-11-01 00:59:37+00:00,2021-11-01,Monday,0,Elon Musk says he’ll sell Tesla stock if UN of...


In [4]:
# Pull the raw tweet text out of the frame as a plain Python list for the cleaning step
api_tweet_list = api_tweets_df.loc[:, "tweet"].to_list()

In [5]:
# Preview only the first few raw tweets; echoing the whole list floods the notebook output
api_tweet_list[:5]

["@lj3252 @Teslarati @ResidentSponge Cars since 2016 have all the hardware needed for Full Self Driving.\nElon wouldn't lie, would he?\nhttps://t.co/OKfGdcyvjh",
 '@elonmusk @garycruz @truth_tesla @zshahan3 @garyblack00 @WholeMarsBlog @SawyerMerritt @elonmusk Hi I am trying to Startup an innovating App. Can I share my idea with you . Will you share some  knowledge for the development for the app.., https://t.co/6VuAUnswi3',
 '@AgnesOfTheDogs @Tesla @telsa Indeed! I have no real interest in driving a ginormous overheating laptop battery until that is fixed.',
 '@elonmusk @garycruz @truth_tesla @zshahan3 @garyblack00 @WholeMarsBlog @SawyerMerritt @elonmusk Hi I am trying to Startup an innovating App. Can I share my idea with you . Will you share some  knowledge for the development for the app... (!)https://t.co/6VuAUnswi3',
 'Elon Musk says he’ll sell Tesla stock if UN official can prove how $6billion will solve world\xa0hunger https://t.co/Km6zX19z0j',
 '#doge #dogearmy #dogelon #mars #

In [6]:
# import cleaning function
from Cleaning_PseudoCode.Final_Data_Cleaning import clean_tweets_func

# create function to make text df
def create_text_df(tweets=None):
    """Build a one-column DataFrame of cleaned tweet text.

    Parameters
    ----------
    tweets : list of str, optional
        Raw tweet strings to clean. When omitted, the notebook-level
        ``api_tweet_list`` is used, which keeps existing no-argument calls working.

    Returns
    -------
    pandas.DataFrame
        A frame with a single ``"text"`` column holding whatever
        ``clean_tweets_func`` returns for the input (judging by the displayed
        output, one list of tokens per tweet -- confirm against the cleaning module).
    """
    if tweets is None:
        # Fall back to the list built earlier in the notebook
        tweets = api_tweet_list
    cleaned = clean_tweets_func(tweets)
    return pd.DataFrame({"text": cleaned})

# call create_text_df function
text_df = create_text_df()
text_df

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/samanthavillanueva/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,text
0,"[car, hardware, needed, self, driving, elon, lie]"
1,"[hi, trying, startup, innovating, app, share, ..."
2,"[real, interest, driving, ginormous, overheati..."
3,"[hi, trying, startup, innovating, app, share, ..."
4,"[elon, musk, say, hell, sell, tesla, stock, un..."
...,...
43807,"[thank, cooperation, ..., good, night]"
43808,"[pepsico, claim, first-ever, tesla, semi, truc..."
43809,"[tesla, share, slide, elon, musk, twitter, pol..."
43810,"[tesla's, share, price, fallen, elon, musk's, ..."


In [8]:
# Store the string representation of each cleaned entry so the vectorizer receives text
text_df["text_string"] = text_df["text"].map(str)
text_df.head()

Unnamed: 0,text,text_string
0,"[car, hardware, needed, self, driving, elon, lie]","['car', 'hardware', 'needed', 'self', 'driving..."
1,"[hi, trying, startup, innovating, app, share, ...","['hi', 'trying', 'startup', 'innovating', 'app..."
2,"[real, interest, driving, ginormous, overheati...","['real', 'interest', 'driving', 'ginormous', '..."
3,"[hi, trying, startup, innovating, app, share, ...","['hi', 'trying', 'startup', 'innovating', 'app..."
4,"[elon, musk, say, hell, sell, tesla, stock, un...","['elon', 'musk', 'say', 'hell', 'sell', 'tesla..."


In [9]:
# import vectorizer and model
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle

# Load the pickled vectorizer and machine learning model.
# Context managers make sure both file handles are closed after loading.
# NOTE: pickle.load can execute arbitrary code from the file -- only load
# artifacts that come from a trusted source.
with open('Modeling/vectorizer.pkl', 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)
with open('Modeling/model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Vectorize the stringified tweets and score them with the loaded model
tweet_predict = text_df["text_string"]
# NOTE(review): .toarray() materializes a dense matrix; if the loaded model
# accepts sparse input this conversion could be dropped to save memory -- verify.
tweet_vectorized = vectorizer.transform(tweet_predict).toarray()
prediction = model.predict(tweet_vectorized)

In [15]:
# Wrap the model output in a single-column "score" frame
prediction_df = pd.DataFrame(data=prediction, columns=["score"])
# Place the tweet metadata, the cleaned text and the scores side by side (joined on the index)
frames_to_join = [api_tweets_df, text_df["text"], prediction_df]
combined_prediction_df = pd.concat(frames_to_join, axis="columns")
combined_prediction_df.head()

Unnamed: 0,created_at,created_date,weekday,hours,tweet,text,score
0,2021-11-01 00:59:56+00:00,2021-11-01,Monday,0,@lj3252 @Teslarati @ResidentSponge Cars since ...,"[car, hardware, needed, self, driving, elon, lie]",0
1,2021-11-01 00:59:53+00:00,2021-11-01,Monday,0,@elonmusk @garycruz @truth_tesla @zshahan3 @ga...,"[hi, trying, startup, innovating, app, share, ...",1
2,2021-11-01 00:59:46+00:00,2021-11-01,Monday,0,@AgnesOfTheDogs @Tesla @telsa Indeed! I have n...,"[real, interest, driving, ginormous, overheati...",1
3,2021-11-01 00:59:41+00:00,2021-11-01,Monday,0,@elonmusk @garycruz @truth_tesla @zshahan3 @ga...,"[hi, trying, startup, innovating, app, share, ...",1
4,2021-11-01 00:59:37+00:00,2021-11-01,Monday,0,Elon Musk says he’ll sell Tesla stock if UN of...,"[elon, musk, say, hell, sell, tesla, stock, un...",0


In [16]:
# Write the scored tweets to disk as CSV, leaving out the row index
combined_prediction_df.to_csv(path_or_buf='cleaned_scored_tweets.csv', index=False)
# Write the same frame as JSON, one record per row
combined_prediction_df.to_json(path_or_buf='cleaned_scored_tweets.json', orient='records')

In [17]:
# Separate the scored tweets by class; the chart below labels score 1 as
# "Positive" and score 0 as "Negative"
positive_new_tweets = combined_prediction_df[combined_prediction_df["score"] == 1]
negative_new_tweets = combined_prediction_df[combined_prediction_df["score"] == 0]

import plotly.express as px

# Sentiment bar chart.
# The bars are plotted from a small explicit counts frame rather than passing
# the full tweet frame to px.bar, because the plotted values are two literal
# lists and not columns of that frame.
sentiment_counts = pd.DataFrame({
    "Sentiment": ["Negative", "Positive"],
    "Count": [len(negative_new_tweets), len(positive_new_tweets)],
})
fig = px.bar(
    sentiment_counts,
    x="Sentiment",
    y="Count",
    title="Tweet count by predicted sentiment",
)
fig.update_layout(barmode='group', bargap=0.30)
fig.show()
