In [2]:
import numpy as np
import pandas as pd



In [3]:
# Load the $OCEAN tweets dataset (one row per tweet).
tweets_csv_path = "./datasets/$OCEAN tweets dataset.csv"
tweets_df = pd.read_csv(tweets_csv_path)

In [4]:
# Load the precomputed sentiment scores.
# NOTE(review): going by the file name these come from the
# twitter-roberta-base-sentiment model — confirm.
sentiment_scores_csv_path = "./derived_datasets/twitter-roberta-base-sentiment.csv"
sentiment_scores_df = pd.read_csv(sentiment_scores_csv_path)

In [5]:
# Attach the sentiment scores to the tweets: `id` on the tweets side matches
# `tweet_id` on the scores side (default inner join).
tweets_sentiment_df = pd.merge(
    tweets_df,
    sentiment_scores_df,
    left_on="id",
    right_on="tweet_id",
)

In [11]:
# Draw 100 tweets to label by hand. While labelling, some tweets turned out
# to be impossible to mark as positive or negative: they were written in
# another language, or referred to a reply that was not visible.
#
# `labels` names the three score columns; the order matters, because the
# position of the winning column minus one is used as the -1/0/1 class code.
labels = ["negative", "neutral", "positive"]
# Fixed seed so that the (now commented-out) sample below is reproducible.
np.random.seed(100101)
# tweets_sentiment_df.sample(100).to_excel("./derived_datasets/sampled_tweets.xls")

In [6]:
# Load the hand labels from labels.csv.
# NOTE(review): `clean_labels_from_google_drive` is reassigned from
# "./derived_datasets/sampled_tweets.csv" in a later cell before anything
# visible reads this frame, so this load looks redundant — confirm and remove.
clean_labels_from_google_drive = pd.read_csv("./derived_datasets/labels.csv")

In [14]:
# Readable names for the hand-label codes; "-" is the marker for tweets that
# could not be classified.
label_mapping = {
    1: "positive",
    0: "neutral",
    -1: "negative",
    "-": "unknown",
}

In [12]:
# Convert the classifier's per-class scores into the same -1/0/1 coding used
# for the hand labels: the class with the highest score is the prediction.
# `labels` is ordered negative, neutral, positive, so the position of the
# winning column minus one yields -1, 0 or 1.
#
# The argmax is taken on the underlying NumPy array rather than through a
# row-wise DataFrame.apply: the result is always a column position (never a
# column label) and it is computed in a single vectorised pass.
# NOTE(review): assumes the score columns contain no missing values — confirm.
tweets_sentiment_df["sentiment_classification"] = (
    tweets_sentiment_df[labels].to_numpy().argmax(axis=1) - 1
)

In [7]:
# Load the hand-labelled sample. This is the frame the comparison below uses;
# it has to provide the "tweet" and "Labels" columns.
sampled_labels_csv_path = "./derived_datasets/sampled_tweets.csv"
clean_labels_from_google_drive = pd.read_csv(sampled_labels_csv_path)

In [27]:
# The tweet ids were mangled by Excel, so the hand labels are matched back to
# the predictions on the tweet text instead of the id.
label_columns_df = clean_labels_from_google_drive[["tweet", "Labels"]]
merged_df = tweets_sentiment_df.merge(
    label_columns_df,
    on="tweet",
    suffixes=["", "_label"],
)
# Duplicate tweet texts yield repeated rows after the merge, so keep only one
# copy of each (tweet, prediction, label) combination.
joined_df = merged_df[["tweet", "sentiment_classification", "Labels"]].drop_duplicates()

In [18]:
# Compare predictions with hand labels: number of tweets for every
# (predicted class, hand label) pair.
comparison_keys = ["sentiment_classification", "Labels"]
joined_df.groupby(comparison_keys).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,tweet
sentiment_classification,Labels,Unnamed: 2_level_1
-1,-,1
-1,-1,2
-1,0,1
0,-,4
0,-1,5
0,0,21
0,1,14
1,-1,1
1,0,15
1,1,36


In [19]:
# Tweets I could not classify carry the label "-"; they make up 5% of the
# sample. `filt_df` keeps only the tweets that did get a label.
unlabelled_mask = joined_df["Labels"] == "-"
filt_df = joined_df.loc[~unlabelled_mask]
int(unlabelled_mask.sum()) / len(joined_df)

0.05

In [23]:
# Number of hand-labelled tweets per label value.
filt_df.groupby(by="Labels").count()

Unnamed: 0_level_0,tweet,sentiment_classification
Labels,Unnamed: 1_level_1,Unnamed: 2_level_1
-1,8,8
0,37,37
1,50,50


In [24]:
# Exact-match accuracy of the classifier against the hand labels: about 62%.
# For context, 50 of the 95 labelled tweets are positive, so a null model that
# always answers "positive" would already reach roughly 53%.
hand_label_codes = filt_df["Labels"].astype(int)
(hand_label_codes == filt_df["sentiment_classification"]).mean()

0.6210526315789474

In [25]:
# About 99% of predictions are at most one step away from the hand label on
# the -1/0/1 scale, i.e. nearly every miss confuses neutral with positive or
# neutral with negative rather than positive with negative.
label_gap = (filt_df["Labels"].astype(int) - filt_df["sentiment_classification"]).abs()
(label_gap <= 1).mean()

0.9894736842105263