In [23]:
import pandas

# Load the tweets CSV and keep only the two columns used below:
# the reference label ("airline_sentiment") and the tweet body ("text").
tweets_df = pandas.read_csv("data/08_data.csv").loc[:, ["airline_sentiment", "text"]]

In [24]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def get_sentiment(compound_score, negative_threshold, positive_threshold):
    """Map a numeric polarity score to a sentiment label.

    Args:
        compound_score: the score to classify.
        negative_threshold: scores strictly below this are "negative".
        positive_threshold: scores strictly above this are "positive".

    Returns:
        "negative", "positive" or "neutral". A score equal to either
        threshold, or one that satisfies neither comparison, is "neutral".
    """
    # The negative check runs first, so it wins if both comparisons could hold.
    if compound_score < negative_threshold:
        return "negative"
    return "positive" if compound_score > positive_threshold else "neutral"
    
def print_stats(expected_col_name, result_col_name):
    """Print accuracy, precision, recall and F1 for one column of labels.

    Both columns are read from the module-level ``tweets_df``. Precision,
    recall and F1 are computed with ``average="weighted"``.

    Args:
        expected_col_name: name of the column passed as the first
            (reference) argument to each metric.
        result_col_name: name of the column passed as the second
            (predicted) argument to each metric.
    """
    y_true = tweets_df[expected_col_name]
    y_pred = tweets_df[result_col_name]

    # Compute every metric before printing anything.
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average="weighted")
    recall = recall_score(y_true, y_pred, average="weighted")
    f1 = f1_score(y_true, y_pred, average="weighted")

    # ":5" is a minimum field width, not a precision, so the values
    # are printed with all their digits.
    print(f"accuracy: {accuracy:5}")
    print(f"precision: {precision:5}")
    print(f"recall: {recall:5}")
    print(f"f1: {f1:5}")

In [28]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
    
# If the VADER lexicon is not installed yet, run this once:
# nltk.download('vader_lexicon')

sia = SentimentIntensityAnalyzer()

# Score every tweet and keep only the "compound" entry of the result.
tweets_df["compound_scores"] = tweets_df["text"].map(
    lambda tweet: sia.polarity_scores(tweet)["compound"]
)

# Label each score: below -0.05 is negative, above 0.05 is positive,
# anything in between is neutral.
tweets_df["vader_sentiment"] = tweets_df["compound_scores"].map(
    lambda score: get_sentiment(score, -0.05, 0.05)
)

# Compare the VADER labels with the reference labels.
print_stats("airline_sentiment", "vader_sentiment")

accuracy: 0.542827868852459
precision: 0.6985683472029285
recall: 0.542827868852459
f1: 0.5653261711946944


In [27]:
import spacy
from textblob import TextBlob

# Polarity of every tweet according to TextBlob.
tweets_df["textblob_polarity"] = tweets_df["text"].map(
    lambda tweet: TextBlob(tweet).sentiment.polarity
)

# Both thresholds are 0: any negative polarity is "negative", any positive
# polarity is "positive", and exactly 0 is "neutral".
tweets_df["textblob_sentiment"] = tweets_df["textblob_polarity"].map(
    lambda polarity: get_sentiment(polarity, 0, 0)
)

# Compare the TextBlob labels with the reference labels.
print_stats("airline_sentiment", "textblob_sentiment")

accuracy: 0.46434426229508197
precision: 0.6735335948663703
recall: 0.46434426229508197
f1: 0.4765955707495987
