**Import Libraries**

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset
from sklearn.metrics import classification_report
import pandas as pd


**Import Data**

In [2]:
# Labelled benchmark sets used further down to compare the candidate sentiment models.
FiQA_FinancialPhrasebank = pd.read_csv(
    "/kaggle/input/sentimentfinancial/fiqaFinancialPhrasebank.csv"
)
Twitter_Sentiment = pd.read_csv(
    "/kaggle/input/sentimentfinancial/twitter_financial_news_sentiment.csv"
)


**Class to Predict/Evaluate NLP Models**

In [3]:
class SentimentClassifier:
    """
    Wrap a Hugging Face sequence-classification checkpoint for sentiment
    prediction (`predict`) and for scoring against a labelled dataframe
    (`evaluate`).

    Attributes:
    - device: torch device the model lives on (CUDA when available, else CPU)
    - tokenizer / model: loaded from `model_name`
    - sentiment_dict: label name -> integer class index
    - reverse_sentiment_dict: integer class index -> label name
    - dataset: preprocessed Hugging Face dataset, or None when no dataframe was given
    """

    def __init__(self, model_name, dataset=None, sentiment_dict=None):
        """
        Args:
        - model_name: the name of the pre-trained language model to use
        - dataset (optional): the dataframe the model will be used on; it is copied
          before preprocessing, so the caller's dataframe is not modified
        - sentiment_dict (optional): a dictionary mapping sentiment labels to integers
        """
        # Resolve the device once so the model and every input batch share it.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(self.device)
        # This class only runs inference, so make evaluation mode explicit.
        self.model.eval()
        # NOTE(review): the fallback mapping contains -1, which torch.argmax can never
        # return, so evaluate() needs a model-specific sentiment_dict to map predictions back.
        self.sentiment_dict = sentiment_dict or {"positive": 1, "neutral": 0, "negative": -1}
        self.reverse_sentiment_dict = {value: key for key, value in self.sentiment_dict.items()}
        self.dataset = None
        if dataset is not None:
            self.dataset = self.preprocess(dataset.copy())

    def preprocess(self, dataset):
        """
        Preprocess the df by converting the sentiment labels to integers and converting to Hugging Face dataset.

        The 'Sentiment' column of `dataset` is overwritten in place, so pass a copy
        if the original dataframe must stay untouched.

        If the column is missing or holds a label that is not in `sentiment_dict`,
        a message is printed and every label is set to 0 (best-effort fallback).

        Returns:
        - the Hugging Face Dataset built from the dataframe
        """
        try:
            dataset['Sentiment'] = dataset['Sentiment'].apply(lambda x: self.sentiment_dict[x])
        except (KeyError, TypeError):
            # Only label-lookup failures trigger the fallback; any other error propagates.
            print("Returning all 0's for sentiment labels.")
            dataset['Sentiment'] = 0
        return Dataset.from_pandas(dataset)

    def predict(self, texts, batch_size=None, max_length=32):
        """
        Apply tokenizer and predict the sentiment label.

        Args:
        - texts: a single string or an iterable of strings
        - batch_size (optional): number of texts per forward pass; None sends all
          texts through the model in one batch
        - max_length (optional): token limit passed to the tokenizer (longer texts are truncated)

        Returns:
        - list of predicted class indices (argmax over the logits), one per text

        Raises:
        - ValueError: if batch_size is given but is not positive
        """
        if isinstance(texts, str):
            # A bare string would otherwise be sliced character by character.
            texts = [texts]
        else:
            texts = list(texts)
        if not texts:
            return []
        if batch_size is None:
            batch_size = len(texts)
        elif batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")

        predicted_labels = []
        # Inference only: skip autograd bookkeeping so activations are not kept alive.
        with torch.no_grad():
            for start_idx in range(0, len(texts), batch_size):
                batch = texts[start_idx:start_idx + batch_size]
                encoded_texts = self.tokenizer(batch, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
                encoded_texts = encoded_texts.to(self.device)
                outputs = self.model(**encoded_texts)
                predicted_labels.extend(torch.argmax(outputs.logits, dim=1).tolist())
        return predicted_labels

    def evaluate(self, batch_size=32):
        """
        Evaluate the performance of the sentiment classifier using a classification report. Had to batch due to GPU memory limitations.

        Args:
        - batch_size (optional): number of rows scored per forward pass

        Returns:
        - the text report produced by classification_report, with integer labels
          mapped back to their sentiment names

        Raises:
        - ValueError: if no dataset was supplied to the constructor
        - KeyError: if a true or predicted class index is not in reverse_sentiment_dict
        """
        if self.dataset is None:
            raise ValueError("No dataset provided; pass `dataset` to the constructor before calling evaluate().")

        true_labels = []
        predicted_labels = []

        for i in range(0, len(self.dataset), batch_size):
            # Slicing the dataset yields a dict of column name -> list of values.
            batch = self.dataset[i:i + batch_size]
            predicted_labels.extend(self.predict(batch["Sentence"]))
            true_labels.extend(batch["Sentiment"])

        true_labels = [self.reverse_sentiment_dict[label] for label in true_labels]  # mapping integer back to sentiment labels
        predicted_labels = [self.reverse_sentiment_dict[label] for label in predicted_labels]  # mapping integer back to sentiment labels
        return classification_report(true_labels, predicted_labels)


**Evaluate Performance of Financial NLP Sentiment Models**

In [4]:
# Candidate checkpoints. Each sentiment_dict maps label names to integer class
# indices for that model (presumably matching the model's own output order -- verify
# against each model card).
models = [{"model_path": "nickmuchi/finbert-tone-finetuned-fintwitter-classification", "sentiment_dict": {"negative": 0, "positive": 1, "neutral": 2}},
          {"model_path": "yiyanghkust/finbert-tone", "sentiment_dict": {"neutral": 0, "positive": 1, "negative": 2}},
          {"model_path": "ahmedrachid/FinancialBERT-Sentiment-Analysis", "sentiment_dict": {"negative": 0, "neutral": 1, "positive": 2}}]

# Labelled benchmark dataframes, keyed by the name shown when the reports are printed.
evaluation_datasets = {"FiQA_FinancialPhraseBank": FiQA_FinancialPhrasebank, "Twitter_Sentiment": Twitter_Sentiment}

# One list of reports per dataset, filled in the same order as `models`.
combined_reports = {dataset_name: [] for dataset_name in evaluation_datasets}

for model in models:
    # Load the tokenizer and weights once per checkpoint and reuse them for every
    # dataset, instead of re-instantiating the classifier for each dataset.
    classifier = SentimentClassifier(model_name=model["model_path"], sentiment_dict=model["sentiment_dict"])
    for dataset_name, dataframe in evaluation_datasets.items():
        # preprocess() overwrites the 'Sentiment' column, so hand it a copy.
        classifier.dataset = classifier.preprocess(dataframe.copy())
        report = classifier.evaluate()
        combined_reports[dataset_name].append(report)

# Per-dataset lists kept under their original names.
reports_FiQA_FinancialPhraseBank = combined_reports["FiQA_FinancialPhraseBank"]
reports_Twitter_Sentiment = combined_reports["Twitter_Sentiment"]


Downloading (…)okenizer_config.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/853 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/439M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/439M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/369 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/464k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/789 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/439M [00:00<?, ?B/s]

In [5]:
# Print the classification reports grouped by benchmark dataset, numbering the
# models from 1 in the order their reports were appended.
for dataset_name, model_reports in combined_reports.items():
    print(f"Dataset: {dataset_name}")
    for model_number, model_report in enumerate(model_reports, start=1):
        print(f"Model {model_number}:\n{model_report}")
        

Dataset: FiQA_FinancialPhraseBank
Model 1:
              precision    recall  f1-score   support

    negative       0.53      0.67      0.59       860
     neutral       0.70      0.84      0.76      3130
    positive       0.89      0.49      0.63      1852

    accuracy                           0.70      5842
   macro avg       0.71      0.67      0.66      5842
weighted avg       0.74      0.70      0.70      5842

Model 2:
              precision    recall  f1-score   support

    negative       0.51      0.58      0.54       860
     neutral       0.68      0.83      0.75      3130
    positive       0.87      0.49      0.63      1852

    accuracy                           0.69      5842
   macro avg       0.69      0.63      0.64      5842
weighted avg       0.72      0.69      0.68      5842

Model 3:
              precision    recall  f1-score   support

    negative       0.53      0.64      0.58       860
     neutral       0.76      0.81      0.78      3130
    positive  

Model 3 (ahmedrachid/FinancialBERT-Sentiment-Analysis) has demonstrated the most promising level of generalizability across the two datasets and will therefore be selected as the preferred model for future use. It is important to acknowledge that while Model 1 performs exceptionally well on the Twitter_Sentiment dataset, this can likely be attributed to its training on very similar data (its name indicates fine-tuning on financial Twitter data), so that score probably reflects overfitting to that domain rather than general ability. This is evidenced by its notably lower performance on the FiQA_FinancialPhrasebank dataset.

**Import Scraped Twitter Data from Major Source Outlets**

In [6]:
# Scraped tweets that the selected model will label (no sentiment column yet).
twitter_df = pd.read_csv(
    "/kaggle/input/twitter/Twitter_Data.csv"
)
twitter_df.head()


Unnamed: 0,content,date,username,sector
0,"Indonesia, the world’s biggest palm oil produc...",2022-12-29,BNCommodities,Commodities
1,Exxon is suing the European Union in a push to...,2022-12-28,BNCommodities,Commodities
2,The states hit hardest by blackouts in last we...,2022-12-28,BNCommodities,Commodities
3,China broke ground on an 80 billion yuan ($11 ...,2022-12-28,BNCommodities,Commodities
4,"Lithium’s going to get less expensive in 2023,...",2022-12-28,BNCommodities,Commodities


**Predict Sentiment on Scraped Twitter Data**

In [7]:
# No labelled dataframe is passed: the classifier is used for prediction only.
classifier = SentimentClassifier(model_name="ahmedrachid/FinancialBERT-Sentiment-Analysis")

# Score the tweets in chunks rather than in a single forward pass.
batch_size = 256
texts = twitter_df['content'].tolist()
predicted_labels = classifier.predict(texts, batch_size)

# predict() returns integer class indices; store them alongside the tweets.
twitter_df['Sentiment'] = predicted_labels

twitter_df.head(5)


Unnamed: 0,content,date,username,sector,Sentiment
0,"Indonesia, the world’s biggest palm oil produc...",2022-12-29,BNCommodities,Commodities,2
1,Exxon is suing the European Union in a push to...,2022-12-28,BNCommodities,Commodities,1
2,The states hit hardest by blackouts in last we...,2022-12-28,BNCommodities,Commodities,0
3,China broke ground on an 80 billion yuan ($11 ...,2022-12-28,BNCommodities,Commodities,2
4,"Lithium’s going to get less expensive in 2023,...",2022-12-28,BNCommodities,Commodities,1


**Apply Sentiment Mapping (Numerical to Labels)**

In [9]:
# Class index -> label name: the inverse of the mapping listed for
# ahmedrachid/FinancialBERT-Sentiment-Analysis in the model list above.
sentiment_dict = {0: "negative", 1: "neutral", 2: "positive"}

# Values that are not keys of the mapping are kept as they are. A plain
# .map(sentiment_dict) would turn them into NaN, which wiped the column whenever
# this cell was re-run after the labels had already been applied.
twitter_df['Sentiment'] = twitter_df['Sentiment'].map(lambda value: sentiment_dict.get(value, value))
twitter_df.head()


Unnamed: 0,content,date,username,sector,Sentiment
0,"Indonesia, the world’s biggest palm oil produc...",2022-12-29,BNCommodities,Commodities,positive
1,Exxon is suing the European Union in a push to...,2022-12-28,BNCommodities,Commodities,neutral
2,The states hit hardest by blackouts in last we...,2022-12-28,BNCommodities,Commodities,negative
3,China broke ground on an 80 billion yuan ($11 ...,2022-12-28,BNCommodities,Commodities,positive
4,"Lithium’s going to get less expensive in 2023,...",2022-12-28,BNCommodities,Commodities,neutral


**Save as CSV for EDA**

In [10]:
# Persist the labelled tweets for the follow-up EDA; the row index is not written.
twitter_df.to_csv(path_or_buf='twitter_data.csv', index=False)
