# Financial Sentiment Analysis using LLMs

## Introduction

#### Import Libraries

In [384]:
import openai
import pandas as pd
import re
import plotly.graph_objects as go
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
%matplotlib inline

#### Processing Functions

In [385]:
OPEN_AI_KEY = '...'

def available_openai_engines(filters=None):
    """Get all available OpenAI engine IDs, sorted by version number."""
    try:
        openai.api_key = OPEN_AI_KEY
        response = openai.Engine.list()
        engines = response['data']

        # If a filter is provided, only keep engines that include any of the filter keywords
        if filters:
            engines = [engine for engine in engines if any(keyword in engine['id'] for keyword in filters)]

        # Extract version numbers and sort
        versioned_engines = sorted(engines, key=lambda e: list(map(int, re.findall(r'\d+', e['id']))), reverse=True)

        # Extract only the IDs and create a DataFrame
        ids = [engine['id'] for engine in versioned_engines]
        df = pd.DataFrame(ids, columns=['Engine ID'])
        return df

    except Exception as e:
        print(f"Error while retrieving engines: {e}")


def query_gpt(prompt, model_name):
    """Query the specified GPT model with the provided prompt."""
    try:
        openai.api_key = OPEN_AI_KEY
        response = openai.Completion.create(
          engine=model_name,
          prompt=prompt,
          max_tokens=100
        )
        return response.choices[0].text.strip()
    except Exception as e:
        print(f"Error querying model {model_name}: {e}")
        return None
    
def create_prompts(input_text, instructions):
    """Create a list of prompts by concatenating the instructions and adding to each line of input text."""
    instruction = " ".join(instructions)
    prompts = [instruction + " " + text for text in input_text]
    return prompts


def process_output(output):
    """
    This function takes the raw output from GPT-3, extracts the sentiment, and standardizes it to be in
    uppercase and one of "NEGATIVE", "POSITIVE", or "NEUTRAL".
    """
    output = output.lower()
    if 'negative' in output:
        return 'NEGATIVE'
    elif 'positive' in output:
        return 'POSITIVE'
    elif 'neutral' in output:
        return 'NEUTRAL'
    else:
        return 'UNKNOWN'


def longest_common_starting_substring(strs):
    """
    Given a list of strings, this function returns the longest common starting substring.
    """
    prefix = strs[0]
    for string in strs[1:]:
        while string[:len(prefix)] != prefix:
            prefix = prefix[:-1]
    return prefix

def predict_sentiment(models, prompts):
    """
    This function takes a list of models and prompts as input, 
    and outputs a pandas DataFrame containing the Input Text (obtained by removing the instructions from the prompt) and
    Predicted Sentiment for each model, organized in columns by model.
    """
    # Placeholder for results
    results = []

    # Determine the common instruction from the prompts
    instruction = longest_common_starting_substring(prompts)

    # Iterate over the models and prompts and query the models
    for model in models:
        for prompt in prompts:
            # Split the instruction from the input text
            input_text = prompt.replace(instruction, '').strip()
            
            # Check if we've already queried this model with this input_text
            if not any((result['ID'] == model and result['Input Text'] == input_text) for result in results):
                sentiment_raw = query_gpt(prompt, model)  # Replace `query_gpt` with the function you're using to query the model
                sentiment = process_output(sentiment_raw)
                results.append({'ID': model, 'Input Text': input_text, 'Predicted Sentiment': sentiment})

    # Convert results to DataFrame
    results_df = pd.DataFrame(results)

    # Pivot the DataFrame to organize by input text and model
    results_pivot = results_df.pivot(index='Input Text', columns='ID', values='Predicted Sentiment')

    # Reset the index to return a "regular" dataframe
    return results_pivot.reset_index()

#### Metrics

In [386]:
import pandas as pd
from sklearn.metrics import confusion_matrix
import plotly.graph_objects as go

def plot_multiclass_confusion_matrix(pred_sentiment, true_sentiment, label_names=['NEGATIVE', 'NEUTRAL', 'POSITIVE']):
    # Get model's name from column name
    model_name = pred_sentiment.columns[0]
    
    # Calculate confusion matrix
    cm = confusion_matrix(true_sentiment, pred_sentiment[model_name], labels=label_names)

    # Create a dataframe for better annotation handling
    cm_df = pd.DataFrame(cm, index=label_names, columns=label_names)

    # Create a heatmap
    heat_map = go.Heatmap(
        z=cm_df.values,
        x=list(cm_df.columns),
        y=list(cm_df.index),
        colorscale='Blues',
        showscale=True,
    )

    # Create annotations (hover text)
    annotations = []
    for n, row in enumerate(cm):
        for m, val in enumerate(row):
            var_text = f'{cm[n][m]}'
            annotations.append(
                dict(
                    showarrow=False,
                    text=var_text, 
                    xref='x', 
                    yref='y',
                    x=m,
                    y=n,
                    font=dict(color='white'),
                )
            )

    # Create layout
    layout = go.Layout(
        title_text='Confusion matrix for ' + model_name,  # Use model's name in title
        height=500,
        width=500,
        annotations=annotations
    )

    # Create figure
    fig = go.Figure(data=heat_map, layout=layout)

    # Show figure
    fig.show()


## Data Description

#### Import Data

In [387]:
instruction = [
    "Determine the sentiment of the financial news as negative, neutral or positive.",
    "Please respond with a single word indicating the sentiment of the financial news: 'negative', 'neutral', or 'positive'."
] 
input_text = [
    'Consumer credit $18.9BN, Exp. $16BN, Last $9.6BN.',
    'Estee Lauder Q2 adj. EPS $2.11; FactSet consensus $1.90.',
    'The situation of coated magazine printing paper will continue to be weak',
    'Pre-tax loss totaled euro 0.3 million, compared to a loss of euro 2.2 million in the first quarter of 2005.',
    'Madison Square Garden Q2 EPS $3.93 vs. $3.42.',
    'Boeing announces additional order for 737 MAX planes.',
    'Boeing: Deliveries 24 Jets in November',
    'PPD’s stock indicated in early going to open at $30, or 11% above $27 IPO price.'
]
true_sentiment = ['POSITIVE', 
                  'NEUTRAL',
                  'NEGATIVE',
                  'POSITIVE',
                  'POSITIVE',
                  'NEUTRAL',
                  'POSITIVE',
                  'NEUTRAL']

## Data Preparation

#### Build Prompts 

In [388]:
prompts = create_prompts(input_text, instruction)
prompts

["Determine the sentiment of the financial news as negative, neutral or positive. Please respond with a single word indicating the sentiment of the financial news: 'negative', 'neutral', or 'positive'. Consumer credit $18.9BN, Exp. $16BN, Last $9.6BN.",
 "Determine the sentiment of the financial news as negative, neutral or positive. Please respond with a single word indicating the sentiment of the financial news: 'negative', 'neutral', or 'positive'. Estee Lauder Q2 adj. EPS $2.11; FactSet consensus $1.90.",
 "Determine the sentiment of the financial news as negative, neutral or positive. Please respond with a single word indicating the sentiment of the financial news: 'negative', 'neutral', or 'positive'. The situation of coated magazine printing paper will continue to be weak",
 "Determine the sentiment of the financial news as negative, neutral or positive. Please respond with a single word indicating the sentiment of the financial news: 'negative', 'neutral', or 'positive'. Pre-ta

## Model Evaluation

#### List of Available LLMs

In [389]:
engines = available_openai_engines(filters=['curie', 'davinci'])
display(engines)

Unnamed: 0,Engine ID
0,text-davinci-003
1,text-davinci-002
2,text-davinci-edit-001
3,code-davinci-edit-001
4,text-davinci-001
5,text-curie-001
6,text-similarity-curie-001
7,text-search-davinci-query-001
8,text-search-curie-query-001
9,text-search-curie-doc-001


#### Predict Numerical Sentiment Analysis

In [390]:
llms = ['text-davinci-003', 'text-curie-001']
pred_sentiment = predict_sentiment(llms, prompts)

# Add true_sentiment to DataFrame
results = pred_sentiment.copy()
results['true_sentiment'] = true_sentiment

# Display the DataFrame
display(results)

ID,Input Text,text-curie-001,text-davinci-003,true_sentiment
0,Boeing announces additional order for 737 MAX ...,NEGATIVE,POSITIVE,POSITIVE
1,Boeing: Deliveries 24 Jets in November,POSITIVE,NEGATIVE,NEUTRAL
2,"Consumer credit $18.9BN, Exp. $16BN, Last $9.6BN.",NEGATIVE,NEUTRAL,NEGATIVE
3,Estee Lauder Q2 adj. EPS $2.11; FactSet consen...,NEGATIVE,POSITIVE,POSITIVE
4,Madison Square Garden Q2 EPS $3.93 vs. $3.42.,POSITIVE,NEUTRAL,POSITIVE
5,PPD’s stock indicated in early going to open a...,POSITIVE,POSITIVE,NEUTRAL
6,"Pre-tax loss totaled euro 0.3 million, compare...",NEGATIVE,NEGATIVE,POSITIVE
7,The situation of coated magazine printing pape...,NEGATIVE,NEGATIVE,NEUTRAL


#### Prediction Reasoning 

In [None]:
# Create pred_sentiment_curie
pred_sentiment_curie = results[['text-curie-001']].copy()

# Create pred_sentiment_davinci
pred_sentiment_davinci = results[['text-davinci-003']].copy()

# Create true_sentiment
true_sentiment = results[['true_sentiment']].copy()

# reasoning_curie = prediction_reasoning(pred_sentiment_curie, prompts)
# reasoning_davinci = prediction_reasoning(pred_sentiment_davinci, prompts)


#### Validate Model Performance

In [391]:
plot_multiclass_confusion_matrix(pred_sentiment_curie, true_sentiment)
plot_multiclass_confusion_matrix(pred_sentiment_davinci, true_sentiment)