In [37]:
import pydantic
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from openai import OpenAI



In [None]:
KEY = "hello_world" ### ENTER OPEN AI KEY HERE

In [38]:
from typing import Literal
from enum import Enum
from pydantic import BaseModel


class Sentiment(str, Enum):
    positive = 'positive'
    negative = 'negative'
    neutral = 'neutral'


class Annotation(BaseModel):
    target: str
    aspect: str
    sentiment: Literal["positive", "negative", "neutral"]
    reasoning: str

In [48]:
# Load the data
data = pd.read_csv('final_dataset/perturbed (2).csv')
data = data.drop(["snippet", "reasoning"], axis=1)
data

Unnamed: 0.1,Unnamed: 0,sentence,target,aspect,sentiment,language
0,0,Axis Securities is bullish on Britannia Indust...,Britannia Industries,Market/Stock Target Price,positive,original
1,1,Axis Securities is fast on Britannia Industrie...,Britannia Industries,Market/Stock Target Price,positive,hi
2,2,AXIS Securities's bullishness of British indus...,Britannia Industries,Market/Stock Target Price,positive,zh-cn
3,3,The AXIS Securities recommended the purchase r...,Britannia Industries,Market/Stock Target Price,positive,ko
4,4,Net Sales are expected to decrease by 6.9 perc...,Net Sales,Financial/Revenue Growth,positive,original
...,...,...,...,...,...,...
255,255,Prabhudas Lilladher was optimistic from HDFC B...,HDFC Bank,Market/Stock Target Price,positive,ko
256,256,The RBI has asked HDFC Bank to temporarily hal...,HDFC Bank,Corporate/Product Offering,negative,original
257,257,The RBI has asked HDFC Bank to temporarily sou...,HDFC Bank,Corporate/Product Offering,negative,hi
258,258,Indian Reserve Bank has asked HDFC Bank to tem...,HDFC Bank,Corporate/Product Offering,negative,zh-cn


In [40]:
system_prompt = """
You are an experienced financial analyst who can understand complex financial statements. You are tasked with analyzing the sentiment of of a sentence with respect to a target entity in the context of the given aspect. Please use the given information and predict the sentiment. Also provide a reasoning for your prediction.

Here are the details:
- A target entity is a company or a product mentioned or a financial entity mentioned in the sentence.
- An aspect is a category or a topic that the target entity is associated with. It is in the format "level1/level2". For example, "Market/Stock Recommendation".


Example:
Input-
sentence: Prabhudas Lilader is a boom on Mahindra and Mahindra in its research report on November 10, 2020 recommended a purchase rating on stock with a target price of Rs 728.
Target: Mahindra and Mahindra
Aspect: Market/Stock Recommendation

Output-
{
    "target": "Mahindra and Mahindra",
    "aspect": "Market/Stock Recommendation",
    "sentiment": "positive",
    "reasoning": "The sentence contains a positive sentiment as the research report recommended a purchase rating on the stock."
}
"""

user_prompt_template = """
Given the sentence: "{sentence}"
With the Target entity: {target}
And the Aspect: {aspect}

Predict the sentiment and provide a reasoning for your prediction.
"""

In [None]:
client = OpenAI(api_key=KEY)
def invoke_llm(sentence: str, aspect: str, target: str) -> Annotation:
    user_prompt = user_prompt_template.format(sentence=sentence, aspect=aspect, target=target)

    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        response_format=Annotation,
        temperature=0,
        max_tokens=512,
    )

    return completion.choices[0].message.parsed

In [55]:
import pandas as pd
from tqdm import tqdm
import time
from typing import List
from sklearn.metrics import accuracy_score, classification_report, recall_score, precision_score, f1_score

def evaluate_predictions(df: pd.DataFrame) -> pd.DataFrame:
    """
    Evaluate sentiment predictions for each row in the DataFrame.
    Adds predicted sentiment and compares with ground truth.
    """
    # Create copies for predictions
    df_eval = df.copy()
    df_eval['predicted_sentiment'] = None
    df_eval['predicted_reasoning'] = None
    
    # Process each row
    for idx, row in tqdm(df_eval.iterrows(), total=len(df_eval), desc="Processing predictions"):
        try:
            # Get prediction
            prediction = invoke_llm(
                sentence=row['sentence'],
                aspect=row['aspect'],
                target=row['target']
            )
            
            # Add predictions to DataFrame
            df_eval.loc[idx, 'predicted_sentiment'] = prediction.sentiment
            df_eval.loc[idx, 'predicted_reasoning'] = prediction.reasoning
            
            # Sleep to avoid rate limits
            time.sleep(1)
            
        except Exception as e:
            print(f"Error processing row {idx}: {str(e)}")
            continue
    
    # Calculate metrics
    valid_predictions = df_eval[df_eval['predicted_sentiment'].notna()]
    
    accuracy = accuracy_score(
        valid_predictions['sentiment'],
        valid_predictions['predicted_sentiment']
    )
    
    report = classification_report(
        valid_predictions['sentiment'],
        valid_predictions['predicted_sentiment']
    )
    
    print("\nAccuracy:", accuracy)
    print("\nDetailed Classification Report:")
    print(report)
    
    # Save results
    df_eval.to_csv('evaluation_results.csv', index=False)
    
    # Create comparison DataFrame (create a new DataFrame instead of a view)
    comparison_columns = [
        'sentence', 'target', 'aspect', 
        'sentiment', 'predicted_sentiment',
        'reasoning', 'predicted_reasoning'
    ]
    
    comparison_df = pd.DataFrame(df_eval[comparison_columns].copy())
    comparison_df.loc[:, 'matched'] = comparison_df['sentiment'] == comparison_df['predicted_sentiment']
    comparison_df.to_csv('detailed_comparison.csv', index=False)
    
    return df_eval

In [47]:
data

Unnamed: 0.1,Unnamed: 0,sentence,target,aspect,sentiment,language
0,0,Axis Securities is bullish on Britannia Indust...,Britannia Industries,Market/Stock Target Price,positive,original
1,1,Axis Securities is fast on Britannia Industrie...,Britannia Industries,Market/Stock Target Price,positive,hi
2,2,AXIS Securities's bullishness of British indus...,Britannia Industries,Market/Stock Target Price,positive,zh-cn
3,3,The AXIS Securities recommended the purchase r...,Britannia Industries,Market/Stock Target Price,positive,ko
4,4,Net Sales are expected to decrease by 6.9 perc...,Net Sales,Financial/Revenue Growth,positive,original
...,...,...,...,...,...,...
255,255,Prabhudas Lilladher was optimistic from HDFC B...,HDFC Bank,Market/Stock Target Price,positive,ko
256,256,The RBI has asked HDFC Bank to temporarily hal...,HDFC Bank,Corporate/Product Offering,negative,original
257,257,The RBI has asked HDFC Bank to temporarily sou...,HDFC Bank,Corporate/Product Offering,negative,hi
258,258,Indian Reserve Bank has asked HDFC Bank to tem...,HDFC Bank,Corporate/Product Offering,negative,zh-cn


In [19]:
test = data.sample(10)

In [46]:
evaluate_predictions(data)

Processing predictions: 100%|██████████| 260/260 [10:51<00:00,  2.50s/it]



Accuracy: 0.8307692307692308

Detailed Classification Report:
              precision    recall  f1-score   support

    negative       0.59      0.80      0.68        20
     neutral       0.38      1.00      0.55        20
    positive       1.00      0.82      0.90       220

    accuracy                           0.83       260
   macro avg       0.66      0.87      0.71       260
weighted avg       0.92      0.83      0.86       260



KeyError: "['reasoning'] not in index"

In [52]:
eval_data = pd.read_csv("evaluation_results.csv")
eval_data = eval_data.drop("predicted_reasoning", axis=1)

In [58]:
accuracy = accuracy_score(
        eval_data['sentiment'],
        eval_data['predicted_sentiment']
    )

    
report = classification_report(
        eval_data['sentiment'],
        eval_data['predicted_sentiment']
    )
    
print("\nAccuracy:", accuracy)
print("\nDetailed Classification Report:")


print(report)


Accuracy: 0.8307692307692308

Detailed Classification Report:
              precision    recall  f1-score   support

    negative       0.59      0.80      0.68        20
     neutral       0.38      1.00      0.55        20
    positive       1.00      0.82      0.90       220

    accuracy                           0.83       260
   macro avg       0.66      0.87      0.71       260
weighted avg       0.92      0.83      0.86       260

