In [12]:
import pydantic
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from openai import OpenAI



In [None]:
import os

# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# saved inside the notebook; the placeholder is only used when it is unset.
KEY = os.environ.get("OPENAI_API_KEY", "hello_world") ##### ENTER YOUR OPEN AI KEY HERE

In [13]:
from typing import Literal
from enum import Enum
from pydantic import BaseModel


class Sentiment(str, Enum):
    """String-backed sentiment labels; every member's value equals its name."""

    positive = "positive"
    negative = "negative"
    neutral = "neutral"


# Structured-output schema: passed as `response_format` to the chat completion
# call in invoke_llm, whose parsed reply is returned as an instance of this model.
# NOTE(review): documented with comments rather than a class docstring because
# pydantic may emit a docstring as the schema description - confirm before adding one.
class Annotation(BaseModel):
    target: str  # entity the sentiment refers to
    aspect: str  # category in "level1/level2" form, as described in the system prompt
    sentiment: Literal["positive", "negative", "neutral"]  # restricted to these three labels
    reasoning: str  # free-text justification for the chosen label

In [28]:
# Read the perturbed dataset and discard the two columns that are not used below
data = pd.read_csv('final_dataset/perturbed.csv').drop(columns=["snippet", "reasoning"])
data

Unnamed: 0.1,Unnamed: 0,sentence,target,aspect,sentiment,language
0,0,Prabhudas Lilladher is bullish on Mahindra and...,Mahindra and Mahindra,Market/Stock Recommendation,positive,original
1,1,Prabhudas Lilader is a boom on Mahindra and Ma...,Mahindra and Mahindra,Market/Stock Recommendation,positive,hi
2,2,Prabhudas Lilladher is up to Mahindra. Mahindr...,Mahindra and Mahindra,Market/Stock Recommendation,positive,zh-cn
3,3,Prabhudas Lilladher is optimistic about Mahind...,Mahindra and Mahindra,Market/Stock Recommendation,positive,ko
4,4,Prabhudas Lilladher is bullish on Mahindra and...,Mahindra and Mahindra,Market/Stock Target Price,positive,original
...,...,...,...,...,...,...
59,59,This was added to the first dividend of RS 6 p...,Shriram Transport,Financial/Revenue Growth,positive,ko
60,60,Net Sales are expected to increase by 78.3 per...,Net Sales,Financial/Revenue Growth,positive,original
61,61,"According to Prabhudas Lilader, 78.3 percent i...",Net Sales,Financial/Revenue Growth,positive,hi
62,62,"According to Prabhudas Lilladher, net sales ar...",Net Sales,Financial/Revenue Growth,positive,zh-cn


In [29]:
# System message sent with every request: sets the analyst role, defines "target"
# and "aspect", and shows one worked input/output example whose JSON keys match
# the fields of Annotation.
# NOTE(review): the example sentence looks like the "hi" row shown in the dataset
# preview - confirm whether that row should be excluded from evaluation.
system_prompt = """
You are an experienced financial analyst who can understand complex financial statements. You are tasked with analyzing the sentiment of of a sentence with respect to a target entity in the context of the given aspect. Please use the given information and predict the sentiment. Also provide a reasoning for your prediction.

Here are the details:
- A target entity is a company or a product mentioned or a financial entity mentioned in the sentence.
- An aspect is a category or a topic that the target entity is associated with. It is in the format "level1/level2". For example, "Market/Stock Recommendation".


Example:
Input-
sentence: Prabhudas Lilader is a boom on Mahindra and Mahindra in its research report on November 10, 2020 recommended a purchase rating on stock with a target price of Rs 728.
Target: Mahindra and Mahindra
Aspect: Market/Stock Recommendation

Output-
{
    "target": "Mahindra and Mahindra",
    "aspect": "Market/Stock Recommendation",
    "sentiment": "positive",
    "reasoning": "The sentence contains a positive sentiment as the research report recommended a purchase rating on the stock."
}
"""

# Per-row user message; invoke_llm fills the {sentence}, {target} and {aspect}
# placeholders with str.format before sending it.
user_prompt_template = """
Given the sentence: "{sentence}"
With the Target entity: {target}
And the Aspect: {aspect}

Predict the sentiment and provide a reasoning for your prediction.
"""

In [None]:
from typing import List
# Module-level OpenAI client used by invoke_llm; authenticates with KEY from the earlier cell.
client = OpenAI(api_key=KEY)

def invoke_llm(sentence: str, aspect: str, target: str) -> Annotation:
    """
    Ask the model for the sentiment of ``sentence`` towards ``target`` for ``aspect``.

    The user prompt is built from ``user_prompt_template`` and sent together with
    ``system_prompt``; the reply is parsed into an ``Annotation`` via the
    ``response_format`` argument.

    Args:
        sentence: Sentence to analyse.
        aspect: Aspect category inserted into the prompt.
        target: Target entity inserted into the prompt.

    Returns:
        The parsed ``Annotation`` from the first completion choice.

    Raises:
        ValueError: If the reply carries no parsed annotation (for example
            when the model refuses), so callers never receive ``None``.
    """
    user_prompt = user_prompt_template.format(sentence=sentence, aspect=aspect, target=target)

    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        response_format=Annotation,
        temperature=0,
        max_tokens=512,
    )

    message = completion.choices[0].message
    if message.parsed is None:
        # Surface the refusal text (when present) instead of returning None,
        # which would contradict the declared return type.
        refusal = getattr(message, "refusal", None)
        raise ValueError(f"Model returned no parsed annotation (refusal: {refusal!r})")

    return message.parsed

In [31]:
import pandas as pd
from tqdm import tqdm
import time
from typing import List
from sklearn.metrics import accuracy_score, classification_report

def evaluate_predictions(df: pd.DataFrame) -> pd.DataFrame:
    """
    Evaluate sentiment predictions for each row in the DataFrame.

    Calls ``invoke_llm`` once per row, records the predicted sentiment and
    reasoning, prints accuracy and a classification report against the
    ``sentiment`` column, and writes two CSV files to the working directory:
    ``evaluation_results.csv`` (all columns) and ``detailed_comparison.csv``
    (a column subset plus a ``matched`` flag).

    Args:
        df: Frame with ``sentence``, ``aspect``, ``target`` and ``sentiment``
            columns. It is copied, not modified.

    Returns:
        A copy of ``df`` with ``predicted_sentiment`` and
        ``predicted_reasoning`` columns; both are ``None`` for rows whose
        prediction raised an exception.
    """
    df_eval = df.copy()

    # Collect results positionally so rows are filled correctly even when the
    # index contains duplicate labels.
    predicted_sentiments = []
    predicted_reasonings = []

    for idx, row in tqdm(df_eval.iterrows(), total=len(df_eval), desc="Processing predictions"):
        sentiment, reasoning = None, None
        try:
            prediction = invoke_llm(
                sentence=row['sentence'],
                aspect=row['aspect'],
                target=row['target']
            )
            sentiment, reasoning = prediction.sentiment, prediction.reasoning

            # Sleep to avoid rate limits
            time.sleep(1)

        except Exception as e:
            # Best effort: report the failure and leave this row unpredicted.
            print(f"Error processing row {idx}: {str(e)}")

        predicted_sentiments.append(sentiment)
        predicted_reasonings.append(reasoning)

    df_eval['predicted_sentiment'] = pd.Series(predicted_sentiments, index=df_eval.index, dtype=object)
    df_eval['predicted_reasoning'] = pd.Series(predicted_reasonings, index=df_eval.index, dtype=object)

    # Metrics are only meaningful for rows that actually received a prediction.
    valid_predictions = df_eval[df_eval['predicted_sentiment'].notna()]

    if valid_predictions.empty:
        print("\nNo successful predictions; skipping accuracy and classification report.")
    else:
        accuracy = accuracy_score(
            valid_predictions['sentiment'],
            valid_predictions['predicted_sentiment']
        )

        report = classification_report(
            valid_predictions['sentiment'],
            valid_predictions['predicted_sentiment']
        )

        print("\nAccuracy:", accuracy)
        print("\nDetailed Classification Report:")
        print(report)

    # Save results
    df_eval.to_csv('evaluation_results.csv', index=False)

    # Keep only the comparison columns that exist: the ground-truth 'reasoning'
    # column may have been dropped upstream, and selecting a missing label
    # would raise KeyError after all predictions were already made.
    wanted_columns = [
        'sentence', 'target', 'aspect',
        'sentiment', 'predicted_sentiment',
        'reasoning', 'predicted_reasoning'
    ]
    comparison_columns = [col for col in wanted_columns if col in df_eval.columns]

    comparison_df = df_eval[comparison_columns].copy()
    comparison_df['matched'] = comparison_df['sentiment'] == comparison_df['predicted_sentiment']
    comparison_df.to_csv('detailed_comparison.csv', index=False)

    return df_eval

In [19]:
# Fixed seed so the same 10 rows are drawn on every run, keeping the
# evaluation reproducible after a kernel restart.
test = data.sample(n=10, random_state=42)

In [26]:
# Score the sampled rows; the returned frame is shown as the cell output.
evaluate_predictions(df=test)

Processing predictions:   0%|          | 0/10 [00:02<?, ?it/s]


KeyboardInterrupt: 