## Preliminaries

In [None]:
# Install the Anthropic SDK into the kernel's environment; it provides the
# `Anthropic` client imported in the next cell.
# NOTE(review): the version is unpinned — consider pinning it for reproducibility.
%pip install anthropic

In [None]:
from google.colab import userdata
from anthropic import Anthropic
import pandas as pd
import numpy as np
import json
import re
import requests
import warnings

# Silence every Python warning for the rest of the session. This keeps cell
# output short, but it also hides deprecation notices from the libraries above.
warnings.filterwarnings("ignore")

In [None]:
# Build the module-level Anthropic client that call_claude_classification uses.
# The API key is read from Colab's secret store (`userdata`) under the name
# "ANTHROPIC_API_KEY" rather than being hard-coded in the notebook.
client = Anthropic(api_key=userdata.get("ANTHROPIC_API_KEY"))

## Load and Prepare Data

In [None]:
# Download the Fed sentiment training set; the file is parsed as tab-delimited
url = 'https://www.dropbox.com/scl/fi/i2esmtinb4qor0mzokybp/fed_sentiment_training.csv?rlkey=v9u7afunmy8w0v0lwizba5g25&dl=1'
df = pd.read_csv(url, sep='\t')

# Hold out 10% of the rows as a test set. The row order comes from a seeded
# permutation, so the same rows are selected on every run.
n = df.shape[0]
test_size = int(n * 0.1)
indices = np.random.RandomState(95).permutation(n)
test_idxs = indices[:test_size]
train_idxs = indices[test_size:]

# Keep only the text and its label for the held-out rows
df_test = df[["text", "sentiment"]].iloc[test_idxs].copy()
df_test.head()

## LLM Classification

In [None]:
# Thin wrapper around the Anthropic Messages API for single-turn requests
def call_claude_classification(text, system_message=None, model="claude-sonnet-4-20250514", temperature=0.0, max_tokens=100):
    """Send one user message to Claude and return the text of its reply.

    Args:
        text: Content of the single user message.
        system_message: Optional system prompt. It is only forwarded when
            truthy, so ``None`` and ``""`` both omit the ``system`` field.
        model: Model identifier passed to the API.
        temperature: Sampling temperature passed to the API.
        max_tokens: Upper bound on generated tokens passed to the API.

    Returns:
        The ``text`` attribute of the first content block in the response.
    """
    request_kwargs = dict(
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
        messages=[{"role": "user", "content": text}],
    )

    # The system field is optional; leave it out entirely when not supplied
    if system_message:
        request_kwargs["system"] = system_message

    reply = client.messages.create(**request_kwargs)
    first_block = reply.content[0]
    return first_block.text

In [None]:
# Define system prompt
system_prompt = """You are a research assistant working for the Fed. You have a degree in Economics."""


def _parse_category(response):
    """Extract the ``category`` value from a raw model reply.

    Handles bare JSON, JSON wrapped in a Markdown code fence (with or without
    a ``json`` tag), and a JSON object surrounded by stray prose. String
    labels are stripped and lower-cased so they match the lowercase labels
    named in the prompt; non-string values are returned unchanged.

    Raises:
        ValueError, KeyError, TypeError, AttributeError: when no category can
        be extracted from ``response``.
    """
    # Remove an optional leading ``` / ```json fence and a trailing ``` fence
    payload = re.sub(r'^```(?:json)?\s*|\s*```$', '', response.strip())

    try:
        parsed = json.loads(payload)
    except ValueError:
        # Not pure JSON: decode the first object embedded in the reply and
        # ignore whatever text follows it.
        start = payload.find('{')
        if start == -1:
            raise
        parsed, _ = json.JSONDecoder().raw_decode(payload, start)

    category = parsed['category']
    if isinstance(category, str):
        category = category.strip().lower()
    return category


def classify_text(texts):
    """Classify each text as "dovish", "neutral" or "hawkish" using Claude.

    One request is sent per text through ``call_claude_classification`` with
    the module-level ``system_prompt``. Progress is printed every tenth item
    and on the last item.

    Args:
        texts: Sized sequence of strings to classify.

    Returns:
        A list with one entry per input, in order: the category taken from
        the model's JSON reply (lower-cased and stripped when it is a
        string), or ``None`` when the reply could not be parsed. Errors
        raised by the API call itself are not caught and propagate.
    """
    predictions = []
    total = len(texts)

    for i, text in enumerate(texts):
        if i % 10 == 0 or i == total - 1:
            print(f"Processing {i+1}/{total}...")

        user_prompt = f"""Your task is to classify the text into one of the three categories ("dovish", "neutral", "hawkish").
        The text is taken at random from the texts of FOMC announcements.

        IMPORTANT: Respond ONLY with valid JSON in this exact format, with no additional text or explanation:
        {{"category": "your_classification"}}

Text: {text}"""

        response = call_claude_classification(
            user_prompt,
            system_message=system_prompt,
            temperature=0.0,
            max_tokens=100
        )

        try:
            predictions.append(_parse_category(response))
        except (ValueError, KeyError, TypeError, AttributeError):
            # Best effort: report the unparseable reply and keep the output
            # aligned with the input by recording a missing prediction.
            print(f"Error parsing response: {response}")
            predictions.append(None)

    return predictions

In [None]:
# Apply to all test data (one API request per row)
df_test['llm_prediction'] = classify_text(df_test['text'].tolist())

# Show the first ten gold labels next to the model's predictions. A bare
# expression (rather than print) lets the notebook render the table richly.
df_test[['sentiment', 'llm_prediction']].head(10)

## Evaluate Model Performance

In [None]:
# Accuracy = share of test rows whose prediction equals the gold label.
# Rows with a missing prediction (None) never compare equal, so they count as errors.
is_correct = df_test['sentiment'].eq(df_test['llm_prediction'])
accuracy = is_correct.mean()
print(f"\nLLM Accuracy: {accuracy:.3f}")

In [None]:
# Show confusion matrix
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Single source of truth for the label order: matrix rows/columns and tick labels
class_labels = ['dovish', 'hawkish', 'neutral']

# classify_text records None when a reply cannot be parsed. A prediction column
# that mixes None with string labels is not a valid scikit-learn target, so
# those rows are excluded here and reported instead of crashing the cell.
scored = df_test.dropna(subset=['llm_prediction'])
n_unparsed = len(df_test) - len(scored)
if n_unparsed:
    print(f"Excluded {n_unparsed} row(s) without a parsed prediction from the confusion matrix")

cm = confusion_matrix(scored['sentiment'], scored['llm_prediction'],
                      labels=class_labels)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels,
            yticklabels=class_labels)
plt.ylabel('True Label')
plt.xlabel('LLM Prediction')
plt.title('LLM Confusion Matrix')
plt.show()