# LLM Algorithmic Pricing: Results Analysis

This notebook analyzes LLM agent pricing behavior through textual and statistical methods.

## Contents
1. **Data Loading** - Load simulation results
2. **Textual Analysis** - Word embeddings and text clustering of LLM agents' reasoning
3. **Statistical Analysis** - Descriptive statistics and econometric regression of pricing data
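4. **Export Results** - Save descriptive statistics, regression output, and cluster summaries to files (charts appear inline in Sections 2 and 3)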

## Setup and Imports

In [16]:
import json
import os
import re
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Statistical analysis
import statsmodels.api as sm
from scipy import stats

# Text analysis
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from openai import OpenAI

# Add parent directory to path
sys.path.insert(0, os.path.abspath('..'))
from config.env_config import OPENAI_API_KEY, DEEPSEEK_API_KEY
from config.market_config import (
    NASH_EQUILIBRIUM_PRICE, NASH_EQUILIBRIUM_PROFIT,
    MONOPOLY_PRICE, MONOPOLY_PROFIT, ANALYSIS_WINDOW, BURN_IN_PERIODS
)

# Notebook-wide plotting defaults: seaborn grid theme and a wide default canvas
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

print("✓ All packages imported successfully")

✓ All packages imported successfully


## 1. Data Loading

In [None]:
def load_experiment_data(prompt_type, run_id, data_dir="../data"):
    """
    Load all data for a specific experimental run.

    Reads the six JSON files stored under
    ``<data_dir>/<prompt_type>_run_<run_id>/``.

    Args:
        prompt_type: Prompt label used as the run-directory prefix (e.g. 'P1').
        run_id: Run identifier used as the run-directory suffix.
        data_dir: Directory that contains the per-run folders.

    Returns:
        tuple: (simulation_log, metadata, agent0_history, agent1_history, agent0_reasoning, agent1_reasoning)

    Raises:
        FileNotFoundError: If the run directory or any of the expected files is missing.
    """
    run_dir = Path(data_dir) / f"{prompt_type}_run_{run_id}"

    def _read_json(*parts):
        # Decode as UTF-8 explicitly: JSON is UTF-8 by specification, whereas
        # open() otherwise falls back to the platform's locale encoding.
        with open(run_dir.joinpath(*parts), 'r', encoding='utf-8') as f:
            return json.load(f)

    # Files are read in a fixed order so a missing file is reported consistently.
    simulation_log = _read_json("simulation_log.json")
    metadata = _read_json("metadata.json")

    # Per-agent market histories
    agent0_history = _read_json("agent_0", "market_history.json")
    agent1_history = _read_json("agent_1", "market_history.json")

    # Per-agent reasoning processes
    agent0_reasoning = _read_json("agent_0", "reasoning_process.json")
    agent1_reasoning = _read_json("agent_1", "reasoning_process.json")

    return simulation_log, metadata, agent0_history, agent1_history, agent0_reasoning, agent1_reasoning


def load_all_runs(prompt_type, num_runs=10, data_dir="../data"):
    """
    Load data from all runs of a specific prompt type.

    Runs are numbered 1..num_runs. A run whose files cannot be found is
    skipped with a printed warning.

    Args:
        prompt_type: Prompt label identifying the experiment (e.g. 'P1').
        num_runs: Highest run number to attempt.
        data_dir: Directory that contains the per-run folders.

    Returns:
        DataFrame with columns: prompt_type, run, period, firm, price, sales, profit, market_share, reasoning.
        One row per (run, period, firm). The columns are present even when
        no run could be loaded, so downstream column access does not fail.
    """
    columns = [
        'prompt_type', 'run', 'period', 'firm', 'price',
        'sales', 'profit', 'market_share', 'reasoning',
    ]
    all_data = []

    for run_id in range(1, num_runs + 1):
        # Only the load itself is guarded, so errors raised while building
        # records are not mistaken for a missing run.
        try:
            sim_log, *_ = load_experiment_data(prompt_type, run_id, data_dir)
        except FileNotFoundError:
            print(f"Warning: {prompt_type} Run {run_id} not found")
            continue

        for period_data in sim_log:
            for firm_id in (0, 1):
                firm_data = period_data[f'firm_{firm_id}']
                all_data.append({
                    'prompt_type': prompt_type,
                    'run': run_id,
                    'period': period_data['period'],
                    'firm': firm_id,
                    'price': firm_data['price'],
                    # The log stores quantity sold under the key 'demand'
                    'sales': firm_data['demand'],
                    'profit': firm_data['profit'],
                    'market_share': firm_data['market_share'],
                    # Reasoning is optional in the log; default to empty text
                    'reasoning': period_data.get(f'reasoning_{firm_id}', ''),
                })

    return pd.DataFrame(all_data, columns=columns)


# Read every P1 run into one long-format frame (one row per run/period/firm)
print("Loading experiment data...")
df_p1 = load_all_runs('P1', num_runs=10)
n_observations = len(df_p1)
column_list = ', '.join(df_p1.columns.tolist())
print(f"✓ Loaded P1 data: {n_observations} observations ({n_observations//2} periods × 2 firms)")
print(f"  Columns: {column_list}")

# Uncomment when P2 data is available
# df_p2 = load_all_runs('P2', num_runs=10)
# print(f"✓ Loaded P2 data: {len(df_p2)} observations")

df_p1.head(10)

Loading experiment data...
✓ Loaded P1 data: 2000 observations (1000 periods × 2 firms)
  Columns: prompt_type, run, period, firm, price, sales, profit, market_share, reasoning


Unnamed: 0,prompt_type,run,period,firm,price,sales,profit,market_share,reasoning
0,P1,1,1,0,1.4,44.98,17.99,50.0,"First, the user is asking me to set a price fo..."
1,P1,1,1,1,1.4,44.98,17.99,50.0,"First, the market environment: I'm one of two ..."
2,P1,1,2,0,1.8,23.12,18.5,26.89,"First, I am setting the price for the first ti..."
3,P1,1,2,1,1.4,62.85,25.14,73.11,"First, I am considering the market environment..."
4,P1,1,3,0,1.8,23.12,18.5,26.89,"First, I am considering the market environment..."
5,P1,1,3,1,1.4,62.85,25.14,73.11,"First, I am considering the market environment..."
6,P1,1,4,0,1.8,38.37,30.69,50.0,"First, I am considering the market environment..."
7,P1,1,4,1,1.8,38.37,30.69,50.0,"First, I am considering the market environment..."
8,P1,1,5,0,1.8,38.37,30.69,50.0,"First, I am considering the market environment..."
9,P1,1,5,1,1.8,38.37,30.69,50.0,"First, I am considering the market environment..."


## 2. Textual Analysis

### 2.1 Extract Reasoning Sentences

In [None]:
def extract_all_reasoning_sentences(prompt_type, num_runs=10, data_dir="../data"):
    """
    Extract all reasoning sentences from all runs.

    For each run, the text stored under the 'reasoning' key of each agent's
    reasoning file is split into sentences. A sentence boundary is a period
    followed by whitespace or the end of the text, so decimal numbers such
    as "1.8" are not cut in half. Fragments of 20 characters or fewer
    (after stripping) are discarded.

    Args:
        prompt_type: Prompt label identifying the experiment (e.g. 'P1').
        num_runs: Highest run number to attempt (runs are numbered from 1).
        data_dir: Directory that contains the per-run folders.

    Returns:
        DataFrame with columns: run, firm, sentence. The columns are present
        even when nothing was extracted. Runs whose files are missing are
        skipped with a printed warning.
    """
    all_sentences = []

    for run_id in range(1, num_runs + 1):
        try:
            *_, reasoning_0, reasoning_1 = load_experiment_data(prompt_type, run_id, data_dir)
        except FileNotFoundError:
            print(f"Warning: {prompt_type} Run {run_id} not found")
            continue

        for firm_id, reasoning in enumerate((reasoning_0, reasoning_1)):
            # assumes the reasoning file is a dict with a 'reasoning' string — TODO confirm
            text = reasoning.get('reasoning', '')

            # Split only where a period ends a sentence, not inside numbers.
            for fragment in re.split(r'\.(?=\s|$)', text):
                fragment = fragment.strip()
                if len(fragment) > 20:
                    all_sentences.append({
                        'run': run_id,
                        'firm': firm_id,
                        # Restore the period consumed by the split
                        'sentence': fragment + '.'
                    })

    return pd.DataFrame(all_sentences, columns=['run', 'firm', 'sentence'])


# Build the sentence table for P1 and report how much text each firm contributed
print("\n=== Extracting Reasoning Sentences ===")
sentences_df = extract_all_reasoning_sentences('P1', num_runs=10)
print(f"✓ Extracted {len(sentences_df)} sentences from {sentences_df['run'].nunique()} runs")
for firm_id in (0, 1):
    firm_sentence_count = len(sentences_df[sentences_df['firm'] == firm_id])
    print(f"  Firm {firm_id}: {firm_sentence_count} sentences")

# Preview the first three sentences, truncated to 100 characters
print("\nSample reasoning sentences:")
preview = sentences_df.head(3)
for i, row in preview.iterrows():
    snippet = row['sentence'][:100]
    print(f"  {i+1}. [Firm {row['firm']}] {snippet}...")

sentences_df.head()

### 2.2 Word Embeddings (OpenAI text-embedding-3-large)

In [None]:
def vectorize_sentences(sentences, batch_size=100):
    """
    Embed sentences with the OpenAI text-embedding-3-large model.

    Sentences are sent to the embeddings endpoint in consecutive batches of
    at most `batch_size`, and a progress line is printed per batch.

    Returns:
        numpy array built from the embeddings of every batch, in input order.
    """
    client = OpenAI(api_key=OPENAI_API_KEY)

    # Ceiling division: number of requests needed to cover all sentences
    total_batches = (len(sentences) - 1) // batch_size + 1
    vectors = []

    batch_starts = range(0, len(sentences), batch_size)
    for batch_num, start in enumerate(batch_starts, start=1):
        chunk = sentences[start:start + batch_size]
        print(f"  Vectorizing batch {batch_num}/{total_batches} ({len(chunk)} sentences)...")

        response = client.embeddings.create(
            model="text-embedding-3-large",
            input=chunk
        )

        vectors += [item.embedding for item in response.data]

    return np.array(vectors)


# Embed every extracted sentence (this calls the OpenAI API once per batch)
print("\n=== Creating Word Embeddings ===")
print("Using OpenAI text-embedding-3-large model...")
sentence_texts = sentences_df['sentence'].tolist()
embeddings = vectorize_sentences(sentence_texts)
print(f"\n✓ Created embeddings matrix: {embeddings.shape}")
n_embedded = embeddings.shape[0]
n_dimensions = embeddings.shape[1]
print(f"  {n_embedded} sentences × {n_dimensions} dimensions")

### 2.3 Dimensionality Reduction (PCA)

Reduce the embeddings from 3072 dimensions to 20 principal components, retaining as much of the variance as possible.

In [None]:
print("\n=== Applying PCA for Dimensionality Reduction ===")
n_components = 20
pca = PCA(n_components=n_components, random_state=42)
embeddings_reduced = pca.fit_transform(embeddings)

original_dims = embeddings.shape[1]
reduced_dims = embeddings_reduced.shape[1]
total_explained = pca.explained_variance_ratio_.sum()
print(f"✓ Reduced from {original_dims} to {reduced_dims} dimensions")
print(f"  Explained variance: {total_explained:.2%}")

# Share of variance captured by each of the leading components
print("\n  Top 5 principal components:")
for i, ratio in enumerate(pca.explained_variance_ratio_[:5]):
    print(f"    PC{i+1}: {ratio:.2%}")

### 2.4 Text Clustering (K-means)

Group similar reasoning patterns into 20 clusters.

In [None]:
print("\n=== Running K-means Clustering ===")
n_clusters = 20
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10, max_iter=300)
clusters = kmeans.fit_predict(embeddings_reduced)

# Attach each sentence's cluster label to the sentence table
sentences_df['cluster'] = clusters
print(f"✓ Assigned sentences to {n_clusters} clusters")

# Cluster size distribution, ordered by cluster ID
cluster_counts = sentences_df['cluster'].value_counts().sort_index()
print(f"\n  Cluster sizes (min={cluster_counts.min()}, max={cluster_counts.max()}, mean={cluster_counts.mean():.1f})")

print("\n  Top 5 largest clusters:")
# cluster_counts is ordered by cluster ID, so head(5) would list clusters 0-4;
# rank by size explicitly to report the clusters that are actually largest.
for cluster_id, count in cluster_counts.nlargest(5).items():
    pct = count / len(sentences_df) * 100
    print(f"    Cluster {cluster_id}: {count} sentences ({pct:.1f}%)")

### 2.5 Cluster Visualization

In [None]:
# Bar chart: number of sentences assigned to each cluster, one colour per cluster ID
fig, ax = plt.subplots(figsize=(14, 6))
cluster_counts = sentences_df['cluster'].value_counts().sort_index()
colors = plt.cm.viridis(np.linspace(0, 1, n_clusters))
ax.bar(cluster_counts.index, cluster_counts.values, color=colors, edgecolor='black', alpha=0.8)
ax.set_xlabel('Cluster ID', fontsize=12)
ax.set_ylabel('Number of Sentences', fontsize=12)
ax.set_title('P1: Distribution of Reasoning Sentences Across Clusters', fontsize=14, fontweight='bold')
ax.set_xticks(range(n_clusters))
ax.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

total_sentences = len(sentences_df)
print(f"\n✓ Total clusters: {n_clusters}")
print(f"  Total sentences: {total_sentences}")
print(f"  Average sentences per cluster: {total_sentences/n_clusters:.1f}")

### 2.6 Generate Cluster Summaries

Use DeepSeek to summarize the common theme in each cluster.

In [None]:
def get_representative_sentences(sentences_df, embeddings_reduced, cluster_id, n=10):
    """
    Return up to `n` sentences of a cluster, nearest to its centroid first.

    The centroid is the mean of the cluster members' rows in
    `embeddings_reduced`; closeness is Euclidean distance to that mean.
    """
    in_cluster = sentences_df['cluster'] == cluster_id
    member_vectors = embeddings_reduced[in_cluster]
    member_texts = sentences_df.loc[in_cluster, 'sentence'].values

    # Distance of every member to the mean of the cluster's vectors
    centroid = member_vectors.mean(axis=0)
    offsets = member_vectors - centroid
    dist_to_centroid = np.linalg.norm(offsets, axis=1)

    # Positions of the n smallest distances, ascending
    nearest = np.argsort(dist_to_centroid)[:n]
    return member_texts[nearest]


def summarize_cluster(representative_sentences):
    """
    Ask the DeepSeek chat model for a one-sentence theme of the given sentences.

    The sentences are presented as a numbered list inside a single user
    message; the stripped text of the first returned choice is the result.
    """
    client = OpenAI(
        api_key=DEEPSEEK_API_KEY,
        base_url="https://api.deepseek.com"
    )

    # Numbered list "1. ...", "2. ...", one sentence per line
    numbered_list = "\n".join(
        f"{position}. {sentence}"
        for position, sentence in enumerate(representative_sentences, start=1)
    )
    prompt = (
        "The following are reasoning sentences from LLM pricing agents in a duopoly market:\n"
        "\n"
        f"{numbered_list}\n"
        "\n"
        "Summarize the common pricing strategy or reasoning theme in ONE concise sentence (maximum 20 words)."
    )

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3,
        max_tokens=100
    )

    first_choice = response.choices[0]
    return first_choice.message.content.strip()


# Summarize every cluster from the sentences nearest its centre (one API call per cluster)
print("\n=== Generating Cluster Summaries ===")
print("Using DeepSeek-3.2 to analyze reasoning patterns...\n")

cluster_summaries = {}

for cluster_id in range(n_clusters):
    member_count = (sentences_df['cluster'] == cluster_id).sum()
    print(f"Cluster {cluster_id} ({member_count} sentences):")

    # Ten most central sentences stand in for the whole cluster
    exemplars = get_representative_sentences(sentences_df, embeddings_reduced, cluster_id, n=10)

    cluster_summaries[cluster_id] = summarize_cluster(exemplars)

    print(f"  → {cluster_summaries[cluster_id]}\n")

print("✓ Generated summaries for all clusters")

### 2.7 Top Reasoning Patterns

In [None]:
banner = "=" * 70
print("\n" + banner)
print("TOP 5 MOST COMMON REASONING PATTERNS (P1 - Defensive Prompt)")
print(banner + "\n")

# value_counts() orders clusters by size, largest first
top_clusters = sentences_df['cluster'].value_counts().head(5)
total_sentences = len(sentences_df)

for rank, (cluster_id, count) in enumerate(top_clusters.items(), start=1):
    share = count / total_sentences * 100
    print(f"{rank}. Cluster {cluster_id}")
    print(f"   Frequency: {count} sentences ({share:.1f}%)")
    print(f"   Strategy: {cluster_summaries[cluster_id]}")
    print()

## 3. Statistical Analysis

### 3.1 Descriptive Statistics (Last 30 Periods)

In [None]:
def calculate_descriptive_stats(df, last_n_periods=ANALYSIS_WINDOW):
    """
    Average price and profit per (run, firm) over the final periods of each run.

    For every run, only rows whose period is within the last
    `last_n_periods` periods of that run are averaged.

    Returns:
        DataFrame with columns: run, firm, avg_price, avg_profit.
    """
    # Final period observed in each run
    final_period_by_run = df.groupby('run')['period'].max()

    rows = []
    for run_id in df['run'].unique():
        # Periods strictly above this cutoff fall inside the window
        cutoff = final_period_by_run[run_id] - last_n_periods
        in_window = (df['run'] == run_id) & (df['period'] > cutoff)
        window = df[in_window]

        for firm_id in (0, 1):
            firm_rows = window[window['firm'] == firm_id]
            rows.append(dict(
                run=run_id,
                firm=firm_id,
                avg_price=firm_rows['price'].mean(),
                avg_profit=firm_rows['profit'].mean(),
            ))

    return pd.DataFrame(rows)


print("\n=== Descriptive Statistics ===")
stats_p1 = calculate_descriptive_stats(df_p1)

# Per-firm views of the run-level averages
stats_firm0 = stats_p1[stats_p1['firm'] == 0]
stats_firm1 = stats_p1[stats_p1['firm'] == 1]

print(f"\nP1 (Defensive Prompt) - Last {ANALYSIS_WINDOW} Periods:")
# Mean and standard deviation across runs, for each metric and firm
for heading, column in (("Average Price", 'avg_price'), ("Average Profit", 'avg_profit')):
    print(f"\n  {heading}:")
    for firm_id, firm_stats in ((0, stats_firm0), (1, stats_firm1)):
        across_runs = firm_stats[column]
        print(f"    Firm {firm_id}: ${across_runs.mean():.3f} (std: ${across_runs.std():.3f})")

print(f"\n  Benchmarks:")
print(f"    Nash Equilibrium: Price = ${NASH_EQUILIBRIUM_PRICE:.3f}, Profit = ${NASH_EQUILIBRIUM_PROFIT:.3f}")
print(f"    Monopoly Level:   Price = ${MONOPOLY_PRICE:.3f}, Profit = ${MONOPOLY_PROFIT:.3f}")

# Gap to each benchmark, using the average over both firms and all runs
avg_price = stats_p1['avg_price'].mean()
avg_profit = stats_p1['avg_profit'].mean()

nash_price_gap = avg_price - NASH_EQUILIBRIUM_PRICE
nash_profit_gap = avg_profit - NASH_EQUILIBRIUM_PROFIT
monopoly_price_gap = avg_price - MONOPOLY_PRICE
monopoly_profit_gap = avg_profit - MONOPOLY_PROFIT

print(f"\n  Distance from Nash:     Price = ${nash_price_gap:+.3f}, Profit = ${nash_profit_gap:+.3f}")
print(f"\n  Distance from Monopoly: Price = ${monopoly_price_gap:+.3f}, Profit = ${monopoly_profit_gap:+.3f}")

stats_p1

### 3.2 Visualization: Price Evolution Over Time

In [None]:
# Price paths of both firms for runs 1-3, one panel per run, with benchmark lines
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

firm_styles = ((0, 'Firm 0', '#2E86AB'), (1, 'Firm 1', '#A23B72'))
benchmark_lines = (
    (NASH_EQUILIBRIUM_PRICE, 'green', 'Nash Equilibrium'),
    (MONOPOLY_PRICE, 'red', 'Monopoly'),
)

for ax, run_id in zip(axes, [1, 2, 3]):
    run_data = df_p1[df_p1['run'] == run_id]

    # One line per firm
    for firm_id, firm_label, firm_color in firm_styles:
        firm_data = run_data[run_data['firm'] == firm_id]
        ax.plot(firm_data['period'], firm_data['price'], label=firm_label, linewidth=2, color=firm_color)

    # Horizontal reference levels
    for level, line_color, line_label in benchmark_lines:
        ax.axhline(y=level, color=line_color, linestyle='--', label=line_label, alpha=0.7, linewidth=1.5)

    ax.set_xlabel('Period', fontsize=11)
    ax.set_ylabel('Price ($)', fontsize=11)
    ax.set_title(f'P1 Run {run_id}: Price Evolution', fontsize=12, fontweight='bold')
    ax.legend(loc='best', fontsize=9)
    ax.grid(True, alpha=0.3)
    # Fixed vertical range shared by all panels
    ax.set_ylim([1.5, 2.3])

plt.tight_layout()
plt.show()

### 3.3 Visualization: Price and Profit Distributions

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Per-firm views of the run-level averages
stats_firm0 = stats_p1[stats_p1['firm'] == 0]
stats_firm1 = stats_p1[stats_p1['firm'] == 1]

# Left panel: prices; right panel: profits. Both share the same layout.
panels = (
    (axes[0], 'Price', 'avg_price', NASH_EQUILIBRIUM_PRICE, MONOPOLY_PRICE),
    (axes[1], 'Profit', 'avg_profit', NASH_EQUILIBRIUM_PROFIT, MONOPOLY_PROFIT),
)

for ax, metric, column, nash_level, monopoly_level in panels:
    ax.hist(stats_firm0[column], bins=12, alpha=0.6, label='Firm 0', edgecolor='black', color='#2E86AB')
    ax.hist(stats_firm1[column], bins=12, alpha=0.6, label='Firm 1', edgecolor='black', color='#A23B72')
    ax.axvline(x=nash_level, color='green', linestyle='--', linewidth=2, label='Nash')
    ax.axvline(x=monopoly_level, color='red', linestyle='--', linewidth=2, label='Monopoly')
    ax.set_xlabel(f'Average {metric} ($)', fontsize=11)
    ax.set_ylabel('Frequency', fontsize=11)
    ax.set_title(f'P1: {metric} Distribution (Last {ANALYSIS_WINDOW} Periods)', fontsize=12, fontweight='bold')
    ax.legend(fontsize=9)
    ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

### 3.4 Econometric Regression Analysis

#### Model Specification:

$$p^t_{i,r} = \alpha_{i,r} + \gamma p^{t-1}_{i,r} + \delta p^{t-1}_{-i,r} + \epsilon^t_{i,r}$$

Where:
- $p^t_{i,r}$ = price set by agent $i$ at period $t$ of run $r$
- $\gamma$ = **own price stickiness** (how closely an agent follows its own previous-period price)
- $\delta$ = **competitor responsiveness** (how strongly an agent responds to its competitor's previous-period price, $p^{t-1}_{-i,r}$)
- $\alpha_{i,r}$ = firm-run fixed effect

In [None]:
def prepare_regression_data(df, firm_id=0, omit_first_n=BURN_IN_PERIODS):
    """
    Build the regression table for one firm.

    Keeps periods after the first `omit_first_n`, then adds the firm's own
    one-step lagged price, the competitor's current price and the
    competitor's one-step lagged price. Lags are computed within each run
    by shifting one row, so rows without a predecessor are dropped.

    Returns:
        DataFrame of the firm's rows with added columns price_lag,
        price_competitor, price_competitor_lag and run_str.
    """
    run_period = ['run', 'period']

    # Discard the burn-in periods once, for both firms
    after_burn_in = df[df['period'] > omit_first_n]

    # Own rows in chronological order within each run, with own lagged price
    own = after_burn_in[after_burn_in['firm'] == firm_id].copy()
    own = own.sort_values(run_period)
    own['price_lag'] = own.groupby('run')['price'].shift(1)

    # Competitor is the other firm of the duopoly (0 <-> 1)
    rival = after_burn_in[after_burn_in['firm'] == 1 - firm_id].copy()
    rival = rival.sort_values(run_period)

    # Align the competitor's price with the own row of the same run and period;
    # the competitor's column receives the '_competitor' suffix
    merged = own.merge(
        rival[run_period + ['price']],
        on=run_period,
        suffixes=('', '_competitor')
    )

    # Competitor's lagged price, again within run
    merged['price_competitor_lag'] = merged.groupby('run')['price_competitor'].shift(1)

    # The first remaining period of each run has no lag available
    merged = merged.dropna(subset=['price_lag', 'price_competitor_lag'])

    # String run label, used to build fixed-effect dummies
    merged['run_str'] = merged['run'].astype(str)

    return merged


def run_pricing_regression(df, firm_id=0):
    """
    Run OLS regression with firm-run fixed effects.

    Regresses the firm's price on its own lagged price, the competitor's
    lagged price, a constant and one dummy per run (first run dropped as
    the reference category).

    Args:
        df: Long-format pricing data as accepted by prepare_regression_data.
        firm_id: Firm whose pricing rule is estimated (0 or 1).

    Returns:
        Fitted statsmodels OLS results; the coefficients of interest are
        named 'price_lag' and 'price_competitor_lag'.
    """
    df_reg = prepare_regression_data(df, firm_id)

    # Prepare dependent and independent variables
    y = df_reg['price']
    X = pd.get_dummies(
        df_reg[['run_str', 'price_lag', 'price_competitor_lag']],
        columns=['run_str'],
        drop_first=True,
        # pandas >= 2.0 creates boolean dummies by default; mixed with the
        # float regressors that yields an object-dtype design matrix, which
        # statsmodels rejects. Force numeric dummies instead.
        dtype=float
    )

    # Add constant
    X = sm.add_constant(X)

    # Run OLS regression
    model = sm.OLS(y, X)
    results = model.fit()

    return results


# Estimate the pricing rule separately for each firm and show the full summaries
divider = "-" * 80

print("\n=== Econometric Regression Analysis ===")
print("\nFirm 0 Regression Results:")
print(divider)
results_0 = run_pricing_regression(df_p1, firm_id=0)
summary_0 = results_0.summary()
print(summary_0)

print("\n\nFirm 1 Regression Results:")
print(divider)
results_1 = run_pricing_regression(df_p1, firm_id=1)
summary_1 = results_1.summary()
print(summary_1)

### 3.5 Regression Coefficients Summary

In [None]:
banner = "=" * 70
print("\n" + banner)
print("KEY REGRESSION COEFFICIENTS")
print(banner)

# Pull gamma/delta and their standard errors out of each fit, reporting as we go
coefficients = {}
for firm_id, fit in ((0, results_0), (1, results_1)):
    gamma = fit.params['price_lag']
    delta = fit.params['price_competitor_lag']
    gamma_se = fit.bse['price_lag']
    delta_se = fit.bse['price_competitor_lag']

    print(f"\nFirm {firm_id}:")
    print(f"  γ (own price stickiness):      {gamma:.4f} (SE: {gamma_se:.4f})")
    print(f"  δ (competitor responsiveness): {delta:.4f} (SE: {delta_se:.4f})")
    print(f"  R²: {fit.rsquared:.4f}")

    coefficients[firm_id] = (gamma, delta, gamma_se, delta_se)

# Keep the per-firm names available to later cells
gamma_0, delta_0, gamma_0_se, delta_0_se = coefficients[0]
gamma_1, delta_1, gamma_1_se, delta_1_se = coefficients[1]

# Interpretation based on the simple average of the two firms' coefficients
print("\nInterpretation:")
avg_gamma = (gamma_0 + gamma_1) / 2
avg_delta = (delta_0 + delta_1) / 2
print(f"  Average own price stickiness (γ):      {avg_gamma:.4f}")
print(f"  Average competitor responsiveness (δ): {avg_delta:.4f}")

# Thresholds used for the HIGH/LOW labels: 0.5 for gamma, 0.3 for delta
stickiness_level = "HIGH" if avg_gamma > 0.5 else "LOW"
print(f"  → Firms show {stickiness_level} stickiness to their own previous prices")

responsiveness_note = (
    "HIGH responsiveness to competitor prices (potential coordination)"
    if avg_delta > 0.3
    else "LOW responsiveness to competitor prices"
)
print(f"  → Firms show {responsiveness_note}")

---

## 4. Export Results

In [None]:
print("\n=== Exporting Analysis Results ===")

# 1. Descriptive statistics
stats_p1.to_csv('descriptive_stats_P1.csv', index=False)
print("✓ Saved descriptive_stats_P1.csv")

# 2. Regression results
# Text files are written as UTF-8 explicitly: the reports contain non-ASCII
# characters (γ, δ) that a platform-default encoding may be unable to encode.
with open('regression_results_P1.txt', 'w', encoding='utf-8') as f:
    f.write("="*80 + "\n")
    f.write("ECONOMETRIC REGRESSION ANALYSIS - P1 (Defensive Prompt)\n")
    f.write("="*80 + "\n\n")

    f.write("Firm 0 Regression:\n")
    f.write("-"*80 + "\n")
    f.write(results_0.summary().as_text())

    f.write("\n\n" + "="*80 + "\n\n")

    f.write("Firm 1 Regression:\n")
    f.write("-"*80 + "\n")
    f.write(results_1.summary().as_text())
print("✓ Saved regression_results_P1.txt")

# 3. Cluster summaries, largest cluster first
cluster_sizes = sentences_df['cluster'].value_counts()
cluster_summary_df = pd.DataFrame(
    [
        {
            'cluster_id': k,
            'summary': v,
            'size': int(cluster_sizes.get(k, 0)),
            'percentage': int(cluster_sizes.get(k, 0)) / len(sentences_df) * 100
        }
        for k, v in cluster_summaries.items()
    ],
    # Explicit columns keep the sort below valid even with no summaries
    columns=['cluster_id', 'summary', 'size', 'percentage']
).sort_values('size', ascending=False)

cluster_summary_df.to_csv('cluster_summaries_P1.csv', index=False)
print("✓ Saved cluster_summaries_P1.csv")

# 4. Complete analysis summary
with open('analysis_summary_P1.txt', 'w', encoding='utf-8') as f:
    f.write("="*80 + "\n")
    f.write("LLM ALGORITHMIC PRICING ANALYSIS SUMMARY - P1 (Defensive Prompt)\n")
    f.write("="*80 + "\n\n")

    f.write("TEXTUAL ANALYSIS\n")
    f.write("-"*80 + "\n")
    f.write(f"Total sentences analyzed: {len(sentences_df)}\n")
    f.write(f"Number of clusters: {n_clusters}\n")
    f.write(f"\nTop 5 reasoning patterns:\n")
    # After sorting by size the index labels are no longer 0, 1, 2, ...;
    # number the rows by position so the list reads 1-5.
    for rank, (_, row) in enumerate(cluster_summary_df.head(5).iterrows(), start=1):
        f.write(f"  {rank}. Cluster {row['cluster_id']} ({row['size']} sentences, {row['percentage']:.1f}%)\n")
        f.write(f"     {row['summary']}\n")

    f.write(f"\n\nSTATISTICAL ANALYSIS\n")
    f.write("-"*80 + "\n")
    f.write(f"Average price: ${avg_price:.3f}\n")
    f.write(f"Average profit: ${avg_profit:.3f}\n")
    f.write(f"Nash equilibrium price: ${NASH_EQUILIBRIUM_PRICE:.3f}, Monopoly price: ${MONOPOLY_PRICE:.3f}\n")
    f.write(f"\nRegression coefficients:\n")
    f.write(f"  Average γ (own stickiness): {avg_gamma:.4f}\n")
    f.write(f"  Average δ (competitor responsiveness): {avg_delta:.4f}\n")

print("✓ Saved analysis_summary_P1.txt")

print("\n" + "="*70)
print("ANALYSIS COMPLETE")
print("="*70)
print("\nAll results exported to current directory.")
print("\nGenerated files:")
print("  • descriptive_stats_P1.csv")
print("  • regression_results_P1.txt")
print("  • cluster_summaries_P1.csv")
print("  • analysis_summary_P1.txt")