# Benchmark Metrics Calculator for DQN Market Simulation

**Purpose:**
- Calculate Nash equilibrium and Monopoly prices for all experimental scenarios
- Support both symmetric and asymmetric firm configurations
- Verify the benchmark values specified in README.md

**Scenarios:**
1. 2-Firm Symmetric (μ = 0.25)
2. 3-Firm Symmetric (μ = 0.25)
3. 4-Firm Symmetric (μ = 0.25)
4. 4-Firm Asymmetric (μ = 0.30)



In [15]:
# =============================================================================
# N-Firm Logit Bertrand Pricing Model Numerical Solver
# Uses Newton's method to compute Nash equilibrium and monopoly prices
# =============================================================================

from __future__ import annotations
import numpy as np
from numpy.linalg import solve, LinAlgError
from dataclasses import dataclass
from typing import Callable, Tuple

def logit_shares(p: np.ndarray, g: np.ndarray, mu: float) -> np.ndarray:
    """
    Evaluate multinomial Logit market shares with an outside option

    Mathematical formula: s_i = exp((g_i - p_i)/μ) / [exp(0) + Σ_j exp((g_j - p_j)/μ)]

    Parameters:
        p: Price vector [p_0, p_1, ..., p_N]
        g: Product quality vector [g_0, g_1, ..., g_N]
        mu: Product substitutability parameter

    Returns:
        Market share vector [s_0, s_1, ..., s_N] (the outside option's share is not returned)
    """
    # Scaled utility of each product: u_i = (g_i - p_i) / μ
    scaled_utility = (g - p) / mu

    # Shift every exponent by the largest utility (never below the outside
    # option's utility of 0) so that exp() cannot overflow
    shift = max(0.0, float(scaled_utility.max()))
    inside_weights = np.exp(scaled_utility - shift)
    outside_weight = np.exp(-shift)  # outside option (u = 0) under the same shift

    # Normalize by the outside option plus all inside products
    return inside_weights / (outside_weight + inside_weights.sum())

def dsdpi_matrix(s: np.ndarray, mu: float) -> np.ndarray:
    """
    Build the matrix of Logit share derivatives with respect to prices (∂s_i/∂p_j)

    Mathematical formulas:
        ∂s_i/∂p_i = -(1/μ) * s_i * (1 - s_i)  [own-price effect, diagonal]
        ∂s_i/∂p_j = (1/μ) * s_i * s_j         [cross-price effect, off-diagonal]

    Parameters:
        s: Market share vector
        mu: Substitutability parameter

    Returns:
        N×N matrix M where M[i,j] = ∂s_i/∂p_j and N = len(s)
    """
    shares = np.asarray(s, dtype=float)
    inv_mu = 1.0 / mu

    # Cross-price effects for every (i, j) pair at once via broadcasting
    M = (inv_mu * shares)[:, None] * shares[None, :]

    # Overwrite the diagonal with the (negative) own-price effects
    idx = np.arange(len(shares))
    M[idx, idx] = (-inv_mu * shares) * (1.0 - shares)
    return M

def nash_F_and_J(p: np.ndarray, g: np.ndarray, c: np.ndarray, mu: float) -> Tuple[np.ndarray, np.ndarray]:
    """
    Evaluate the Nash (Bertrand) first-order conditions F and their Jacobian J

    Each firm sets only its own price, so its first-order condition is
        F_i = s_i + (p_i - c_i) * ∂s_i/∂p_i = 0

    Parameters:
        p: Price vector
        g: Quality vector
        c: Cost vector
        mu: Substitutability parameter

    Returns:
        F: First-order condition vector [F_0, F_1, ..., F_N]
        J: Jacobian matrix (∂F_i/∂p_j)
    """
    shares = logit_shares(p, g, mu)
    dsdpi = dsdpi_matrix(shares, mu)
    idx = np.arange(len(p))

    margin = p - c
    own_effect = np.diag(dsdpi)  # ∂s_i/∂p_i

    # First-order conditions: F_i = s_i + (p_i - c_i) * ∂s_i/∂p_i
    F = shares + margin * own_effect

    # Second derivatives ∂²s_i/∂p_i∂p_j for all (i, j):
    #   off-diagonal: -(1/μ²) * s_i * s_j * (1-2s_i)
    #   diagonal:      (1/μ²) * s_i * (1-s_i) * (1-2s_i)
    inv_mu_sq = 1.0 / mu**2
    curvature = 1.0 - 2.0 * shares
    d2 = (-inv_mu_sq * shares)[:, None] * shares[None, :] * curvature[:, None]
    d2[idx, idx] = inv_mu_sq * shares * (1.0 - shares) * curvature

    # ∂F_i/∂p_j = ∂s_i/∂p_j + (p_i-c_i)*∂²s_i/∂p_i∂p_j + δ_ij*∂s_i/∂p_i
    J = dsdpi + margin[:, None] * d2
    J[idx, idx] += own_effect

    return F, J

def monopoly_F_and_J(p: np.ndarray, g: np.ndarray, c: np.ndarray, mu: float) -> Tuple[np.ndarray, np.ndarray]:
    """
    Evaluate the joint-profit (monopoly) first-order conditions F and their Jacobian J

    All prices are chosen together to maximize total profit, so the condition
    for price k includes its effect on every product's demand:
        F_k = s_k + Σ_i (p_i - c_i) * ∂s_i/∂p_k = 0

    Parameters:
        p: Price vector
        g: Quality vector
        c: Cost vector
        mu: Substitutability parameter

    Returns:
        F: First-order condition vector [F_0, F_1, ..., F_N]
        J: Jacobian matrix (∂F_k/∂p_j)
    """
    s = logit_shares(p, g, mu)
    dsdpi = dsdpi_matrix(s, mu)
    N = len(p)

    # F_k = s_k + Σ_i (p_i - c_i) * ∂s_i/∂p_k  (transpose so the sum runs over i)
    F = s + dsdpi.T @ (p - c)

    def second_derivative(i: int, k: int, j: int) -> float:
        """Return ∂²s_i/∂p_k∂p_j for the Logit shares."""
        if i != k:
            # Product rule on ∂s_i/∂p_k = (1/μ) * s_i * s_k
            return (1.0/mu) * (dsdpi[i, j] * s[k] + s[i] * dsdpi[k, j])
        if j == i:
            return (1.0/mu**2) * s[i] * (1.0 - s[i]) * (1.0 - 2.0*s[i])
        return -(1.0/mu**2) * s[i] * s[j] * (1.0 - 2.0*s[i])

    # J[k,j] = ∂s_k/∂p_j + Σ_i [δ_ij * ∂s_i/∂p_k + (p_i - c_i) * ∂²s_i/∂p_k∂p_j]
    J = np.zeros((N, N))
    for k, j in np.ndindex(N, N):
        acc = 0.0
        for i in range(N):
            d2 = second_derivative(i, k, j)
            acc += (1 if i == j else 0) * dsdpi[i, k] + (p[i] - c[i]) * d2
        J[k, j] = dsdpi[k, j] + acc

    return F, J

@dataclass
class NewtonResult:
    """
    Outcome of a Newton's method run

    Attributes:
        p: Price vector at the last iterate
        converged: True when the step-size tolerance was met
        iters: Iterations performed (the iteration cap when not converged)
    """
    p: np.ndarray    # final price vector
    converged: bool  # whether the stopping criterion was satisfied
    iters: int       # iteration count reported by the solver

def newton_vec(FJ, p0: np.ndarray, g: np.ndarray, c: np.ndarray, mu: float,
               tol: float = 1e-12, max_iter: int = 200, damping: float = 1.0) -> NewtonResult:
    """
    Damped Newton-Raphson iteration for a system of nonlinear equations F(p) = 0

    Algorithm: p_{k+1} = p_k - damping * J^{-1} * F(p_k)

    Parameters:
        FJ: Function called as FJ(p, g, c, mu) and returning (F, J), i.e.
            the first-order conditions and their Jacobian matrix
        p0: Initial price vector (copied, never modified)
        g: Quality vector
        c: Cost vector
        mu: Substitutability parameter
        tol: Convergence tolerance on the infinity norm of the price update
        max_iter: Maximum number of iterations
        damping: Factor applied to each Newton step

    Returns:
        NewtonResult: Final prices, convergence flag and iteration count
    """
    prices = np.array(p0, dtype=float).copy()

    for iteration in range(1, max_iter + 1):
        residual, jacobian = FJ(prices, g, c, mu)

        # Newton direction: solve J * delta = F, falling back to the
        # pseudo-inverse when the Jacobian is singular
        try:
            delta = solve(jacobian, residual)
        except LinAlgError:
            delta = np.linalg.pinv(jacobian) @ residual

        updated = prices - damping * delta
        update_size = np.linalg.norm(updated - prices, ord=np.inf)
        prices = updated

        # Stop once ||p_{k+1} - p_k||_∞ drops below the tolerance
        if update_size < tol:
            return NewtonResult(p=prices, converged=True, iters=iteration)

    # Iteration budget exhausted without meeting the tolerance
    return NewtonResult(p=prices, converged=False, iters=max_iter)

def solve_nash(g: np.ndarray, c: np.ndarray, mu: float, p0: np.ndarray | None = None, **kwargs) -> NewtonResult:
    """
    Compute Nash equilibrium prices with Newton's method

    Parameters:
        g: Quality vector
        c: Cost vector
        mu: Substitutability parameter
        p0: Initial prices (default: c + 0.6)
        **kwargs: Forwarded to newton_vec (tol, max_iter, damping)

    Returns:
        NewtonResult: Nash equilibrium solution result
    """
    # Without a user-supplied guess, start from cost plus a fixed markup of 0.6
    start = c + 0.6 if p0 is None else p0
    return newton_vec(nash_F_and_J, start, g, c, mu, **kwargs)

def solve_monopoly(g: np.ndarray, c: np.ndarray, mu: float, p0: np.ndarray | None = None, **kwargs) -> NewtonResult:
    """
    Compute monopoly (joint profit maximizing) prices with Newton's method

    Parameters:
        g: Quality vector
        c: Cost vector
        mu: Substitutability parameter
        p0: Initial prices (default: c + 0.9)
        **kwargs: Forwarded to newton_vec (tol, max_iter, damping)

    Returns:
        NewtonResult: Monopoly pricing solution result
    """
    # Without a user-supplied guess, start from cost plus a fixed markup of 0.9
    # (larger than the Nash default of 0.6)
    start = c + 0.9 if p0 is None else p0
    return newton_vec(monopoly_F_and_J, start, g, c, mu, **kwargs)

def shares_profits(p: np.ndarray, g: np.ndarray, c: np.ndarray, mu: float):
    """
    Evaluate market shares and per-firm profits at the given prices

    Parameters:
        p: Price vector
        g: Quality vector
        c: Cost vector
        mu: Substitutability parameter

    Returns:
        s: Market share vector
        prof: Profit vector (π_i = (p_i - c_i) * s_i)
    """
    shares = logit_shares(p, g, mu)
    # Profit is the margin multiplied by the share
    return shares, (p - c) * shares

def summarize(title: str, p: np.ndarray, g: np.ndarray, c: np.ndarray, mu: float):
    """
    Print a per-firm and aggregate report for a price vector

    Parameters:
        title: Heading printed above the report
        p: Price vector
        g: Quality vector
        c: Cost vector
        mu: Substitutability parameter
    """
    s, prof = shares_profits(p, g, c, mu)

    print(f"=== {title} ===")

    # One line per firm: outcome figures first, then the firm's parameters
    for i, price in enumerate(p):
        outcome = f"firm {i}: price={price:.6f}, share={s[i]:.6f}, profit={prof[i]:.6f}, "
        params = f"cost={c[i]:.6f}, quality={g[i]:.6f}"
        print(outcome + params)

    # Totals across the firms (the outside option is not included)
    print(f"TOTAL: share={s.sum():.6f}, profit={prof.sum():.6f}")


In [17]:
# =============================================================================
# Scenario 1: Two-Firm Symmetric (Basic DQN Simulation)
# =============================================================================

print("="*80)
print("SCENARIO 1: 2-FIRM SYMMETRIC")
print("Experimental parameters from README.md")
print("="*80)

# Market parameters
mu = 0.25  # Substitutability parameter
g = np.array([2.00, 2.00])  # Product qualities
c = np.array([1.00, 1.00])  # Marginal costs

print("\nMarket Configuration:")
# Report the firm count from the cost vector so it cannot drift from the setup above
print(f"  Number of firms: {len(c)}")
print(f"  Marginal costs (c): {c}")
print(f"  Product qualities (g): {g}")
print(f"  Substitutability (μ): {mu}")

# Calculate Nash equilibrium
resN = solve_nash(g, c, mu)
summarize("Nash Equilibrium", resN.p, g, c, mu)
print(f"Convergence: {resN.converged} in {resN.iters} iterations\n")

# Calculate Monopoly prices
resM = solve_monopoly(g, c, mu)
summarize("Monopoly (Collusive)", resM.p, g, c, mu)
print(f"Convergence: {resM.converged} in {resM.iters} iterations\n")

# Print calculated values next to the README reference values for a manual
# comparison (firms are symmetric, so firm 0 is representative)
print("\n" + "-"*40)
print("VERIFICATION AGAINST README.md VALUES:")
print("-"*40)
print(f"Nash price (calculated):     {resN.p[0]:.3f}")
print("Nash price (README):          1.473")
print(f"Monopoly price (calculated):  {resM.p[0]:.3f}")
print("Monopoly price (README):      1.925")

_, prof_N = shares_profits(resN.p, g, c, mu)
_, prof_M = shares_profits(resM.p, g, c, mu)
print(f"\nNash profit (calculated):    {prof_N[0]:.3f}")
print("Nash profit (README):         0.223")
print(f"Monopoly profit (calculated): {prof_M[0]:.3f}")
print("Monopoly profit (README):     0.337")

# Price range for the DQN action space: extend the [Nash, Monopoly] interval
# on both sides by a fraction ξ of its width
xi = 0.1
price_diff = resM.p[0] - resN.p[0]
min_price = resN.p[0] - xi * price_diff
max_price = resM.p[0] + xi * price_diff
print(f"\nPrice range (ξ = {xi}):")
print(f"  Min price: {min_price:.3f}")
print(f"  Max price: {max_price:.3f}")

=== Nash (Bertrand) ===
firm 0: price=1.472927, share=0.471377, profit=0.222927, cost=1.000000, quality=2.000000
firm 1: price=1.472927, share=0.471377, profit=0.222927, cost=1.000000, quality=2.000000
TOTAL: share=0.942754, profit=0.445853
Nash converged: True in 4 iterations.

=== Monopoly (Joint Profit) ===
firm 0: price=1.924981, share=0.364862, profit=0.337490, cost=1.000000, quality=2.000000
firm 1: price=1.924981, share=0.364862, profit=0.337490, cost=1.000000, quality=2.000000
TOTAL: share=0.729724, profit=0.674981
Monopoly converged: True in 4 iterations.


In [18]:
# =============================================================================
# Scenario 2: Three-Firm Symmetric
# =============================================================================

if __name__ == "__main__":
    # Three identical firms: same quality and same marginal cost
    mu = 0.25
    g = np.full(3, 2.00)
    c = np.full(3, 1.00)

    # Competitive benchmark
    resN = solve_nash(g, c, mu)
    summarize("Nash (Bertrand)", resN.p, g, c, mu)
    nash_status = f"Nash converged: {resN.converged} in {resN.iters} iterations.\n"
    print(nash_status)

    # Collusive benchmark
    resM = solve_monopoly(g, c, mu)
    summarize("Monopoly (Joint Profit)", resM.p, g, c, mu)
    monopoly_status = f"Monopoly converged: {resM.converged} in {resM.iters} iterations."
    print(monopoly_status)

=== Nash (Bertrand) ===
firm 0: price=1.370163, share=0.324621, profit=0.120163, cost=1.000000, quality=2.000000
firm 1: price=1.370163, share=0.324621, profit=0.120163, cost=1.000000, quality=2.000000
firm 2: price=1.370163, share=0.324621, profit=0.120163, cost=1.000000, quality=2.000000
TOTAL: share=0.973864, profit=0.360488
Nash converged: True in 4 iterations.

=== Monopoly (Joint Profit) ===
firm 0: price=2.000000, share=0.250000, profit=0.250000, cost=1.000000, quality=2.000000
firm 1: price=2.000000, share=0.250000, profit=0.250000, cost=1.000000, quality=2.000000
firm 2: price=2.000000, share=0.250000, profit=0.250000, cost=1.000000, quality=2.000000
TOTAL: share=0.750000, profit=0.750000
Monopoly converged: True in 5 iterations.


In [None]:
# =============================================================================
# Scenario 3: Four-Firm Symmetric
# =============================================================================

if __name__ == "__main__":
    # Four identical firms: same quality and same marginal cost
    mu = 0.25
    g = np.full(4, 2.00)
    c = np.full(4, 1.00)

    # Competitive benchmark
    resN = solve_nash(g, c, mu)
    summarize("Nash (Bertrand)", resN.p, g, c, mu)
    nash_status = f"Nash converged: {resN.converged} in {resN.iters} iterations.\n"
    print(nash_status)

    # Collusive benchmark
    resM = solve_monopoly(g, c, mu)
    summarize("Monopoly (Joint Profit)", resM.p, g, c, mu)
    monopoly_status = f"Monopoly converged: {resM.converged} in {resM.iters} iterations."
    print(monopoly_status)

=== Nash (Bertrand) ===
firm 0: price=1.331461, share=0.245763, profit=0.081461, cost=1.000000, quality=2.000000
firm 1: price=1.331461, share=0.245763, profit=0.081461, cost=1.000000, quality=2.000000
firm 2: price=1.331461, share=0.245763, profit=0.081461, cost=1.000000, quality=2.000000
firm 3: price=1.331461, share=0.245763, profit=0.081461, cost=1.000000, quality=2.000000
TOTAL: share=0.983051, profit=0.325843
Nash converged: True in 4 iterations.

=== Monopoly (Joint Profit) ===
firm 0: price=2.054411, share=0.190725, profit=0.201103, cost=1.000000, quality=2.000000
firm 1: price=2.054411, share=0.190725, profit=0.201103, cost=1.000000, quality=2.000000
firm 2: price=2.054411, share=0.190725, profit=0.201103, cost=1.000000, quality=2.000000
firm 3: price=2.054411, share=0.190725, profit=0.201103, cost=1.000000, quality=2.000000
TOTAL: share=0.762901, profit=0.804411
Monopoly converged: True in 5 iterations.


In [26]:
# =============================================================================
# Scenario 4: Four-Firm Asymmetric (Extended Analysis)
# =============================================================================

if __name__ == "__main__":
    # Four firms that differ in both product quality and marginal cost
    mu = 0.3
    g = np.asarray([2.10, 2.00, 1.90, 1.8])    # product qualities, one per firm
    c = np.asarray([1.05, 1.10, 0.95, 1.00])   # marginal costs, one per firm

    print("=== Asymmetric Four-Firm Logit Bertrand Pricing ===\n")

    # Competitive benchmark
    resN = solve_nash(g, c, mu)
    summarize("Nash (Bertrand)", resN.p, g, c, mu)
    nash_status = f"Nash converged: {resN.converged} in {resN.iters} iterations.\n"
    print(nash_status)

    # Collusive benchmark
    resM = solve_monopoly(g, c, mu)
    summarize("Monopoly (Joint Profit)", resM.p, g, c, mu)
    monopoly_status = f"Monopoly converged: {resM.converged} in {resM.iters} iterations."
    print(monopoly_status)

=== Asymmetric Four-Firm Logit Bertrand Pricing ===

=== Nash (Bertrand) ===
firm 0: price=1.486008, share=0.311939, profit=0.136008, cost=1.050000, quality=2.100000
firm 1: price=1.486262, share=0.223325, profit=0.086262, cost=1.100000, quality=2.000000
firm 2: price=1.350745, share=0.251394, profit=0.100745, cost=0.950000, quality=1.900000
firm 3: price=1.362778, share=0.173049, profit=0.062778, cost=1.000000, quality=1.800000
TOTAL: share=0.959707, profit=0.385793
Nash converged: True in 6 iterations.

=== Monopoly (Joint Profit) ===
firm 0: price=2.121103, share=0.261060, profit=0.279622, cost=1.050000, quality=2.100000
firm 1: price=2.171103, share=0.158341, profit=0.169599, cost=1.100000, quality=2.000000
firm 2: price=2.021103, share=0.187058, profit=0.200358, cost=0.950000, quality=1.900000
firm 3: price=2.071103, share=0.113456, profit=0.121523, cost=1.000000, quality=1.800000
TOTAL: share=0.719915, profit=0.771103
Monopoly converged: True in 5 iterations.
