In [12]:
using LinearAlgebra
using Distributions
using Optim
using Random
using StatsFuns
using DataFrames
using StatsBase
# Seed the global random number generator
Random.seed!(123)

TaskLocalRNG()

## Parameters

In [13]:
# Model configuration
N = 3            # number of alternatives (baseline excluded)
d_u, d_p = 1, N  # dimensions of u and of p
S = 5000         # number of samples

5000

## Generate true coefficients and training data

- True coefficients

In [14]:
# True coefficients: for every alternative draw a d_u-vector (a) and a
# d_p-vector (b) from Uniform(-1, 1), rounded to two decimals.
a_star = map(_ -> round.(rand(Uniform(-1, 1), d_u); digits=2), 1:N)
b_star = map(_ -> round.(rand(Uniform(-1, 1), d_p); digits=2), 1:N)

3-element Vector{Vector{Float64}}:
 [-0.62, 0.05, -0.22]
 [-0.91, 0.87, 0.16]
 [-0.35, 0.05, 0.67]

- Training data

In [15]:
# Step 2: draw the samples (u, p) from a standard normal distribution
U_train = map(_ -> randn(d_u), 1:S);
P_train = map(_ -> randn(d_p), 1:S);

In [16]:
# Step 3: choice probabilities of every sample.
# Row s holds the probabilities of alternatives 1..N for sample s; the baseline
# contributes the constant 1 to the denominator.
probs = Matrix{Float64}(undef, S, N)
for s in 1:S
    u_s, p_s = U_train[s], P_train[s]
    weights = exp.([dot(a_star[n], u_s) + dot(b_star[n], p_s) for n in 1:N])
    probs[s, :] = weights ./ (1 + sum(weights))
end

In [17]:
# Step 4 (optional): sample one choice per observation from its probabilities.
# The baseline is labelled 0 and receives the probability mass left over by
# alternatives 1..N.
choices = Vector{Int}(undef, S)
for s in 1:S
    option_probs = probs[s, :]
    weights = Weights([1 - sum(option_probs); option_probs])
    choices[s] = sample(0:N, weights)
end
# Show the first few sampled choices
df = DataFrame(choice = choices)
println(first(df, 10))

[1m10×1 DataFrame[0m
[1m Row [0m│[1m choice [0m
     │[90m Int64  [0m
─────┼────────
   1 │      3
   2 │      1
   3 │      2
   4 │      0
   5 │      2
   6 │      3
   7 │      3
   8 │      1
   9 │      2
  10 │      1


In [18]:
using Optim

# Split the flat parameter vector `vec_theta` into the coefficient matrices:
# the first N * d_u entries become `a` (d_u × N, filled column by column) and
# the remaining entries become `b` (d_p × N).
function unpack_params(vec_theta, N, d_u, d_p)
    split_at = d_u * N
    a_flat = vec_theta[1:split_at]
    b_flat = vec_theta[(split_at + 1):end]
    return reshape(a_flat, d_u, N), reshape(b_flat, d_p, N)
end

# Kernel behind `neg_log_likelihood`.
# `a` and `b` hold one coefficient column per alternative, `U[s]` / `P[s]` are
# the covariate vectors of observation s, and `labels[s]` is its observed choice
# in 0:N (0 = baseline, whose logit is fixed at 0). Only the first `n_obs`
# observations are used. Returns minus the summed log-probabilities.
function _neg_log_likelihood(a, b, U, P, labels, n_obs)
    n_alt = size(a, 2)
    total = zero(float(promote_type(eltype(a), eltype(b))))
    for s in 1:n_obs
        u, p, y = U[s], P[s], labels[s]
        logits = [dot(view(a, :, n), u) + dot(view(b, :, n), p) for n in 1:n_alt]

        # Log-sum-exp over the baseline (logit 0) and all alternatives. Shifting
        # by the largest logit keeps exp from overflowing, and working in log
        # space means no epsilon is needed to protect against log(0).
        shift = max(zero(eltype(logits)), maximum(logits))
        log_denom = shift + log(exp(-shift) + sum(x -> exp(x - shift), logits))

        chosen_logit = y == 0 ? zero(eltype(logits)) : logits[y]
        total += chosen_logit - log_denom
    end
    return -total
end

# Negative log-likelihood of the multinomial-logit model with a baseline option.
# `vec_theta` is the flat parameter vector in the layout expected by
# `unpack_params`. The data (`U_train`, `P_train`, `choices`) and the sizes
# (`N`, `d_u`, `d_p`, `S`) are read from the notebook's global variables.
function neg_log_likelihood(vec_theta::AbstractVector{<:Real})
    a, b = unpack_params(vec_theta, N, d_u, d_p)
    # Function barrier: the globals are not `const`, so hand them to a kernel
    # that is compiled for their concrete types instead of looping over them here.
    return _neg_log_likelihood(a, b, U_train, P_train, choices, S)
end


neg_log_likelihood (generic function with 2 methods)

In [19]:
# Random starting point for the flat (a, b) parameter vector,
# N * (d_u + d_p) entries in total
theta_init = randn((d_u + d_p) * N)

# Minimise the negative log-likelihood with BFGS
result = Optim.optimize(neg_log_likelihood, theta_init, Optim.BFGS())

# Print the optimiser's summary
println("Optimization completed:")
println(result)


Optimization completed:
 * Status: success

 * Candidate solution
    Final objective value:     5.981057e+03

 * Found with
    Algorithm:     BFGS

 * Convergence measures
    |x - x'|               = 0.00e+00 ≤ 0.0e+00
    |x - x'|/|x'|          = 0.00e+00 ≤ 0.0e+00
    |f(x) - f(x')|         = 0.00e+00 ≤ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 0.00e+00 ≤ 0.0e+00
    |g(x)|                 = 3.23e-06 ≰ 1.0e-08

 * Work counters
    Seconds run:   85  (vs limit Inf)
    Iterations:    231
    f(x) calls:    686
    ∇f(x) calls:   686



In [20]:
# Take the minimiser found by the optimiser and split it into the estimated
# coefficient matrices: a_hat is d_u × N and b_hat is d_p × N
theta_hat = Optim.minimizer(result)
a_hat, b_hat = unpack_params(theta_hat, N, d_u, d_p)

([0.09242659659959769 0.21125114577045984 0.8087371811520461], [-0.6874727044549884 -0.9848223823890944 -0.39939743757782975; 0.12911783755173314 0.9307403240322467 0.10088940923316135; -0.12767346846610003 0.20716011259572353 0.7101380592437164])

In [21]:
# True `a` coefficients, one vector per alternative, rounded for display
map(a -> round.(a; digits=2), a_star)

3-element Vector{Vector{Float64}}:
 [0.04]
 [0.17]
 [0.78]

In [25]:
# Estimated `a` coefficients (column n belongs to alternative n), rounded for display
round.(a_hat; digits=2)

1×3 Matrix{Float64}:
 0.09  0.21  0.81

In [26]:
# True `b` coefficients, one vector per alternative, rounded for display
map(b -> round.(b; digits=2), b_star)

3-element Vector{Vector{Float64}}:
 [-0.62, 0.05, -0.22]
 [-0.91, 0.87, 0.16]
 [-0.35, 0.05, 0.67]

In [27]:
# Estimated `b` coefficients (column n belongs to alternative n), rounded for display
round.(b_hat; digits=2)

3×3 Matrix{Float64}:
 -0.69  -0.98  -0.4
  0.13   0.93   0.1
 -0.13   0.21   0.71

In [None]:
# Mixed-integer exponential-cone model solved with Mosek.
# NOTE(review): this cell has no execution count and relies on names that are
# not defined in the visible part of the notebook: `N_u`, `K`, `u`, `p_dag`,
# `A`, `B`, `tau`, and the packages providing `Model`, `Mosek`, `@variable`,
# `@constraint`, `@objective`, `MOI` (presumably JuMP and MosekTools — confirm).
# NOTE(review): `N_u` is presumably the dimension of `u` (called `d_u` earlier
# in this notebook) — confirm.
model = Model(Mosek.Optimizer)
# Decision variables
@variable(model, rho_0 >= 0)                          # ρ₀ ≥ 0
@variable(model, rho[1:N] >= 0)                      # ρ_n ≥ 0
@variable(model, v_sigma[1:N_u])                           # ς = ρ₀ * u
@variable(model, v_phi[1:N])                           # φ_n
@variable(model, Y[1:N, 1:K])                      # y_{nk}
@variable(model, Z[1:N, 1:K])                      # z_{nk}
@variable(model, X[1:N, 1:K], Bin)                 # x_{nk} ∈ {0,1}

# 1. Sum constraint: ρ₀ and all ρ_n add up to one
@constraint(model, rho_0 + sum(rho) == 1)

# 2. ς = ρ₀ * u
@constraint(model, v_sigma .== rho_0 * u)

# 3. φ_n = ∑_k z_{nk} * p_{nk}^†
@constraint(model, v_phi .== sum(Z .* p_dag, dims=2))

# 4. Bounds on the auxiliary variables.
# Together with 0 ≤ ρ ≤ 1 (implied by constraint 1 and nonnegativity), the four
# inequalities per variable force y_{nk} = ρ_n and z_{nk} = ρ₀ when x_{nk} = 1,
# and y_{nk} = z_{nk} = 0 when x_{nk} = 0.
for n in 1:N
    for k in 1:K
        # y_{nk} bounds
        @constraint(model, 0 <= Y[n, k])
        @constraint(model, Y[n, k] <= X[n, k])
        @constraint(model, Y[n, k] >= rho[n] - (1 - X[n, k]))
        @constraint(model, Y[n, k] <= rho[n])

        # z_{nk} bounds
        @constraint(model, 0 <= Z[n, k])
        @constraint(model, Z[n, k] <= X[n, k])
        @constraint(model, Z[n, k] >= rho_0 - (1 - X[n, k]))
        @constraint(model, Z[n, k] <= rho_0)
    end
end


# Disabled: require exactly one selected k per n
# @constraint(model, 1 .== sum(X, dims=2))

# Disabled: fix X to a given assignment matrix
# X_given = zeros(N, K)  # the given X matrix
# X_given[1, :] = [1, 0, 0]
# X_given[2, :] = [0, 1, 0]
# X_given[3, :] = [0, 0, 1]
# @constraint(model, X .== X_given)

# One exponential-cone constraint per n. MOI.ExponentialCone is the set
# {(x, y, z) : y * exp(x / y) <= z, y > 0}, so as written this imposes
# rho_0 * exp(rho[n] / rho_0) <= A[n,:]' * ς + B[n,:]' * φ.
# NOTE(review): confirm that this component ordering is the intended one.
for n in 1:N
    @constraint(model, [rho[n], rho_0, A[n,:]' * v_sigma + B[n,:]' * v_phi] in MOI.ExponentialCone())
end

# Objective: minimise ∑_{n,k} y_{nk} * p_{nk}^† − tau * ∑_n ρ_n
@objective(model, Min,sum(Y .* p_dag) - tau * sum(rho))

optimize!(model)

In [None]:
# Report the termination status; it can be queried whether or not the solver
# produced a solution
println("求解状态：", termination_status(model))

# `objective_value` and `value` throw when the solver returned no primal
# solution (e.g. an infeasible model), so only query them when one exists
if has_values(model)
    println("最优目标值：", objective_value(model))

    # Print selected decision variables (optional)
    println("\n估计的变量：")
    println("rho_0 = ", value(rho_0))
    println("rho = ", value.(rho))
    println("X = \n", round.(Int, value.(X)))
else
    @warn "Solver returned no primal solution; skipping objective and variable output"
end

In [11]:
using Optim, LinearAlgebra, Random, DataFrames, CSV

# Simulate multinomial-logit choice data (for demonstration).
#
# Arguments
#   n_obs, n_choices, n_features : sizes of the simulated data set
# Keywords
#   β_true : true coefficient vector; its length must equal `n_features`
#   seed   : seed passed to `Random.seed!` (this reseeds the global RNG);
#            pass `nothing` to leave the RNG state untouched
# Returns `(X, y, β_true)`, where `X` is an n_obs × n_choices × n_features
# array of standard-normal features and `y[i]` in 1:n_choices is the sampled
# choice of observation i.
# Throws `ArgumentError` when `length(β_true) != n_features`.
function simulate_mnl_data(n_obs=1000, n_choices=4, n_features=2;
                           β_true=[1.5, -1.0], seed=123)
    # Fail early with a clear message instead of a DimensionMismatch from the
    # matrix product further down
    length(β_true) == n_features || throw(ArgumentError(
        "β_true has length $(length(β_true)) but n_features = $n_features"))

    isnothing(seed) || Random.seed!(seed)
    X = randn(n_obs, n_choices, n_features)  # features of every alternative for every individual

    # Choice probabilities: softmax of the linear utilities across alternatives
    utility = zeros(n_obs, n_choices)
    for j in 1:n_choices
        utility[:, j] = X[:, j, :] * β_true
    end
    logits = exp.(utility)
    probs = logits ./ sum(logits, dims=2)

    # Sample one choice per individual
    y = [sample(1:n_choices, Weights(probs[i, :])) for i in 1:n_obs]

    return X, y, β_true
end

# Negative log-likelihood of a multinomial-logit model.
# `X[i, j, :]` is the feature vector of alternative j for observation i,
# `β` the coefficient vector and `y[i]` the index of the chosen alternative.
function neg_log_likelihood(β, X, y)
    n_obs, n_choices, _ = size(X)
    ll = 0.0

    for obs in 1:n_obs
        # Linear utility of every alternative for this observation
        v = map!(alt -> dot(X[obs, alt, :], β), zeros(n_choices), 1:n_choices)

        # Log-sum-exp, shifted by the largest utility for numerical stability
        shift = maximum(v)
        lse = shift + log(sum(exp.(v .- shift)))

        ll += v[y[obs]] - lse
    end

    # Negate so the result can be minimised
    return -ll
end

# Estimate the multinomial-logit coefficients by minimising
# `neg_log_likelihood(β, X, y)` with BFGS.
# `β_init` is the starting point; when it is `nothing`, a zero vector with one
# entry per feature (third dimension of `X`) is used.
# Returns `(β_hat, result)`: the minimiser and the full Optim result object.
function estimate_mnl(X, y; β_init=nothing)
    start = isnothing(β_init) ? zeros(size(X, 3)) : β_init

    objective(β) = neg_log_likelihood(β, X, y)
    settings = Optim.Options(g_tol=1e-6, iterations=1000, show_trace=false)
    result = optimize(objective, start, BFGS(), settings)

    Optim.converged(result) || @warn "Optimization did not converge"

    return Optim.minimizer(result), result
end

# --- Main program ---
# Simulate 1000 observations with 4 alternatives and 2 features, estimate the
# coefficients, and print them next to the true values
X, y, β_true = simulate_mnl_data(1000, 4, 2)
β_hat, result = estimate_mnl(X, y)

println("True β: ", β_true)
println("Estimated β: ", β_hat)
println("Converged: ", Optim.converged(result))

True β: [1.5, -1.0]
Estimated β: [1.6857721562197523, -1.0560371362921575]
Converged: true
