In [None]:
import numpy as np
from numpy import random
import time
import pickle
import os
import pathlib
import inspect

from Data import data_generation
from OLS import ols_method
from DDR import ddr_method
from SPO_Plus import SPO_plus_method
from Performance import performance_evaluation
from Figure import regret_h2h
# Shared plotting helper; its figure_plot_upleft method is called by the figure cells at the end of the notebook.
regret_h2h_fig = regret_h2h()

In [None]:
def obtain_cost(iters,DataPath,file_name):
    """Collect the stored ``'cost'`` entry of one pickle file per iteration.

    For every ``i`` in ``range(iters)`` the file
    ``DataPath + "iter=<i>/" + file_name`` is unpickled and its ``'cost'``
    value is read.

    Args:
        iters: Number of iteration folders to visit (indices 0 .. iters-1).
        DataPath: Base directory as a string. It is concatenated directly,
            so it must already end with a path separator.
        file_name: Name of the pickle file inside each ``iter=<i>/`` folder.

    Returns:
        list: The ``'cost'`` values in iteration order.
    """
    def _cost_of(idx):
        # Path layout: <DataPath>iter=<idx>/<file_name>
        # NOTE: pickle.load can execute arbitrary code; only read files this project produced.
        with open(DataPath + "iter=" + str(idx) + "/" + file_name, "rb") as handle:
            return pickle.load(handle)['cost']

    return [_cost_of(idx) for idx in range(iters)]

# Store results

In [None]:
def store_results(file_name, **kwargs):
    """Persist the given keyword arguments as a dict in a pickle file.

    Args:
        file_name (str): Path of the pickle file to write. The file is opened
            in ``"wb"`` mode, so existing content is replaced.
        **kwargs: Values to store; each keyword name becomes a key of the
            stored dictionary.

    Returns:
        dict: The dictionary (keyword name -> value) that was written.
    """
    # Copy so the returned/stored mapping is a plain dict owned by this call.
    result = dict(kwargs)
    with open(file_name, "wb") as f:
        pickle.dump(result, f)
    return result

# Comparison

In [25]:
def calculate_comparison(file_name,perf_eva,c_item, c_base, c_oracle,ypio):
    """Compare one method's costs against a baseline and store the result.

    Args:
        file_name: Path of the pickle file the comparison is written to
            (via ``store_results``).
        perf_eva: Object providing ``cross_compare2`` and
            ``cross_compare2plus``.
        c_item: Costs of the method under evaluation.
        c_base: Costs of the baseline method.
        c_oracle: Costs of the oracle decisions.
        ypio: ``0`` runs ``cross_compare2`` (no ``pio`` value); any other
            value runs ``cross_compare2plus``, which additionally yields
            ``pio``.

    Returns:
        tuple: ``(lbels, h2h, mci, pio)``. ``pio`` is ``None`` when
        ``ypio == 0``.
    """
    if ypio == 0:
        # Basic comparison: this variant produces no pio, so bind it explicitly;
        # otherwise the shared return statement below raises UnboundLocalError.
        lbels, h2h, mci = perf_eva.cross_compare2(c_item, c_base, c_oracle)
        pio = None
        store_results(file_name,lbels=lbels,h2h=h2h,mci=mci)
        print("h2h = ",h2h)
    else:
        # Extended comparison that also reports pio.
        lbels, h2h, mci, pio = perf_eva.cross_compare2plus(c_item, c_base, c_oracle)
        store_results(file_name,lbels=lbels,h2h=h2h,mci=mci,pio=pio)
        print("h2h = ",h2h," pio = ",pio)
    return lbels, h2h, mci, pio

# Main process

In [18]:
## Experiment configuration shared by the cells below.
## Train and test are together
seed = 3
# Wall-clock start time of the run.
start = time.time()
# `random` is numpy.random (see the import cell), so this seeds NumPy's global generator.
random.seed(seed)
# Number of "iter=<i>/" result folders that are read/written under DataPath.
iters = 100
# NOTE(review): the meaning of the following settings is not visible in this notebook;
# presumably they are data-generation parameters — confirm against the Data module.
p = 4
d = 10
samples_test = 10000
samples_train = 100
lower = 0
upper = 1
alpha = 1
n_epsilon = 1
mis = 1
# Cap passed to the prediction/cost helpers and applied as np.minimum(z_test_ori, thres) later on.
thres = 10000
ver = 1
x_dister = 'uniform'
e_dister = 'normal'
xl = -2
xu = 2
xm = 2
xv = 0.25
#bp = abs(xl)*upper*d
bp = 7

# Default DDR hyper-parameters (presumably; the grid-search cell sweeps its own ranges).
mu = 0.25
lamb = 0.25

# Results live two directory levels above the current working directory.
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
grandparent_directory = os.path.dirname(parent_directory)
# DataPath keeps its trailing "/" because later cells build paths by string concatenation.
DataPath = grandparent_directory + '/Data/Regret_H2H_Comparison/'
pathlib.Path(DataPath).mkdir(parents=True, exist_ok=True)
print("grandparent_directory:", grandparent_directory)
print("DataPath:", DataPath)

grandparent_directory: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization
DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data/Regret_H2H_Comparison/


# Obtain cost of each approach

In [19]:
# Load the per-iteration cost records of the oracle, OLS and DDR approaches
# from the pickles stored under DataPath (one list entry per iteration).
c_oracle, c_ols, c_ddr = (
    obtain_cost(iters, DataPath, pkl_name)
    for pkl_name in ("Oracle.pkl", "OLS.pkl", "DDR.pkl")
)

In [26]:
# Compare DDR against OLS for every iteration and collect the per-iteration
# labels, h2h, mci and pio values (each comparison is also pickled to
# "<DataPath>iter=<i>/DDR_vs_OLS.pkl" by calculate_comparison).
lbels_ddrols = []; h2h_ddrols = []; mci_ddrols = []; pio_ddrols = []
Data = {}
data_gen = data_generation()
perf_eva = performance_evaluation()
for i in range(iters):
    print("============== iteration = ",i,"==============")
    file_path = DataPath + "iter="+str(i) +"/"
    pathlib.Path(file_path).mkdir(parents=True, exist_ok=True)

    # Unpack into a scratch name and append: assigning straight to
    # `lbels_ddrols` would replace the accumulator list with the labels of
    # the current iteration only.
    lbels, h2h, mci, pio = calculate_comparison(file_path+"DDR_vs_OLS.pkl",perf_eva,c_ddr[i], c_ols[i], c_oracle[i],ypio = 1)
    lbels_ddrols.append(lbels); h2h_ddrols.append(h2h); mci_ddrols.append(mci); pio_ddrols.append(pio)
    # Other pairwise comparisons, currently disabled. NOTE(review): the cost
    # lists they reference (c_spo, c_lasso, c_ridge) are not loaded in the
    # visible cells, and they would need per-iteration indexing ([i]) like the
    # call above before being re-enabled.
    # calculate_comparison(file_path+"DDR_vs_SPO.pkl",perf_eva,c_ddr, c_spo, c_oracle,ypio = 1)
    # calculate_comparison(file_path+"Lasso_vs_OLS.pkl",perf_eva,c_lasso, c_ols, c_oracle,ypio = 1)
    # calculate_comparison(file_path+"Lasso_vs_DDR.pkl",perf_eva,c_lasso, c_ddr, c_oracle,ypio = 1)
    # calculate_comparison(file_path+"Ridge_vs_OLS.pkl",perf_eva,c_ridge, c_ols, c_oracle,ypio = 1)
    # calculate_comparison(file_path+"Ridge_vs_DDR.pkl",perf_eva,c_ridge, c_ddr, c_oracle,ypio = 1)

h2h =  0.6587301587301587  pio =  0.03507476215258564
h2h =  0.42142857142857143  pio =  0.003118474803942481
h2h =  0.6818181818181818  pio =  0.04439574301091119
h2h =  0.6035502958579881  pio =  0.014927833107800617
h2h =  0.4147727272727273  pio =  -0.005907309133409202
h2h =  0.5224719101123596  pio =  0.003257236066615833
h2h =  0.36363636363636365  pio =  -0.030777045171919426
h2h =  0.5555555555555556  pio =  0.016528690121354325
h2h =  0.4494949494949495  pio =  -0.0036800653251629575
h2h =  0.6203208556149733  pio =  0.02156939663867968
h2h =  0.8507462686567164  pio =  0.07096399180845175
h2h =  0.4230769230769231  pio =  -0.018891567676433373
h2h =  0.541095890410959  pio =  0.00524277852548161
h2h =  0.5902439024390244  pio =  0.02317548942067235
h2h =  0.5204081632653061  pio =  0.018238051452433946
h2h =  0.5583756345177665  pio =  0.01848143841638906
h2h =  0.6699029126213593  pio =  0.039756061376836016
h2h =  0.4214876033057851  pio =  -0.003956048104755489
h2h =  0.4

# Calibrate

In [28]:
# Report the smallest mci and the largest pio over all iterations, each
# followed by the index of the iteration at which it occurs.
print( min(mci_ddrols), np.argmin(mci_ddrols) )
print( max(pio_ddrols), np.argmax(pio_ddrols) )

-0.0008254129868500973 82
0.09915734715875203 82


In [39]:
# Reload the data set of the iteration with the largest pio and expose its
# arrays as notebook-level variables.
max_index = np.argmax(pio_ddrols)
file_path = DataPath + "iter="+str(max_index) +"/"
with open(file_path+"Data.pkl", "rb") as f:
    Data = pickle.load(f)
(x_test, z_test_ori, z_test,
 x_train, z_train_ori, z_train, W_star) = (
    Data[key] for key in (
        "x_test", "z_test_ori", "z_test",
        "x_train", "z_train_ori", "z_train", "W_star",
    )
)
# Cell output: smallest entry of the training targets.
np.min(z_train)

np.float64(1.797775995277656)

In [40]:
## Solve and evaluate the OLS model
# Fit OLS on the training data of the selected iteration; file_path is the
# "iter=<max_index>/" folder chosen in the previous cell.
ols_method_obj = ols_method()
W_ols, w0_ols, t_ols, obj_ols = ols_method_obj.ols_solver(file_path,x_train, z_train)
# Presumably returns the predicted parameters, the induced decisions and the estimated costs — confirm against Performance.
z_test_ols, y_test_ols, c_test_ols = perf_eva.param_prediction_and_cost_estimation(x_test, W_ols, w0_ols, thres)
# Per-row cost of the OLS decisions under the original test parameters, capped element-wise at thres.
c_ols_true =  np.sum(np.minimum(z_test_ori,thres) * y_test_ols, axis = 1)
# Squared norm of the prediction error divided by the number of test rows.
pac_ols = ( np.linalg.norm(z_test_ols - z_test) )**2/len(z_test) ## prediction accuracy

Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-25


In [41]:
# Oracle benchmark: decisions computed directly from the original test parameters.
y_test_opt = perf_eva.decision_finder(z_test_ori)
# Mean over test rows of the row-wise cost z_test_ori * y_test_opt.
# NOTE(review): this rebinds `c_oracle`, which the cost-loading cell set to the
# per-iteration list returned by obtain_cost; re-running the comparison loop
# (which indexes c_oracle[i]) after this cell would fail. Consider a distinct name.
# NOTE(review): unlike c_ols_true, z_test_ori is not capped by thres here and a
# mean is taken — confirm this asymmetry is intended.
c_oracle = np.mean(np.sum(z_test_ori * y_test_opt, axis = 1))

In [42]:
# Grid search over the DDR hyper-parameters on the selected iteration's data.
# Results are stored in dicts keyed by the (mu, lambda) pair:
#   c_ddr_true_all   - per-row cost of the DDR decisions under the capped original parameters
#   pac_ddr_all      - squared prediction error vs. z_test, divided by the number of rows
#   pac_ddr_true_all - squared prediction error vs. z_test_ori, divided by the number of rows
c_ddr_true_all = {}
pac_ddr_all = {}
pac_ddr_true_all = {}

# 13 mu values from -0.25 in steps of 0.05; 81 lambda values from 0 in steps of 0.05.
# The keys are the raw floating-point sums, so look results up by iterating
# over mu_range / lamb_range rather than by typing literals such as 0.15.
mu_range = [-0.25 + i*0.05 for i in range(13)]
lamb_range = [0 + i*0.05 for i in range(81)]

# Loop-invariant: the capped true parameters do not depend on (mu, lambda).
z_test_capped = np.minimum(z_test_ori,thres)

ddr_method_obj = ddr_method()
# Dedicated loop names so the sweep does not overwrite the notebook-level
# `mu` / `lamb` settings defined in the configuration cell.
for mu_val in mu_range:
    for lamb_val in lamb_range:
        # Obtain regression parameters
        W_ddr, w0_ddr, t_ddr = ddr_method_obj.ddr_solver(x_train, z_train, thres, mu_val, lamb_val)
        z_test_ddr, y_test_ddr, c_test_ddr = perf_eva.param_prediction_and_cost_estimation(x_test, W_ddr, w0_ddr, thres)
        c_ddr_true_all[mu_val,lamb_val] =  np.sum(z_test_capped * y_test_ddr, axis = 1)
        # obtain prediction accuracy
        pac_ddr_all[mu_val,lamb_val] = ( np.linalg.norm(z_test_ddr - z_test) )**2/len(z_test) ## prediction accuracy
        pac_ddr_true_all[mu_val,lamb_val] = ( np.linalg.norm(z_test_ddr - z_test_ori) )**2/len(z_test_ori) ## prediction accuracy
        

# Figures

In [None]:
def obtain_all_rst(iters,DataPath,file_name):
    """Read the stored h2h and mci values of every iteration, as percentages.

    For each ``i`` in ``range(iters)`` the pickle
    ``DataPath + "iter=<i>/" + file_name`` is loaded and its ``"h2h"`` and
    ``"mci"`` entries are multiplied by 100.

    Args:
        iters: Number of iteration folders to read (indices 0 .. iters-1).
        DataPath: Base directory as a string; must end with a path separator
            because paths are built by concatenation.
        file_name: Name of the comparison pickle inside each folder.

    Returns:
        tuple[list, list]: ``(all_h2h, all_mci)`` in iteration order.
    """
    h2h_pct, mci_pct = [], []
    for idx in range(iters):
        pkl_path = DataPath + "iter=" + str(idx) + "/" + file_name
        with open(pkl_path, "rb") as handle:
            record = pickle.load(handle)
        # Scale the stored fractions to percentages.
        h2h_pct.append(100 * record["h2h"])
        mci_pct.append(100 * record["mci"])
    return h2h_pct, mci_pct

# DDR vs OLS

In [None]:
# Per-iteration h2h and mci (in percent) of the DDR-vs-OLS comparison written by the comparison loop.
all_h2h_ddrols,all_mci_ddrols = obtain_all_rst(iters,DataPath,"DDR_vs_OLS.pkl")

In [None]:
# Draw the DDR-vs-OLS figure from the percentage lists; the layout arguments
# (size, move) are passed through to the plotting helper unchanged.
regret_h2h_fig.figure_plot_upleft(all_h2h_ddrols, all_mci_ddrols, figure_name = '411_ddr_ols', size = (5, 5), move = [-0.10, 0.04, 0.30, 0.55])

# Lasso vs OLS

In [None]:
# Per-iteration h2h and mci (in percent) of the Lasso-vs-OLS comparison.
# NOTE(review): the call that writes "Lasso_vs_OLS.pkl" is commented out in the
# comparison loop, so these files must already exist from an earlier run.
all_h2h_lsools,all_mci_lsools = obtain_all_rst(iters,DataPath,"Lasso_vs_OLS.pkl")

In [None]:
# Draw the Lasso-vs-OLS figure. It gets its own figure name: reusing
# '411_ddr_ols' would collide with the DDR-vs-OLS figure produced above.
# NOTE(review): '411_lasso_ols' follows the existing naming pattern — adjust if
# the paper/figure folder uses a different label.
regret_h2h_fig.figure_plot_upleft(all_h2h_lsools, all_mci_lsools, figure_name = '411_lasso_ols', size = (5, 5), move = [-0.10, 0.04, 0.30, 0.55])

# OLS vs SPO

In [None]:
# Per-iteration h2h and mci (in percent) of the OLS-vs-SPO comparison.
# The original line loaded "Lasso_vs_OLS.pkl" (copy-paste from the Lasso cell),
# which would silently plot Lasso-vs-OLS data under the OLS-vs-SPO label.
# NOTE(review): "OLS_vs_SPO.pkl" follows the "<A>_vs_<B>.pkl" naming used by the
# other comparisons — confirm the file name produced by the SPO comparison run.
all_h2h_olsspo,all_mci_olsspo = obtain_all_rst(iters,DataPath,"OLS_vs_SPO.pkl")