In [1]:
import random
import numpy as np
import torch
import os
import pathlib
import pickle
from gurobipy import *
from rsome import ro
from rsome import grb_solver as grb
import rsome as rso
from rsome import cpt_solver as cpt
import pandas as pd
# Fix PyTorch's random seeds so that torch-based steps are repeatable across runs.
# NOTE(review): `random` and NumPy are imported above but not seeded in this cell;
# NumPy is seeded later, inside Prepare_Data, via np.random.seed(coef_seed).
torch.manual_seed(42)
torch.cuda.manual_seed(42)

In [2]:
# Selects the synthetic data-generating process. The evaluation code in this
# notebook branches on exactly two values: "SPO_Data_Generation" and
# "DDR_Data_Generation". The value is also embedded in the output directory name.
data_generation_process = "SPO_Data_Generation"
# data_generation_process = "DDR_Data_Generation"

# Parameters

In [3]:
# ---- Experiment configuration -------------------------------------------------
grid = (3,3) # grid size as (rows, columns)
num_train = 100 # number of training data
num_feat = 5 # size of feature
num_test = 10000 # number of test data
deg = 1.0 # polynomial degree
e = 0.5 # scale of normal std or the range of uniform. For the error term

lower = 0 # coef lower bound
upper = 1 # coef upper bound
p = num_feat # num of features
# Number of arcs of a rows x cols grid with right/down arcs only:
# rows*(cols-1) horizontal arcs plus (rows-1)*cols vertical arcs.
# This matches what _getArcs builds and, unlike a formula based on grid[0]
# alone, stays correct for non-square grids (square grids give the same value).
d = grid[0] * (grid[1] - 1) + (grid[0] - 1) * grid[1] # num of arcs
num_nodes = grid[0] * grid[1] # one node per grid cell
alpha = e # scale of normal std or the range of uniform. For the error term
mis = deg # model misspecification
coef_seed = 1 # used as the NumPy seed inside Prepare_Data

# Distribution settings, passed through unchanged to the data generator.
x_dist = 'uniform'
e_dist = 'normal'
x_low = -2
x_up = 2
x_mean = 2
x_var = 2
# Additive constant applied to the oracle cost under "DDR_Data_Generation".
bump = 100

In [4]:
# Walk up from the working directory: the data folder lives next to the
# grandparent of the directory this notebook is run from.
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
grandparent_directory = os.path.dirname(parent_directory)

# Top-level folder for this grid size and data-generating process.
DataPath = (
    f"{os.path.dirname(grandparent_directory)}/Data/"
    f"Shortest_Path_{grid[0]}by{grid[1]}_grid_0406_{data_generation_process}/"
)
pathlib.Path(DataPath).mkdir(parents=True, exist_ok=True)
print("grandparent_directory:", grandparent_directory)
print("DataPath:", DataPath)

# Sub-folder encoding the experiment configuration; everything below is
# written underneath this directory.
DataPath += (
    f"data_size={num_train}_deg={deg}_e={e}_d={d}"
    f"_x_dist={x_dist}_coef_seed={coef_seed}_diff_W_exp=1/"
)
pathlib.Path(DataPath).mkdir(parents=True, exist_ok=True)

grandparent_directory: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Code_MacBook
DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data/Shortest_Path_3by3_grid_0406_SPO_Data_Generation/


# Generate Data

In [5]:
def Prepare_Data(DataPath,lower, upper, p, d, coef_seed,iteration_all,num_test, num_train, alpha,mis,data_generation_process,x_dist, e_dist, x_low, x_up, x_mean, x_var, bump):
    """Build the train/test data sets for every replication in ``iteration_all``.

    For each replication index a sub-directory ``<DataPath>iter=<index>/`` is
    created, ``data_generation.generate_truth`` is called to obtain ``W_star``
    and ``data_generation.generate_samples`` is called to obtain the samples.
    All remaining arguments are forwarded unchanged to those two helpers.

    Returns seven dicts keyed by replication index, in this order:
    ``x_test_all, c_test_all, x_train_all, c_train_all, noise_train_all,
    noise_test_all, W_star_all``.

    NOTE(review): ``coef_seed`` is only used to seed NumPy once up front;
    ``generate_truth`` receives the replication index in what looks like its
    seed position. ``num_test`` is passed twice to ``generate_samples``
    (5th and 10th positional argument) -- confirm both are intended.
    """
    from Data import data_generation

    generator = data_generation()
    np.random.seed(coef_seed)

    x_test_all, c_test_all = {}, {}
    x_train_all, c_train_all = {}, {}
    noise_train_all, noise_test_all = {}, {}
    W_star_all = {}

    for it in iteration_all:
        # One output folder per replication.
        iter_dir = DataPath + "iter=" + str(it) + "/"
        pathlib.Path(iter_dir).mkdir(parents=True, exist_ok=True)

        W_star = generator.generate_truth(iter_dir, lower, upper, p, d, it, data_generation_process)

        samples = generator.generate_samples(
            it, iter_dir, p, d, num_test, num_train, alpha, W_star, mis, num_test,
            data_generation_process, x_dist, e_dist, x_low, x_up, x_mean, x_var, bump,
        )
        (x_test_all[it], c_test_all[it],
         x_train_all[it], c_train_all[it],
         noise_train_all[it], noise_test_all[it],
         W_star_all[it]) = samples

    return x_test_all, c_test_all, x_train_all, c_train_all, noise_train_all,noise_test_all,W_star_all

# EPO(SPO+,...)

In [6]:
def Implement_EPO(DataPath,iteration_all,batch_size,num_epochs,method_names,W_star_all,bump,x_train_all,c_train_all,x_test_all,noise_test_all,\
                  arcs,grid,epo_runner,perfs):
    """Run an end-to-end predict-then-optimize method per replication and score it.

    For every index in ``iteration_all`` this calls ``epo_runner.run(...)`` on
    that replication's training data to obtain a coefficient matrix and an
    intercept, forms the predicted test costs ``(W @ x_test.T).T + w0`` and
    evaluates them with ``perfs`` against the noise-free oracle cost.

    Parameters
    ----------
    DataPath : str
        Base output directory; ``iter=<index>/`` sub-folders are created in it.
    iteration_all : iterable
        Replication indices; also the keys of the ``*_all`` dicts.
    batch_size, num_epochs, method_names, arcs, grid
        Forwarded to ``epo_runner.run``; ``method_names[0]`` labels the log line.
    W_star_all, x_train_all, c_train_all, x_test_all, noise_test_all : dict
        Per-replication data keyed by replication index.
    bump : number
        Additive constant of the oracle cost under "DDR_Data_Generation".
    epo_runner, perfs
        Objects providing ``run`` and ``compute_*_out_of_sample_Cost``.

    Returns
    -------
    (W_EPO_all, w0_EPO_all, cost_EPO_all) : tuple of dict
        Coefficients, intercepts and out-of-sample costs per replication.

    Raises
    ------
    ValueError
        If the module-level ``data_generation_process`` is not one of the two
        supported values. Previously this was only discovered as a ``KeyError``
        in the progress print, after the first (expensive) training run.

    Notes
    -----
    Reads the module-level names ``num_feat``, ``mis`` and
    ``data_generation_process``; they are not parameters of this function.
    """
    # Fail fast so a typo in the process name does not cost a full training run.
    if data_generation_process not in ("SPO_Data_Generation", "DDR_Data_Generation"):
        raise ValueError("Unknown data_generation_process: " + repr(data_generation_process))

    W_EPO_all = {}; w0_EPO_all = {}
    cost_EPO_all = {}
    for it in iteration_all:
        iter_dir = DataPath + "iter=" + str(it) + "/"
        pathlib.Path(iter_dir).mkdir(parents=True, exist_ok=True)

        W_EPO_all[it], w0_EPO_all[it] = epo_runner.run(method_names, iter_dir, batch_size, num_feat, grid, num_epochs,
                                                       x_train_all[it], c_train_all[it], arcs)

        # Predicted arc costs on the test features, one row per test sample.
        cost_dem = (W_EPO_all[it] @ x_test_all[it].T).T + w0_EPO_all[it]

        if data_generation_process == "SPO_Data_Generation":
            cost_oracle_ori = (W_star_all[it] @ x_test_all[it].T)/np.sqrt(num_feat) + 3
            cost_oracle_pred = (cost_oracle_ori ** mis + 1).T
            cost_EPO_all[it] = perfs.compute_SPO_out_of_sample_Cost(arcs, grid,cost_dem,cost_oracle_pred,noise_test_all[it])
        else:  # "DDR_Data_Generation" (validated above)
            cost_oracle_ori = (W_star_all[it] @ x_test_all[it].T) + bump
            cost_oracle_pred = (cost_oracle_ori ** mis).T
            cost_EPO_all[it] = perfs.compute_DDR_out_of_sample_Cost(arcs, grid,cost_dem,cost_oracle_pred,noise_test_all[it])

        print(method_names[0],": iter=",it,",cost=",np.nanmean(cost_EPO_all[it]))

    return W_EPO_all,w0_EPO_all,cost_EPO_all

# Obtain DDR estimation

### define network

In [7]:
def _getArcs(grid):
    arcs = []
    for i in range(grid[0]):
        # edges on rows
        for j in range(grid[1] - 1):
            v = i * grid[1] + j
            arcs.append((v, v + 1))
        # edges in columns
        if i == grid[0] - 1:
            continue
        for j in range(grid[1]):
            v = i * grid[1] + j
            arcs.append((v, v + grid[1]))

    arc_index_mapping = {}
    for i in range(len(arcs)):
        arc = arcs[i]
        arc_index_mapping[arc] = i

    return arcs,arc_index_mapping

In [8]:
def solve_DDR(arcs,lamb,mu_fixed,num_nodes,x_train,c_train):
    """Fit the DDR linear cost model by solving one Gurobi program.

    Decision variables (all unbounded below):
      * ``W``  -- one coefficient per (arc index, feature), d x p in total
      * ``W0`` -- one intercept per arc index
      * ``alpha`` -- one value per (training sample, node)

    For sample ``n`` and the arc at position ``ind`` with endpoints ``(i, j)``
    the predicted cost is ``sum_j W[ind, j] * x_train[n, j] + W0[ind]`` and the
    constraint
    ``alpha[n, j] - alpha[n, i] >= -mu_fixed * c_train[n, ind] - (1 - mu_fixed) * predicted``
    is added.

    The minimised objective is
    ``(sum of squared (c_train - predicted)) / N
      + lamb * (sum_n alpha[n, num_nodes-1] - alpha[n, 0]) / N``.

    Parameters
    ----------
    arcs : sequence of (tail, head) node pairs
    lamb : weight on the alpha term of the objective
    mu_fixed : mixing weight between true and predicted cost in the constraints
    num_nodes : number of nodes; nodes 0 and num_nodes-1 appear in the objective
        (presumably source and sink -- TODO confirm)
    x_train : 2-D array, N x p
    c_train : 2-D array, N x d

    Returns
    -------
    (w0_ddr_val, W_ddr_val, alpha_rst, m.ObjVal)
        Intercepts as a list of length d, coefficients as a list of d lists of
        length p, the alpha values as returned by ``m.getAttr('x', alpha)``,
        and the objective value.

    NOTE(review): rows of ``W`` are indexed by arc position while results are
    extracted with ``d`` from ``c_train.shape``; this assumes
    ``len(arcs) == d``. The solve status is not checked before reading values.
    """
    
    # N samples; p features per sample; d cost entries per sample.
    N,p = x_train.shape
    N,d = c_train.shape

    # DDR
    m = Model("ddr")
    #m.setParam("DualReductions",0)
    m.setParam('OutputFlag', 0)  # silence solver logging

    W_ind = tuplelist( [(i,j) for i in range(d) for j in range(p)] )
    w0_ind = tuplelist( [i for i in range(d)])

    W_ddr = m.addVars(W_ind, lb=-GRB.INFINITY,name = "W" )
    w0_ddr = m.addVars(w0_ind, lb=-GRB.INFINITY,name = "W0" )
    alpha = m.addVars(N,num_nodes,lb=-GRB.INFINITY,name="alpha")
    expr_obj = 0  # accumulates sum_n (alpha[n, last node] - alpha[n, 0])
    err = []      # residual expressions, one per (sample, arc)
    for n in range(N):
        cost_true_tem = c_train[n]
        expr_obj = expr_obj + alpha[n,num_nodes-1] - alpha[n,0]
        for ind in range(len(arcs)):
            # Linear prediction of this arc's cost for sample n.
            cost_pred_tem = quicksum([W_ddr[ind,j] * x_train[n,j] for j in range(p)]) + w0_ddr[ind]
            err.append(cost_true_tem[ind] - cost_pred_tem)
            e = arcs[ind]
            j = e[1]  # head node of the arc
            i = e[0]  # tail node of the arc
            # print("j = ",j,", i = ",i, ", e = ",e)
            m.addConstr(alpha[n,j] - alpha[n,i] >= -mu_fixed*cost_true_tem[ind] - (1-mu_fixed)*cost_pred_tem)

    # Mean squared residual plus lamb times the mean alpha difference.
    m.setObjective(quicksum([err[k] * err[k] for k in range(len(err))])/N + lamb*(expr_obj)/N, GRB.MINIMIZE)
    m.optimize()
    
    # Convert the solver's keyed results into plain Python lists.
    W_DDR_rst = m.getAttr('x', W_ddr)
    w0_DDR_rst = m.getAttr('x', w0_ddr)
    W_ddr_val = []
    for i in range(d):
        W_ddr_val.append([W_DDR_rst[(i,j)] for j in range(p)])
    w0_ddr_val = [w0_DDR_rst[i] for i in range(d)]

    alpha_rst = m.getAttr('x', alpha)
    return w0_ddr_val,W_ddr_val,alpha_rst,m.ObjVal

# Out-of-sample performance

### Generate data

In [9]:
# Arc list of the grid graph and the arc -> position lookup.
arcs,arc_index_mapping = _getArcs(grid)
num_arcs = len(arcs)
# Replication indices 0..99; each one gets its own data set and output folder.
iteration_all = np.arange(0,100)
# obtain data: seven dicts keyed by replication index
x_test_all, c_test_all, x_train_all, c_train_all,noise_train_all,noise_test_all,W_star_all = Prepare_Data(DataPath,lower, upper, p, d, coef_seed,iteration_all,num_test, num_train, alpha,mis,data_generation_process,x_dist, e_dist, x_low, x_up, x_mean, x_var, bump)

# Oracle

In [10]:
# Oracle benchmark: evaluate decisions made with the noise-free cost itself.
from Peformance import performance_evaluation
perfs = performance_evaluation()
# Per-replication oracle costs, keyed by replication index. Only
# cost_Oracle_para_all is filled in this cell; the other dicts stay empty.
cost_Oracle_para_all = {}; cost_Oracle_para_avg = {}
cost_Oracle_realization_all = {}; cost_Oracle_realization_avg = {}

# NOTE: `iter` shadows the built-in and stays bound after the loop.
for iter in iteration_all:
    if data_generation_process == "SPO_Data_Generation":
        # Noise-free cost: (W* x / sqrt(num_feat) + 3) ** mis + 1, one row per test sample.
        cost_oracle_ori = (W_star_all[iter] @ x_test_all[iter].T)/np.sqrt(num_feat) + 3
        cost_oracle_pred = (cost_oracle_ori ** mis + 1).T
        # The oracle cost is passed as both the 3rd and the 4th argument.
        cost_Oracle_para_all[iter] = perfs.compute_SPO_out_of_sample_Cost(arcs, grid,cost_oracle_pred,cost_oracle_pred,noise_test_all[iter])

    if data_generation_process == "DDR_Data_Generation":
        # Noise-free cost: (W* x + bump) ** mis, one row per test sample.
        cost_oracle_ori = (W_star_all[iter] @ x_test_all[iter].T) + bump
        cost_oracle_pred = (cost_oracle_ori ** mis).T
        cost_Oracle_para_all[iter] = perfs.compute_DDR_out_of_sample_Cost(arcs, grid,cost_oracle_pred,cost_oracle_pred,noise_test_all[iter])
        # cost_Oracle_para_avg[iter] = np.nanmean(cost_Oracle_para_all[iter])
    print("Oracle: iter=",iter,",cost_avg=",np.nanmean(cost_Oracle_para_all[iter]))

Set parameter Username
Academic license - for non-commercial use only - expires 2026-03-13
Oracle: iter= 0 ,cost_avg= 15.198771245211319
Oracle: iter= 1 ,cost_avg= 15.13434629431474
Oracle: iter= 2 ,cost_avg= 15.131209453162493
Oracle: iter= 3 ,cost_avg= 14.778052254689698
Oracle: iter= 4 ,cost_avg= 15.022683773560418
Oracle: iter= 5 ,cost_avg= 15.090403193068504
Oracle: iter= 6 ,cost_avg= 15.269650290877843
Oracle: iter= 7 ,cost_avg= 15.088122258101343
Oracle: iter= 8 ,cost_avg= 15.013172681939817
Oracle: iter= 9 ,cost_avg= 15.092046000496161
Oracle: iter= 10 ,cost_avg= 14.809460432722345
Oracle: iter= 11 ,cost_avg= 14.825011674465403
Oracle: iter= 12 ,cost_avg= 14.849206617118119
Oracle: iter= 13 ,cost_avg= 14.887739402768348
Oracle: iter= 14 ,cost_avg= 14.915407036329157
Oracle: iter= 15 ,cost_avg= 15.06790842654837
Oracle: iter= 16 ,cost_avg= 14.864757873377311
Oracle: iter= 17 ,cost_avg= 14.922894580086675
Oracle: iter= 18 ,cost_avg= 15.032092258376247
Oracle: iter= 19 ,cost_avg= 

### EPO performance

In [11]:
# EPO, including SPO, PG, LTR
# Training settings forwarded to epo_runner.run through Implement_EPO.
batch_size = 20
num_epochs = 1000
# method_names = ["spo+","pg","ltr"]
from PYEPO import PyEPO_Method
epo_runner = PyEPO_Method()
# NOTE(review): `perfs` is re-created here; an instance already exists from the Oracle cell.
from Peformance import performance_evaluation
perfs = performance_evaluation()

# Only SPO+ is run; Implement_EPO uses method_names[0] as the log label.
method_names = ["spo+"]
W_SPO_all,w0_SPO_all,cost_SPO_all = Implement_EPO(DataPath,iteration_all,batch_size,num_epochs,method_names,W_star_all,bump,x_train_all,c_train_all,x_test_all,noise_test_all,\
                  arcs,grid,epo_runner,perfs)

# method_names = ["pg"]
# W_PG_all,w0_PG_all,cost_PG_all = Implement_EPO(DataPath,iteration_all,batch_size,num_epochs,method_names,W_star_all,bump,x_train_all,c_train_all,x_test_all,noise_test_all,\
#                   arcs,grid,epo_runner,perfs)

# method_names = ["ltr"]
# W_LTR_all,w0_LTR_all,cost_LTR_all = Implement_EPO(DataPath,iteration_all,batch_size,num_epochs,method_names,W_star_all,bump,x_train_all,c_train_all,x_test_all,noise_test_all,\
#                   arcs,grid,epo_runner,perfs)

Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 7510.89it/s]


Num of cores: 2
spo+ : iter= 0 ,cost= 15.953296400469004
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4345.39it/s]


Num of cores: 2
spo+ : iter= 1 ,cost= 15.842170290052923
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 8218.32it/s]


Num of cores: 2
spo+ : iter= 2 ,cost= 15.93159039489277
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4246.45it/s]


Num of cores: 2
spo+ : iter= 3 ,cost= 15.862096314005028
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4407.82it/s]


Num of cores: 2
spo+ : iter= 4 ,cost= 16.106217421799943
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4686.90it/s]


Num of cores: 2
spo+ : iter= 5 ,cost= 16.12351885752815
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4947.81it/s]


Num of cores: 2
spo+ : iter= 6 ,cost= 16.17799828074668
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4036.32it/s]


Num of cores: 2
spo+ : iter= 7 ,cost= 15.8397030194146
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4688.00it/s]


Num of cores: 2
spo+ : iter= 8 ,cost= 15.748596148510996
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4484.50it/s]


Num of cores: 2
spo+ : iter= 9 ,cost= 15.886882905879073
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4506.56it/s]


Num of cores: 2
spo+ : iter= 10 ,cost= 15.95267163971456
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4406.24it/s]


Num of cores: 2
spo+ : iter= 11 ,cost= 16.17463771330582
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4390.15it/s]


Num of cores: 2
spo+ : iter= 12 ,cost= 15.91786566226304
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4084.47it/s]


Num of cores: 2
spo+ : iter= 13 ,cost= 15.3536496781164
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4190.28it/s]


Num of cores: 2
spo+ : iter= 14 ,cost= 15.628437942444096
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4505.31it/s]


Num of cores: 2
spo+ : iter= 15 ,cost= 15.37273374438206
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 5085.98it/s]


Num of cores: 2
spo+ : iter= 16 ,cost= 16.295424645092826
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4134.64it/s]


Num of cores: 2
spo+ : iter= 17 ,cost= 15.403769457562795
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4553.28it/s]


Num of cores: 2
spo+ : iter= 18 ,cost= 16.168863510056624
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 5149.29it/s]


Num of cores: 2
spo+ : iter= 19 ,cost= 15.603021715114824
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4230.47it/s]


Num of cores: 2
spo+ : iter= 20 ,cost= 15.654623666347803
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4112.02it/s]


Num of cores: 2
spo+ : iter= 21 ,cost= 15.938631220012839
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4316.46it/s]


Num of cores: 2
spo+ : iter= 22 ,cost= 16.211247557273747
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 7001.83it/s]


Num of cores: 2
spo+ : iter= 23 ,cost= 16.235372650769726
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4383.86it/s]


Num of cores: 2
spo+ : iter= 24 ,cost= 15.73799578685977
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 6040.45it/s]


Num of cores: 2
spo+ : iter= 25 ,cost= 15.909477020717295
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4473.11it/s]


Num of cores: 2
spo+ : iter= 26 ,cost= 16.204679528184165
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 7095.16it/s]


Num of cores: 2
spo+ : iter= 27 ,cost= 15.793045521453633
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4071.19it/s]


Num of cores: 2
spo+ : iter= 28 ,cost= 15.8701581917218
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4002.39it/s]


Num of cores: 2
spo+ : iter= 29 ,cost= 16.50383853897253
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4227.96it/s]


Num of cores: 2
spo+ : iter= 30 ,cost= 16.256666129940843
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4316.28it/s]


Num of cores: 2
spo+ : iter= 31 ,cost= 15.446862882136926
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4214.96it/s]


Num of cores: 2
spo+ : iter= 32 ,cost= 16.42853665153644
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 6855.90it/s]


Num of cores: 2
spo+ : iter= 33 ,cost= 15.838456016861953
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4154.34it/s]


Num of cores: 2
spo+ : iter= 34 ,cost= 15.611604342540229
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4598.16it/s]


Num of cores: 2
spo+ : iter= 35 ,cost= 15.823517229482627
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4440.06it/s]


Num of cores: 2
spo+ : iter= 36 ,cost= 16.147509220389857
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 5824.45it/s]


Num of cores: 2
spo+ : iter= 37 ,cost= 15.67302333952131
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 6931.02it/s]


Num of cores: 2
spo+ : iter= 38 ,cost= 15.978267343668621
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4272.75it/s]


Num of cores: 2
spo+ : iter= 39 ,cost= 16.205778554714637
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4236.93it/s]


Num of cores: 2
spo+ : iter= 40 ,cost= 15.595830170616269
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 5909.55it/s]


Num of cores: 2
spo+ : iter= 41 ,cost= 15.965883159454394
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 6369.00it/s]


Num of cores: 2
spo+ : iter= 42 ,cost= 16.181546288157683
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4556.25it/s]


Num of cores: 2
spo+ : iter= 43 ,cost= 15.831874503943679
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 3224.78it/s]


Num of cores: 2
spo+ : iter= 44 ,cost= 16.20936638634322
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 5253.52it/s]


Num of cores: 2
spo+ : iter= 45 ,cost= 15.751813331064183
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4168.42it/s]


Num of cores: 2
spo+ : iter= 46 ,cost= 15.947665523963195
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4060.75it/s]


Num of cores: 2
spo+ : iter= 47 ,cost= 15.768591633816639
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4178.72it/s]


Num of cores: 2
spo+ : iter= 48 ,cost= 15.58251993578041
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4238.17it/s]


Num of cores: 2
spo+ : iter= 49 ,cost= 16.4230893344544
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 7222.84it/s]


Num of cores: 2
spo+ : iter= 50 ,cost= 16.598265342815118
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 3887.29it/s]


Num of cores: 2
spo+ : iter= 51 ,cost= 15.618850913612302
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4027.91it/s]


Num of cores: 2
spo+ : iter= 52 ,cost= 15.939652127283814
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4521.77it/s]


Num of cores: 2
spo+ : iter= 53 ,cost= 16.02815377622264
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 6834.45it/s]


Num of cores: 2
spo+ : iter= 54 ,cost= 16.122148511411314
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4431.43it/s]


Num of cores: 2
spo+ : iter= 55 ,cost= 15.497296226112718
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4252.82it/s]


Num of cores: 2
spo+ : iter= 56 ,cost= 16.125633699606254
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4318.86it/s]


Num of cores: 2
spo+ : iter= 57 ,cost= 15.903977011442857
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4408.65it/s]


Num of cores: 2
spo+ : iter= 58 ,cost= 16.119558828256682
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4181.84it/s]


Num of cores: 2
spo+ : iter= 59 ,cost= 15.954460246466734
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4909.70it/s]


Num of cores: 2
spo+ : iter= 60 ,cost= 15.788010392503612
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4254.38it/s]


Num of cores: 2
spo+ : iter= 61 ,cost= 15.77307608051409
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 7353.40it/s]


Num of cores: 2
spo+ : iter= 62 ,cost= 15.798115100311756
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 3912.82it/s]


Num of cores: 2
spo+ : iter= 63 ,cost= 15.714475639320977
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4366.61it/s]


Num of cores: 2
spo+ : iter= 64 ,cost= 15.865212480399105
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 5273.67it/s]


Num of cores: 2
spo+ : iter= 65 ,cost= 15.321966780850765
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4097.68it/s]


Num of cores: 2
spo+ : iter= 66 ,cost= 15.57273609101076
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4367.93it/s]


Num of cores: 2
spo+ : iter= 67 ,cost= 15.920601919568814
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4221.19it/s]


Num of cores: 2
spo+ : iter= 68 ,cost= 15.814240783943582
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 3651.51it/s]


Num of cores: 2
spo+ : iter= 69 ,cost= 15.889812252755032
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4527.58it/s]


Num of cores: 2
spo+ : iter= 70 ,cost= 16.201253069657426
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 7086.17it/s]


Num of cores: 2
spo+ : iter= 71 ,cost= 15.57890153141913
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4851.04it/s]


Num of cores: 2
spo+ : iter= 72 ,cost= 15.604201367837662
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4408.00it/s]


Num of cores: 2
spo+ : iter= 73 ,cost= 15.910434478538928
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4210.56it/s]


Num of cores: 2
spo+ : iter= 74 ,cost= 15.903609214589912
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4218.69it/s]


Num of cores: 2
spo+ : iter= 75 ,cost= 15.458424301244778
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4194.98it/s]


Num of cores: 2
spo+ : iter= 76 ,cost= 16.1996990915808
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4133.95it/s]


Num of cores: 2
spo+ : iter= 77 ,cost= 16.29587599366054
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4066.69it/s]


Num of cores: 2
spo+ : iter= 78 ,cost= 15.92612998855845
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 7141.31it/s]


Num of cores: 2
spo+ : iter= 79 ,cost= 15.625152402146513
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4198.04it/s]


Num of cores: 2
spo+ : iter= 80 ,cost= 15.411315476980509
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4395.58it/s]


Num of cores: 2
spo+ : iter= 81 ,cost= 15.985750280197045
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 5119.56it/s]


Num of cores: 2
spo+ : iter= 82 ,cost= 16.443263987495648
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 3826.88it/s]


Num of cores: 2
spo+ : iter= 83 ,cost= 16.193863725981338
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4038.93it/s]


Num of cores: 2
spo+ : iter= 84 ,cost= 16.107423041083745
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4141.25it/s]


Num of cores: 2
spo+ : iter= 85 ,cost= 15.929920417203885
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 3973.50it/s]


Num of cores: 2
spo+ : iter= 86 ,cost= 15.64492666186011
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4288.13it/s]


Num of cores: 2
spo+ : iter= 87 ,cost= 15.980689139404431
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4299.95it/s]


Num of cores: 2
spo+ : iter= 88 ,cost= 16.191050659953603
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4347.69it/s]


Num of cores: 2
spo+ : iter= 89 ,cost= 16.081679145463738
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 5007.23it/s]


Num of cores: 2
spo+ : iter= 90 ,cost= 15.6529115492915
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 3791.32it/s]


Num of cores: 2
spo+ : iter= 91 ,cost= 15.885951452239166
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4454.92it/s]


Num of cores: 2
spo+ : iter= 92 ,cost= 16.02323921847219
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4317.26it/s]


Num of cores: 2
spo+ : iter= 93 ,cost= 15.773850756385194
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4206.46it/s]


Num of cores: 2
spo+ : iter= 94 ,cost= 15.413403756913135
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 5159.43it/s]


Num of cores: 2
spo+ : iter= 95 ,cost= 16.089031726102938
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 3754.17it/s]


Num of cores: 2
spo+ : iter= 96 ,cost= 15.708540961891943
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 3884.05it/s]


Num of cores: 2
spo+ : iter= 97 ,cost= 16.14342626708817
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 4754.21it/s]


Num of cores: 2
spo+ : iter= 98 ,cost= 16.23869802438665
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 3949.62it/s]


Num of cores: 2
spo+ : iter= 99 ,cost= 15.830634527622763


### OLS performance

In [12]:
# OLS baseline: fit a least-squares cost model per replication and evaluate the
# decisions it induces on the test set.
from OLS import ols_method
ols_method_obj = ols_method()
# Per-replication results of ols_solver, keyed by replication index.
W_ols_all = {}; w0_ols_all = {}; t_ols_all = {}; obj_ols_all = {}
cost_OLS_all = {}
# NOTE: `iter` shadows the built-in and stays bound after the loop.
for iter in iteration_all:
    # compute OLS performance
    W_ols_all[iter], w0_ols_all[iter], t_ols_all[iter], obj_ols_all[iter] = ols_method_obj.ols_solver("",x_train_all[iter], c_train_all[iter])
    # Predicted arc costs on the test features, one row per test sample.
    cost_dem = (W_ols_all[iter] @ x_test_all[iter].T).T + w0_ols_all[iter]

    if data_generation_process == "SPO_Data_Generation":
        cost_oracle_ori = (W_star_all[iter] @ x_test_all[iter].T)/np.sqrt(num_feat) + 3
        cost_oracle_pred = (cost_oracle_ori ** mis + 1).T
        cost_OLS_all[iter] = perfs.compute_SPO_out_of_sample_Cost(arcs, grid,cost_dem,cost_oracle_pred,noise_test_all[iter])

    if data_generation_process == "DDR_Data_Generation":
        cost_oracle_ori = (W_star_all[iter] @ x_test_all[iter].T) + bump
        cost_oracle_pred = (cost_oracle_ori ** mis).T
        cost_OLS_all[iter] = perfs.compute_DDR_out_of_sample_Cost(arcs, grid,cost_dem,cost_oracle_pred,noise_test_all[iter])
    # The printed value is the mean cost itself (not a ratio), so it is labelled
    # the same way as in the Oracle cell.
    print("OLS: iter=",iter,",cost_avg=",np.nanmean(cost_OLS_all[iter]))

OLS: iter= 0 ,cost_avg ratio = 15.3693120811629
OLS: iter= 1 ,cost_avg ratio = 15.276839519535102
OLS: iter= 2 ,cost_avg ratio = 15.203512465270903
OLS: iter= 3 ,cost_avg ratio = 14.904444409318696
OLS: iter= 4 ,cost_avg ratio = 15.081614491376973
OLS: iter= 5 ,cost_avg ratio = 15.313624774831242
OLS: iter= 6 ,cost_avg ratio = 15.450699840066617
OLS: iter= 7 ,cost_avg ratio = 15.2459621660006
OLS: iter= 8 ,cost_avg ratio = 15.089716086809018
OLS: iter= 9 ,cost_avg ratio = 15.221130219791815
OLS: iter= 10 ,cost_avg ratio = 14.91130060502548
OLS: iter= 11 ,cost_avg ratio = 14.94111856171883
OLS: iter= 12 ,cost_avg ratio = 14.944505626231342
OLS: iter= 13 ,cost_avg ratio = 14.980762553982151
OLS: iter= 14 ,cost_avg ratio = 15.05243373071492
OLS: iter= 15 ,cost_avg ratio = 15.20234148151378
OLS: iter= 16 ,cost_avg ratio = 14.965162789263024
OLS: iter= 17 ,cost_avg ratio = 15.122586873938276
OLS: iter= 18 ,cost_avg ratio = 15.133899567645942
OLS: iter= 19 ,cost_avg ratio = 15.29106552392893

### DDR performance

In [13]:
def obtain_DDR_out_of_sample_performance(mu_all,lamb_all,num_nodes,x_train,c_train,x_test,noise_test,perfs,grid,W_star,bump,mis):
    """Fit and evaluate DDR for every (mu, lamb) pair on one replication.

    For each pair the model is fitted with ``solve_DDR`` on the training data,
    the predicted test costs ``(W @ x_test.T).T + w0`` are formed and scored
    with ``perfs`` against the noise-free oracle cost.

    Returns
    -------
    cost_DDR : dict
        ``(mu, lamb) ->`` value returned by ``perfs.compute_*_out_of_sample_Cost``.
    w0_ddr_dict, W_ddr_dict : dict
        ``(mu, lamb) ->`` fitted intercepts / coefficients.
    cost_DDR_avg : ndarray, shape (len(mu_all), len(lamb_all))
        NaN-ignoring mean cost, rows following ``mu_all`` and columns ``lamb_all``.

    Notes
    -----
    Reads the module-level names ``arcs``, ``num_feat`` and
    ``data_generation_process``; they are not parameters of this function.
    """
    cost_DDR, w0_ddr_dict, W_ddr_dict = {}, {}, {}
    cost_DDR_avg = np.zeros((len(mu_all), len(lamb_all)))

    for row, mu in enumerate(mu_all):
        for col, lamb in enumerate(lamb_all):
            key = (mu, lamb)

            w0_hat, W_hat, _alpha_values, _objective = solve_DDR(arcs, lamb, mu, num_nodes, x_train, c_train)
            w0_ddr_dict[key] = w0_hat
            W_ddr_dict[key] = W_hat

            # Predicted arc costs on the test features, one row per test sample.
            cost_dem = (W_hat @ x_test.T).T + w0_hat

            if data_generation_process == "SPO_Data_Generation":
                oracle_linear = (W_star @ x_test.T) / np.sqrt(num_feat) + 3
                oracle_cost = (oracle_linear ** mis + 1).T
                cost_DDR[key] = perfs.compute_SPO_out_of_sample_Cost(arcs, grid, cost_dem, oracle_cost, noise_test)
            elif data_generation_process == "DDR_Data_Generation":
                oracle_linear = (W_star @ x_test.T) + bump
                oracle_cost = (oracle_linear ** mis).T
                cost_DDR[key] = perfs.compute_DDR_out_of_sample_Cost(arcs, grid, cost_dem, oracle_cost, noise_test)

            cost_DDR_avg[row, col] = np.nanmean(cost_DDR[key])

    return cost_DDR,w0_ddr_dict,W_ddr_dict,cost_DDR_avg

In [14]:
def obtain_ddr_rst(iteration_all,mu_all,lamb_all,num_nodes,x_train_all,c_train_all,x_test_all,noise_test_all,perfs,grid,W_star_all,bump,mis):
    """Run the DDR (mu, lamb) sweep for every replication.

    Calls ``obtain_DDR_out_of_sample_performance`` once per index in
    ``iteration_all`` with that replication's data and prints a progress line.

    Returns four dicts keyed by replication index:
    ``cost_DDR_all, w0_ddr_all, W_ddr_all, cost_DDR_avg_all``.
    """
    cost_DDR_all, w0_ddr_all, W_ddr_all, cost_DDR_avg_all = {}, {}, {}, {}

    for it in iteration_all:
        sweep_result = obtain_DDR_out_of_sample_performance(
            mu_all, lamb_all, num_nodes,
            x_train_all[it], c_train_all[it],
            x_test_all[it], noise_test_all[it],
            perfs, grid, W_star_all[it], bump, mis,
        )
        cost_DDR_all[it], w0_ddr_all[it], W_ddr_all[it], cost_DDR_avg_all[it] = sweep_result
        print("DDR iter = ",it)

    return cost_DDR_all,w0_ddr_all,W_ddr_all,cost_DDR_avg_all

In [15]:
# Hyper-parameter grid for DDR: mu from 0.1 and lamb from 0.0, both in steps of
# 0.1 with 1.0 excluded. Rounding removes floating-point noise from np.arange so
# the values can be used as dictionary keys and as CSV row/column labels.
mu_all = np.round(np.arange(0.1,1.0,0.1),4)
lamb_all = np.round(np.arange(0.0,1.0,0.1),4)
# One DDR fit and evaluation per (replication, mu, lamb).
cost_DDR_all,w0_ddr_all,W_ddr_all,cost_DDR_avg_all = obtain_ddr_rst(iteration_all,mu_all,lamb_all,num_nodes,x_train_all,c_train_all,x_test_all,noise_test_all,perfs,grid,W_star_all,bump,mis)

DDR iter =  0
DDR iter =  1
DDR iter =  2
DDR iter =  3
DDR iter =  4
DDR iter =  5
DDR iter =  6
DDR iter =  7
DDR iter =  8
DDR iter =  9
DDR iter =  10
DDR iter =  11
DDR iter =  12
DDR iter =  13
DDR iter =  14
DDR iter =  15
DDR iter =  16
DDR iter =  17
DDR iter =  18
DDR iter =  19
DDR iter =  20
DDR iter =  21
DDR iter =  22
DDR iter =  23
DDR iter =  24
DDR iter =  25
DDR iter =  26
DDR iter =  27
DDR iter =  28
DDR iter =  29
DDR iter =  30
DDR iter =  31
DDR iter =  32
DDR iter =  33
DDR iter =  34
DDR iter =  35
DDR iter =  36
DDR iter =  37
DDR iter =  38
DDR iter =  39
DDR iter =  40
DDR iter =  41
DDR iter =  42
DDR iter =  43
DDR iter =  44
DDR iter =  45
DDR iter =  46
DDR iter =  47
DDR iter =  48
DDR iter =  49
DDR iter =  50
DDR iter =  51
DDR iter =  52
DDR iter =  53
DDR iter =  54
DDR iter =  55
DDR iter =  56
DDR iter =  57
DDR iter =  58
DDR iter =  59
DDR iter =  60
DDR iter =  61
DDR iter =  62
DDR iter =  63
DDR iter =  64
DDR iter =  65
DDR iter =  66
DDR i

In [16]:
# Regret reduction of DDR relative to OLS, per (mu, lamb):
#   (mean OLS cost - mean DDR cost) / |mean OLS cost - mean oracle cost|
# computed per replication and then averaged over all replications.
regret_ddr_vs_ols_para_all = {}; regret_ddr_vs_ols_para_avg = np.zeros((len(mu_all),len(lamb_all)))

cost_OLS_avg_all = np.zeros(len(iteration_all))
cost_Oracle_para_avg_all = np.zeros(len(iteration_all))
# NOTE(review): allocated but never written in this cell.
cost_Oracle_realization_avg_all = np.zeros(len(iteration_all))

for iter_index in range(len(iteration_all)):
    # `iter` is the replication key; `iter_index` its position in iteration_all.
    iter = iteration_all[iter_index]
    cost_OLS_avg_all[iter_index] = np.nanmean(cost_OLS_all[iter])
    cost_Oracle_para_avg_all[iter_index] = np.nanmean(cost_Oracle_para_all[iter])

    # cost_DDR_avg_all[iter] is a (mu, lamb) matrix, so the result is a matrix too.
    # NOTE(review): the denominator is zero when OLS matches the oracle exactly.
    regret_ddr_vs_ols_para_all[iter_index] = (cost_OLS_avg_all[iter_index] - cost_DDR_avg_all[iter])/np.abs(cost_OLS_avg_all[iter_index] - cost_Oracle_para_avg_all[iter_index])
    regret_ddr_vs_ols_para_avg = regret_ddr_vs_ols_para_avg + regret_ddr_vs_ols_para_all[iter_index]

regret_ddr_vs_ols_para_avg = regret_ddr_vs_ols_para_avg/len(iteration_all)

# Save the averaged matrix with mu as row labels and lamb as column labels.
regret_DDR_vs_OLS_para_avg_df = pd.DataFrame(regret_ddr_vs_ols_para_avg)
regret_DDR_vs_OLS_para_avg_df.index = mu_all
regret_DDR_vs_OLS_para_avg_df.columns = lamb_all
regret_DDR_vs_OLS_para_avg_df.to_csv(DataPath+"regret_DDR_vs_OLS_para_avg.csv")

In [None]:
# Persist the per-replication cost dictionaries of every method under DataPath.
# Each file is written in turn, so a failure part-way leaves the earlier files on disk.
# NOTE(review): these are pickle files -- only load them back from trusted locations.
with open(DataPath+'cost_OLS_all.pkl', "wb") as tf:
    pickle.dump(cost_OLS_all,tf)
with open(DataPath+'cost_Oracle_para_all.pkl', "wb") as tf:
    pickle.dump(cost_Oracle_para_all,tf)
with open(DataPath+'cost_DDR_all.pkl', "wb") as tf:
    pickle.dump(cost_DDR_all,tf)
with open(DataPath+'cost_SPO_all.pkl', "wb") as tf:
    pickle.dump(cost_SPO_all,tf)

# Plot figures

In [18]:
def cross_compare2plus(c_item, c_base, c_oracle):
    """Compare a candidate method against a baseline, instance by instance.

    Parameters
    ----------
    c_item : array-like
        Per-instance costs of the candidate method.
    c_base : array-like
        Per-instance costs of the baseline, aligned with ``c_item``.
    c_oracle : array-like or scalar
        Oracle costs; only their NaN-ignoring mean is used.

    Returns
    -------
    lbel : ndarray, shape (N, 1)
        ``1`` where the candidate is strictly cheaper, ``-1`` where it is
        strictly more expensive, ``0`` otherwise (ties and NaN comparisons).
    win_ratio : float
        ``wins / (wins + losses)``. Ties and comparisons involving NaN are
        excluded from the denominator; ``0.5`` when no comparison is decisive.
    regret_reduction : float
        ``(mean(c_base) - mean(c_item)) / |mean(c_base) - mean(c_oracle)|``
        with NaN-ignoring means. Not finite when the baseline mean equals the
        oracle mean.
    """
    c_item = np.asarray(c_item)
    c_base = np.asarray(c_base)
    c_oracle = np.asarray(c_oracle)

    N = len(c_item)
    c_diff = c_item - c_base

    # NaN differences compare False on both sides, so they are neither a win
    # nor a loss.
    item_wins = c_diff < 0
    item_loses = c_diff > 0

    lbel = np.zeros((N,1))
    lbel[item_wins] = 1
    lbel[item_loses] = -1

    wins = np.sum(item_wins)
    losses = np.sum(item_loses)

    # Count only decisive comparisons. Using N minus the number of ties would
    # leave NaN entries in the denominator and bias the ratio downwards, which
    # is inconsistent with the NaN-ignoring means used below.
    decisive = wins + losses
    if decisive == 0:
        win_ratio = 0.5
    else:
        win_ratio = wins/decisive

    regret_reduction = (np.nanmean(c_base) - np.nanmean(c_item))/np.abs(np.nanmean(c_base) - np.nanmean(c_oracle))
    return lbel, win_ratio, regret_reduction

### DDR figure

In [19]:
# H2H_DDR_vs_OLS_all = {}; regret_reduction_DDR_vs_OLS_all = {}
# for mu in mu_all:
#     for lamb in lamb_all:
#         H2H_DDR_vs_OLS_arr = np.zeros(len(iteration_all)); regret_reduction_DDR_vs_OLS_arr = np.zeros(len(iteration_all))
#         # print("lamb = ",lamb)
#         iter_index = 0
#         for iter in iteration_all:
#             lbel, H2H_DDR_vs_OLS_arr[iter_index], regret_reduction_DDR_vs_OLS_arr[iter_index] = cross_compare2plus(cost_DDR_all[iter][mu,lamb],cost_OLS_all[iter], cost_Oracle_para_avg_all[iter])
#             iter_index = iter_index + 1
#         H2H_DDR_vs_OLS_all[mu,lamb] = H2H_DDR_vs_OLS_arr; regret_reduction_DDR_vs_OLS_all[mu,lamb] = regret_reduction_DDR_vs_OLS_arr

In [20]:
# mu = mu_all[4]
# lamb = lamb_all[6]
# import Figures
# file_name = DataPath + "figure_mu="+str(mu)+"_lamb="+str(lamb)
# Figures.figure_plot_upleft(H2H_DDR_vs_OLS_all[mu,lamb]*100, regret_reduction_DDR_vs_OLS_all[mu,lamb]*100, file_name, size = (5, 5), move = [-0.12, 0.04, 0.35, 0.55], 
#                     ysame = 0, yrange = [6,6], sublabel = '', ypio = 1)

### SPO figure

In [21]:
# H2H_SPO_vs_OLS_all = np.zeros(len(iteration_all)); regret_reduction_SPO_vs_OLS_all = np.zeros(len(iteration_all))
# iter_index = 0
# for iter in iteration_all:
#     regret_SPO_OLS = (np.nanmean(cost_OLS_all[iter]) - np.nanmean(cost_SPO_all[iter]))/np.abs(np.nanmean(cost_OLS_all[iter_index]) - np.nanmean(cost_Oracle_para_all[iter]))

#     regret_SPO_OLS 
#     print("iter=",iter,",SPO cost Ratio = ",np.nanmean(cost_SPO_all[iter])/np.nanmean(cost_Oracle_para_all[iter]),",regret=",regret_SPO_OLS)

#     # lbel, H2H_SPO_vs_OLS_all[iter_index], regret_reduction_SPO_vs_OLS_all[iter_index] = cross_compare2plus(cost_SPO_all[iter],cost_OLS_all[iter], cost_Oracle_para_avg_all[iter])
#     iter_index = iter_index + 1

In [22]:
# Mean OLS cost for the single replication currently bound to the global `iter`
# (left over from the most recently executed loop), not an average over all
# replications.
np.nanmean(cost_OLS_all[iter])

np.float64(15.230254699877122)

In [23]:
# Mean SPO+ cost for the single replication currently bound to the global `iter`
# (left over from the most recently executed loop), not an average over all
# replications.
np.nanmean(cost_SPO_all[iter])

np.float64(15.830634527622763)

In [24]:
# Figures.figure_plot_upleft(H2H_SPO_vs_OLS_all*100, regret_reduction_SPO_vs_OLS_all*100, "", size = (5, 5), move = [-0.12, 0.04, 0.35, 0.55], 
#                     ysame = 0, yrange = [6,6], sublabel = '', ypio = 1)

In [25]:
# regret_reduction_SPO_vs_OLS_arr