In [1]:
# Standard-library and third-party imports used by this notebook.
import random
import numpy as np
import torch
import os
import pathlib
import pickle
import pandas as pd
# Fix PyTorch's seeds (torch.manual_seed, then torch.cuda.manual_seed) with the same value 42.
# NOTE(review): `random` and NumPy are imported above but are not seeded in this cell —
# confirm whether the project modules seed them internally (e.g. via coef_seed).
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Project-local modules. The statement order is kept as-is because these modules
# may run code at import time.
import Figure_H2H_Regret

# Each project import below is followed by creating one instance that is kept
# under a notebook-level name for use by other cells.
from Performance import performance_evaluation
perfs = performance_evaluation()

from Performance import H2h_Regret_Evaluation
h2h_regret_eva = H2h_Regret_Evaluation()

# DS_Obj.Simulator(...) is called by the data-generation cell.
from Data import Data_Simulator
DS_Obj = Data_Simulator()

from Oracle import Oracle_Processing
Oracle_Proc = Oracle_Processing()

from OLS import OLS_Processing
OLS_Proc = OLS_Processing()

from DDR import DDR_Processing
DDR_Proc = DDR_Processing()

from PYEPO import EPO_Processing
PYEPO_Proc = EPO_Processing()

# Data_LSM.store_input_data(...) is called by the data-generation cell.
from Data_Load_Store import Load_Store_Methods
Data_LSM = Load_Store_Methods()

In [2]:
import Baseline_Params 
import Sequential_Learning_Approaches as SLA
import Integrated_Learning_Approaches as ILA

## Parameters

In [3]:
# Data-generating process; its name is embedded in the output folder name below.
data_generation_process = "SPO_Data_Generation"
# data_generation_process = "DDR_Data_Generation"

# The project root is taken to be four directory levels above the kernel's
# current working directory (one dirname for the parent, three more for the root).
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
project_directory = os.path.dirname(
    os.path.dirname(
        os.path.dirname(parent_directory)
    )
)

# Folder layout: <project root>/Data_JOC_R1/Shortest_Path_0620/Baseline_<process>/[Result/]
middle_path = f"/Data_JOC_R1/Shortest_Path_0620/Baseline_{data_generation_process}/"
working_dir = f"{project_directory}{middle_path}"
Result_dir = f"{working_dir}Result/"

# Create both folders up front; existing folders are left untouched.
pathlib.Path(working_dir).mkdir(parents=True, exist_ok=True)
pathlib.Path(Result_dir).mkdir(parents=True, exist_ok=True)

In [4]:
# Write the baseline parameters for this working directory, then read them back
# into notebook-level names (22 values, unpacked in the order get_Params returns them).
Baseline_Params.set_Params(working_dir)
(
    num_train, num_feat, num_test, deg, e,
    lower, upper, p, alpha, mis, coef_seed,
    x_dist, e_dist, x_low, x_up, x_mean, x_var, bump,
    grid_all, iteration_all, mu_all, lamb_all,
) = Baseline_Params.get_Params(working_dir)

In [5]:
# Override the training-set size unpacked from Baseline_Params.get_Params above.
# The data-folder names built in later cells embed this value as data_size=<num_train>.
num_train = 300

### Data Generation

In [6]:
# Simulate and store the train/test data for every grid, skipping any grid whose
# x_test_all.pkl is already present in its data folder.
for grid in grid_all:
    L_N = grid[0]
    # num of arcs: 2 * L_N * (L_N - 1) is (L_N - 1)^2 * 2 + 2 * (L_N - 1) factored
    d = 2 * L_N * (L_N - 1)
    num_nodes = L_N * L_N

    DataPath_parent = f"{project_directory}{middle_path}{L_N}by{L_N}_grid/"
    DataPath = (
        f"{DataPath_parent}data_size={num_train}_deg={deg}_e={e}"
        f"_num_test={num_test}_x_dist={x_dist}_e_dist={e_dist}/"
    )
    pathlib.Path(DataPath).mkdir(parents=True, exist_ok=True)
    print("DataPath:", DataPath)

    if not os.path.exists(f"{DataPath}x_test_all.pkl"):
        (
            x_test_all, c_test_all, x_train_all, c_train_all,
            noise_train_all, noise_test_all, W_star_all,
        ) = DS_Obj.Simulator(
            DataPath, lower, upper, p, d, coef_seed, iteration_all,
            num_test, num_train, alpha, mis, data_generation_process,
            x_dist, e_dist, x_low, x_up, x_mean, x_var, bump,
        )

        Data_LSM.store_input_data(
            DataPath, x_test_all, c_test_all, x_train_all, c_train_all,
            noise_test_all, noise_train_all, W_star_all,
        )
    else:
        print("Already Exist")

DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_0620/Baseline_SPO_Data_Generation/2by2_grid/data_size=300_deg=1.0_e=0.5_num_test=1000_x_dist=uniform_e_dist=normal/
DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_0620/Baseline_SPO_Data_Generation/3by3_grid/data_size=300_deg=1.0_e=0.5_num_test=1000_x_dist=uniform_e_dist=normal/
DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_0620/Baseline_SPO_Data_Generation/4by4_grid/data_size=300_deg=1.0_e=0.5_num_test=1000_x_dist=uniform_e_dist=normal/
DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_0620/Baseline_SPO_Data_Generation/5by5_grid/data_size=300_deg=1.0_e=0.5_num_test=1000_x_dist=uniform_e_dist=normal/


### Run Oracle

In [7]:
# Oracle run: once per grid, skipped when cost_Oracle_Ante_all.pkl already exists
# in that grid's data folder.
for grid in grid_all:
    from Network import network_design

    Network = network_design()
    arcs, arc_index_mapping = Network._getArcs(grid)
    L_N = grid[0]

    DataPath_parent = f"{project_directory}{middle_path}{L_N}by{L_N}_grid/"
    DataPath = (
        f"{DataPath_parent}data_size={num_train}_deg={deg}_e={e}"
        f"_num_test={num_test}_x_dist={x_dist}_e_dist={e_dist}/"
    )
    print("DataPath:", DataPath)

    if not os.path.exists(f"{DataPath}cost_Oracle_Ante_all.pkl"):
        SLA.Run_Oracle(
            DataPath, arcs, grid, mis, bump,
            iteration_all, num_feat, data_generation_process,
        )
    else:
        print("Results already exist")

DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_0620/Baseline_SPO_Data_Generation/2by2_grid/data_size=300_deg=1.0_e=0.5_num_test=1000_x_dist=uniform_e_dist=normal/
Set parameter Username
Academic license - for non-commercial use only - expires 2026-03-13
Oracle: iter= 20 ,cost_Oracle_Ante= 7.617615958648626
Oracle: iter= 40 ,cost_Oracle_Ante= 7.688473341569586
Oracle: iter= 60 ,cost_Oracle_Ante= 7.722031955040372
Oracle: iter= 80 ,cost_Oracle_Ante= 7.585836194396212
DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_0620/Baseline_SPO_Data_Generation/3by3_grid/data_size=300_deg=1.0_e=0.5_num_test=1000_x_dist=uniform_e_dist=normal/
Oracle: iter= 20 ,cost_Oracle_Ante= 15.15116938905062
Oracle: iter= 40 ,cost_Oracle_Ante= 14.599419289956916
Oracle: iter= 60 ,cost_Oracle_Ante= 14.98302713788994
Oracle: iter= 80 ,cost_Oracle_Ante= 14.872775043391423
DataPath: /Users/zhangxun/Dropbox/Research/

### Run OLS

In [8]:
# OLS run: once per grid, skipped when cost_OLS_Ante_all.pkl already exists
# in that grid's data folder.
for grid in grid_all:
    from Network import network_design

    Network = network_design()
    arcs, arc_index_mapping = Network._getArcs(grid)
    L_N = grid[0]

    DataPath_parent = f"{project_directory}{middle_path}{L_N}by{L_N}_grid/"
    DataPath = (
        f"{DataPath_parent}data_size={num_train}_deg={deg}_e={e}"
        f"_num_test={num_test}_x_dist={x_dist}_e_dist={e_dist}/"
    )
    print("DataPath:", DataPath)

    if not os.path.exists(f"{DataPath}cost_OLS_Ante_all.pkl"):
        SLA.Run_OLS(
            DataPath, arcs, grid, mis, bump,
            iteration_all, num_feat, data_generation_process,
        )
    else:
        print("Results already exist")

DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_0620/Baseline_SPO_Data_Generation/2by2_grid/data_size=300_deg=1.0_e=0.5_num_test=1000_x_dist=uniform_e_dist=normal/
OLS: iter= 20 ,cost_OLS_Ante= 7.628004529675882
OLS: iter= 40 ,cost_OLS_Ante= 7.6987910079722335
OLS: iter= 60 ,cost_OLS_Ante= 7.726611022206807
OLS: iter= 80 ,cost_OLS_Ante= 7.602819504381649
DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_0620/Baseline_SPO_Data_Generation/3by3_grid/data_size=300_deg=1.0_e=0.5_num_test=1000_x_dist=uniform_e_dist=normal/
OLS: iter= 20 ,cost_OLS_Ante= 15.204095364652694
OLS: iter= 40 ,cost_OLS_Ante= 14.630723384929585
OLS: iter= 60 ,cost_OLS_Ante= 15.039580392195464
OLS: iter= 80 ,cost_OLS_Ante= 14.898884146204654
DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_0620/Baseline_SPO_Data_Generation/4by4_grid/data_size=300_deg=1.0_e=0.5_num_tes

### Run DDR

In [9]:
# DDR run: once per grid, skipped when cost_DDR_Ante_all.pkl already exists in
# that grid's data folder. mu_all / lamb_all come from Baseline_Params; the two
# lines below are a disabled alternative grid, kept for reference.
# mu_all = np.round(np.arange(0.2,1.0,0.05),4)
# lamb_all = np.round(np.arange(0.2,1.0,0.05),4)
for grid in grid_all:
    from Network import network_design

    Network = network_design()
    arcs, arc_index_mapping = Network._getArcs(grid)
    L_N = grid[0]

    DataPath_parent = f"{project_directory}{middle_path}{L_N}by{L_N}_grid/"
    DataPath = (
        f"{DataPath_parent}data_size={num_train}_deg={deg}_e={e}"
        f"_num_test={num_test}_x_dist={x_dist}_e_dist={e_dist}/"
    )
    print("DataPath:", DataPath)

    if not os.path.exists(f"{DataPath}cost_DDR_Ante_all.pkl"):
        ILA.Run_DDR(
            DataPath, mu_all, lamb_all, arcs, grid, mis, bump,
            iteration_all, num_feat, data_generation_process,
        )
    else:
        print("Results already exist")

DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_0620/Baseline_SPO_Data_Generation/2by2_grid/data_size=300_deg=1.0_e=0.5_num_test=1000_x_dist=uniform_e_dist=normal/
DDR: iter= 20 ,mu= 0.95 ,lamb= 0.95 ,cost_DDR_Ante = 7.628381328692132
DDR: iter= 40 ,mu= 0.95 ,lamb= 0.95 ,cost_DDR_Ante = 7.698895666416775
DDR: iter= 60 ,mu= 0.95 ,lamb= 0.95 ,cost_DDR_Ante = 7.726517202642985
DDR: iter= 80 ,mu= 0.95 ,lamb= 0.95 ,cost_DDR_Ante = 7.602819504381649
DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_0620/Baseline_SPO_Data_Generation/3by3_grid/data_size=300_deg=1.0_e=0.5_num_test=1000_x_dist=uniform_e_dist=normal/
DDR: iter= 20 ,mu= 0.95 ,lamb= 0.95 ,cost_DDR_Ante = 15.20394301144694
DDR: iter= 40 ,mu= 0.95 ,lamb= 0.95 ,cost_DDR_Ante = 14.630820829785968
DDR: iter= 60 ,mu= 0.95 ,lamb= 0.95 ,cost_DDR_Ante = 15.039463593377281
DDR: iter= 80 ,mu= 0.95 ,lamb= 0.95 ,cost_DDR_Ante = 14.8989759749228

KeyboardInterrupt: 

### Run SPO+

In [None]:
# SPO+ run through the shared EPO runner: once per grid, skipped when
# cost_spo+_Ante_all.pkl already exists in that grid's data folder.
method_names = ["spo+"]
for grid in grid_all:
    from Network import network_design

    Network = network_design()
    arcs, arc_index_mapping = Network._getArcs(grid)
    L_N = grid[0]
    DataPath_parent = f"{project_directory}{middle_path}{L_N}by{L_N}_grid/"
    DataPath = (
        f"{DataPath_parent}data_size={num_train}_deg={deg}_e={e}"
        f"_num_test={num_test}_x_dist={x_dist}_e_dist={e_dist}/"
    )
    print("DataPath:", DataPath)

    if not os.path.exists(f"{DataPath}cost_spo+_Ante_all.pkl"):
        ILA.run_EPO_approaches(
            DataPath, method_names, arcs, grid, mis, bump,
            iteration_all, num_feat, data_generation_process,
        )
    else:
        print("Results already exist")

DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_Rep/Baseline_SPO_Data_Generation/3by3_grid/data_size=500_deg=1.0_e=0.5_num_test=1000_x_dist=uniform_e_dist=normal/
['spo+'] : iter= 20 ,cost_EPO_Ante= 15.138965999965748
['spo+'] : iter= 40 ,cost_EPO_Ante= 14.642592337153301
['spo+'] : iter= 60 ,cost_EPO_Ante= 15.058470043170113
['spo+'] : iter= 80 ,cost_EPO_Ante= 14.910529416601412


Process ForkPoolWorker-1:
Traceback (most recent call last):
  File "/Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Code_MacBook/Decision_Driven_Regularization/.venv/lib/python3.9/site-packages/multiprocess/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Code_MacBook/Decision_Driven_Regularization/.venv/lib/python3.9/site-packages/multiprocess/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Code_MacBook/Decision_Driven_Regularization/.venv/lib/python3.9/site-packages/multiprocess/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
  File "/Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Code_MacBook/Decision_Driven_Regularization/.venv/lib/python3.9/site-packages/multiprocess/pool.py", line 48, in mapstar
    return list(map(*args))
  File "/Users/zhangxun/Dropbo

### Run PG 

In [None]:
# PG run through the shared EPO runner: once per grid, skipped when
# cost_pg_Ante_all.pkl already exists in that grid's data folder.
method_names = ["pg"]
for grid in grid_all:
    from Network import network_design

    Network = network_design()
    arcs, arc_index_mapping = Network._getArcs(grid)
    L_N = grid[0]

    DataPath_parent = f"{project_directory}{middle_path}{L_N}by{L_N}_grid/"
    DataPath = (
        f"{DataPath_parent}data_size={num_train}_deg={deg}_e={e}"
        f"_num_test={num_test}_x_dist={x_dist}_e_dist={e_dist}/"
    )
    print("DataPath:", DataPath)

    if not os.path.exists(f"{DataPath}cost_pg_Ante_all.pkl"):
        ILA.run_EPO_approaches(
            DataPath, method_names, arcs, grid, mis, bump,
            iteration_all, num_feat, data_generation_process,
        )
    else:
        print("Results already exist")

DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_Rep/Baseline_SPO_Data_Generation/3by3_grid/data_size=500_deg=1.0_e=0.5_num_test=1000_x_dist=uniform_e_dist=normal/
['pg'] : iter= 20 ,cost_EPO_Ante= 15.703973875114283

Interrupt request received


KeyboardInterrupt: 

### Run LTR

In [None]:
# LTR run through the shared EPO runner: once per grid, skipped when
# cost_ltr_Ante_all.pkl already exists in that grid's data folder.
method_names = ["ltr"]
for grid in grid_all:
    from Network import network_design

    Network = network_design()
    arcs, arc_index_mapping = Network._getArcs(grid)
    L_N = grid[0]

    DataPath_parent = f"{project_directory}{middle_path}{L_N}by{L_N}_grid/"
    DataPath = (
        f"{DataPath_parent}data_size={num_train}_deg={deg}_e={e}"
        f"_num_test={num_test}_x_dist={x_dist}_e_dist={e_dist}/"
    )
    print("DataPath:", DataPath)

    if not os.path.exists(f"{DataPath}cost_ltr_Ante_all.pkl"):
        ILA.run_EPO_approaches(
            DataPath, method_names, arcs, grid, mis, bump,
            iteration_all, num_feat, data_generation_process,
        )
    else:
        print("Results already exist")

### Run Random Forest

In [None]:
# Random Forest run: once per grid, skipped when cost_RF_Ante_all.pkl already
# exists in that grid's data folder.
for grid in grid_all:
    from Network import network_design

    Network = network_design()
    arcs, arc_index_mapping = Network._getArcs(grid)
    L_N = grid[0]

    DataPath_parent = f"{project_directory}{middle_path}{L_N}by{L_N}_grid/"
    DataPath = (
        f"{DataPath_parent}data_size={num_train}_deg={deg}_e={e}"
        f"_num_test={num_test}_x_dist={x_dist}_e_dist={e_dist}/"
    )
    print("DataPath:", DataPath)

    if not os.path.exists(f"{DataPath}cost_RF_Ante_all.pkl"):
        SLA.Run_Random_Forest(
            DataPath, arcs, grid, mis, bump,
            iteration_all, num_feat, data_generation_process,
        )
    else:
        print("Results already exist")

DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_Rep/Baseline_SPO_Data_Generation/3by3_grid/data_size=500_deg=1.0_e=0.5_num_test=1000_x_dist=uniform_e_dist=normal/
Results already exist


### Run XGBoost

In [None]:
# XGBoost run: once per grid, skipped when cost_XG_Ante_all.pkl already exists
# in that grid's data folder.
for grid in grid_all:
    from Network import network_design

    Network = network_design()
    arcs, arc_index_mapping = Network._getArcs(grid)
    L_N = grid[0]

    DataPath_parent = f"{project_directory}{middle_path}{L_N}by{L_N}_grid/"
    DataPath = (
        f"{DataPath_parent}data_size={num_train}_deg={deg}_e={e}"
        f"_num_test={num_test}_x_dist={x_dist}_e_dist={e_dist}/"
    )
    print("DataPath:", DataPath)

    if not os.path.exists(f"{DataPath}cost_XG_Ante_all.pkl"):
        SLA.Run_XGBoost(
            DataPath, arcs, grid, mis, bump,
            iteration_all, num_feat, data_generation_process,
        )
    else:
        print("Results already exist")

DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data_JOC_R1/Shortest_Path_Rep/Baseline_SPO_Data_Generation/3by3_grid/data_size=500_deg=1.0_e=0.5_num_test=1000_x_dist=uniform_e_dist=normal/
XGboost: iter= 20 ,cost_XG_Ante= 15.461066824674333
XGboost: iter= 40 ,cost_XG_Ante= 14.979540384667724
XGboost: iter= 60 ,cost_XG_Ante= 15.453200963646042
XGboost: iter= 80 ,cost_XG_Ante= 15.310284663914299


### Comparison of DDR with OLS, SPO+, and other approaches using H2H and regret-reduction distributions
Refer to: Shortest_Path_Reproduce/Baseline_DDR_vs_Others_H2H_Regret.ipynb

#### Impact of network size

#### Calibrate $\mu$ and $\lambda$: Refer to Calibrate_mu_lambda_SPO.ipynb