In [1]:
import random
import numpy as np
import torch
import os
import pathlib
import pickle

In [2]:
# Selects the data-generating process. obtain_data() branches on this string,
# and it is also used as the sub-directory name under Data/ when DataPath is built.
data_generation_process = "SPO_Data_Generation"
# Alternative value handled by obtain_data():
# data_generation_process = "DDR_Generation"

In [3]:
# Walk up from the working directory: cwd -> parent -> grandparent.
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
grandparent_directory = os.path.dirname(parent_directory)

# Results are stored one level above the grandparent, in Data/<process name>/.
# The trailing slash matters: later cells extend DataPath by plain concatenation.
data_root = os.path.dirname(grandparent_directory)
DataPath = f"{data_root}/Data/{data_generation_process}/"
os.makedirs(DataPath, exist_ok=True)

print("grandparent_directory:", grandparent_directory)
print("DataPath:", DataPath)

grandparent_directory: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Code_MacBook
DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data/SPO_Data_Generation/


# Parameters

In [4]:
# Experiment parameters, read by the data-generation and training cells below.
grid = (5,5) # grid size, passed to the shortest-path data generator and to the SPO/OLS/DDR runners
num_data = 100 # number of training data
num_feat = 5 # size of feature
num_test = 1000 # number of test samples (used as test_size in train_test_split)
deg = 1.5 # polynomial degree; also reused as `mis` in the DDR_Generation branch of obtain_data
e = 1.0 # noise width

In [5]:
# Append the per-configuration sub-directory to DataPath exactly once.
# The guard keeps this cell idempotent: re-running it without first re-running
# the cell that builds the base DataPath no longer nests the same folder inside
# itself. If the parameters change, re-run the base-path cell first so the new
# folder is not created under the previous configuration's folder.
config_dir = "data_size="+str(num_data)+"_deg="+str(deg)+"_e="+str(e)+"/"
if not DataPath.endswith(config_dir):
    DataPath = DataPath + config_dir
pathlib.Path(DataPath).mkdir(parents=True, exist_ok=True)

In [6]:
def obtain_data(data_generation_process,num_data,num_test, num_feat, grid, deg, e, seed):
    """Generate one train/test data set with the selected generation process.

    Parameters
    ----------
    data_generation_process : str
        Either "SPO_Data_Generation" or "DDR_Generation".
    num_data : int
        Number of training samples.
    num_test : int
        Number of test samples.
    num_feat : int
        Feature size. Only used by the "SPO_Data_Generation" branch.
    grid : tuple
        Grid size. Only used by the "SPO_Data_Generation" branch.
    deg : float
        Polynomial degree for "SPO_Data_Generation"; passed as `mis` to
        generate_samples for "DDR_Generation".
    e : float
        Noise width. Only used by the "SPO_Data_Generation" branch.
    seed : int
        Seed forwarded to the generator (to generate_Shortest_Path_Data, or to
        generate_truth in the DDR branch).

    Returns
    -------
    tuple
        (x_train, x_test, c_train, c_test)

    Raises
    ------
    ValueError
        If data_generation_process is not one of the two supported values.
        Previously this case fell through both branches and failed with an
        UnboundLocalError at the return statement.
    """
    supported_processes = ("SPO_Data_Generation", "DDR_Generation")
    # Validate before importing project modules so a typo fails fast and clearly.
    if data_generation_process not in supported_processes:
        raise ValueError(
            "Unknown data_generation_process: " + repr(data_generation_process)
            + "; expected one of " + repr(supported_processes)
        )

    from Data import data_generation
    data_gen = data_generation()

    if data_generation_process == "SPO_Data_Generation":
        # Generate train and test samples in one call, then split them.
        feats, costs = data_gen.generate_Shortest_Path_Data(num_data+num_test, num_feat, grid, deg, e, seed=seed)
        # split train test data (the split itself always uses random_state=42)
        from sklearn.model_selection import train_test_split
        x_train, x_test, c_train, c_test = train_test_split(feats, costs, test_size=num_test, random_state=42)

    elif data_generation_process == "DDR_Generation":
        # Fixed settings for this branch; num_feat, grid and e are not used here.
        lower = 0
        upper = 1
        p = 5
        d = 40
        alpha = 1.0
        mis = deg
        n_epsilon = 1
        W_star = data_gen.generate_truth("",lower, upper, p, d, seed,version = 0)
        # NOTE(review): seed is passed to generate_truth only; whether
        # generate_samples is seeded internally cannot be seen from here.
        x_test, z_test_ori, c_test, x_train, z_train_ori, c_train, W_star = data_gen.generate_samples("",p, d, num_test, num_data, alpha, W_star, n_epsilon, mis, thres = 10,
                                version = 1, x_dist = 'normal', e_dist = 'normal', x_low = 0, x_up = 2, x_mean = 2, x_var = 0.25, bump = 0)

    return x_train, x_test, c_train, c_test

In [None]:
# Run SPO+, OLS and DDR on freshly generated data for every seed and collect the
# per-seed test costs in the cost_*_all dictionaries (keyed by seed).
from SPO_Plus import run_SPO_Shortest_Path
from OLS import run_OLS_Shortest_Path
from DDR import run_DDR_Shortest_Path

seed_all = np.arange(1,10)
cost_Oracle_all = {}; cost_SPO_all = {}; cost_OLS_all = {}; cost_DDR_all = {}

# Loop-invariant settings, hoisted out of the seed loop.
batch_size = 1
num_epochs = 1
mu_arr = np.arange(0.0,0.51,0.05)   # DDR mu grid
lamb_arr = np.arange(1,4.1,0.5)     # DDR lambda grid
# Derived from grid instead of the previously hard-coded 25, so that changing
# the grid size cannot leave a stale node count behind.
num_nodes = grid[0]*grid[1]

for seed in seed_all:
    DataPath_seed = DataPath +"Seed="+str(seed)+"/"
    pathlib.Path(DataPath_seed).mkdir(parents=True, exist_ok=True)

    #  ****** Data generation *********
    x_train, x_test, c_train, c_test = obtain_data(data_generation_process,num_data,num_test, num_feat, grid, deg, e, seed)

    # Persist the raw split next to the results of this seed.
    raw_data = {"x_train": x_train, "x_test": x_test, "c_train": c_train, "c_test": c_test}
    with open(DataPath_seed +'raw_data.pkl', "wb") as raw_file:
        pickle.dump(raw_data,raw_file)

    #  ****** SPO *********
    print("*** seed = ",seed,": Run SPO ========")
    SPO_runner = run_SPO_Shortest_Path()
    # NOTE(review): the meaning of the trailing positional True is defined in SPO_Plus - confirm there.
    arcs,loader_train,loader_test,cost_Oracle_all[seed],cost_SPO_all[seed] = SPO_runner.run(DataPath_seed,x_train,c_train,x_test,c_test,batch_size,num_feat,grid,num_epochs,True)
    print("Average Oracle Cost = ",np.mean(cost_Oracle_all[seed]))
    print("Average SPO Cost = ",np.mean(cost_SPO_all[seed]))
    print()

    #  ****** OLS *********
    print("*** seed = ",seed,": Run OLS ========")
    OLS_runner = run_OLS_Shortest_Path()
    cost_OLS_all[seed] = OLS_runner.run(DataPath_seed,arcs,x_train,c_train,grid,loader_test,loader_train)
    print("Average OLS Cost = ",np.mean(cost_OLS_all[seed]))
    print()

    #  ****** DDR *********
    print("*** seed = ",seed,": Run DDR ========")
    DDR_runner = run_DDR_Shortest_Path()
    cost_DDR_all[seed] = DDR_runner.run(DataPath_seed,lamb_arr,mu_arr,arcs,x_train, c_train, grid,loader_test,num_nodes=num_nodes)
    print()

Set parameter Username

--------------------------------------------
--------------------------------------------

Academic license - for non-commercial use only - expires 2025-03-25
Test
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 1708.77it/s]


Test
Optimizing for optDataset...


100%|██████████| 1000/1000 [00:00<00:00, 2185.15it/s]


Num of cores: 2
Average Oracle Cost =  4.658342152833939
Average SPO Cost =  7.080734337605215

Average OLS Cost =  6.927368302423703

lambda =  1.0 , mu =  0.0 , Average DDR cost =  6.948242479914181

lambda =  1.0 , mu =  0.05 , Average DDR cost =  6.95392526934691

lambda =  1.0 , mu =  0.1 , Average DDR cost =  6.956431036455099

lambda =  1.0 , mu =  0.15000000000000002 , Average DDR cost =  6.9107167106696945

lambda =  1.0 , mu =  0.2 , Average DDR cost =  6.906816532835837

lambda =  1.0 , mu =  0.25 , Average DDR cost =  6.900678914333235

lambda =  1.0 , mu =  0.30000000000000004 , Average DDR cost =  6.877292861475617

lambda =  1.0 , mu =  0.35000000000000003 , Average DDR cost =  6.894597584966345

lambda =  1.0 , mu =  0.4 , Average DDR cost =  6.879524982813753

lambda =  1.0 , mu =  0.45 , Average DDR cost =  6.899162887211896

lambda =  1.0 , mu =  0.5 , Average DDR cost =  6.891072685538217

lambda =  1.5 , mu =  0.0 , Average DDR cost =  7.097537736349448

lambda =  

100%|██████████| 100/100 [00:00<00:00, 1516.53it/s]


Test
Optimizing for optDataset...


100%|██████████| 1000/1000 [00:00<00:00, 2062.34it/s]


Num of cores: 2
Average Oracle Cost =  4.706071251630783
Average SPO Cost =  7.35546965525583

Average OLS Cost =  7.127673941595545

lambda =  1.0 , mu =  0.0 , Average DDR cost =  7.067074989661815

lambda =  1.0 , mu =  0.05 , Average DDR cost =  7.058518123765949

lambda =  1.0 , mu =  0.1 , Average DDR cost =  7.070522162437446

lambda =  1.0 , mu =  0.15000000000000002 , Average DDR cost =  7.100011762816699

lambda =  1.0 , mu =  0.2 , Average DDR cost =  7.0662716347464665

lambda =  1.0 , mu =  0.25 , Average DDR cost =  7.088378431779805

lambda =  1.0 , mu =  0.30000000000000004 , Average DDR cost =  7.081344361120842

lambda =  1.0 , mu =  0.35000000000000003 , Average DDR cost =  7.101052571196465

lambda =  1.0 , mu =  0.4 , Average DDR cost =  7.089774076617214

lambda =  1.0 , mu =  0.45 , Average DDR cost =  7.104011635984185

lambda =  1.0 , mu =  0.5 , Average DDR cost =  7.086529043272377

lambda =  1.5 , mu =  0.0 , Average DDR cost =  7.187217042854521

lambda =  

100%|██████████| 100/100 [00:00<00:00, 1427.55it/s]


Test
Optimizing for optDataset...


100%|██████████| 1000/1000 [00:00<00:00, 1914.35it/s]


Num of cores: 2
Average Oracle Cost =  4.666621542096138
Average SPO Cost =  7.092626354527267

Average OLS Cost =  6.862967009014115

lambda =  1.0 , mu =  0.0 , Average DDR cost =  7.038547129354891

lambda =  1.0 , mu =  0.05 , Average DDR cost =  7.037213048011574

lambda =  1.0 , mu =  0.1 , Average DDR cost =  7.0502014545699785

lambda =  1.0 , mu =  0.15000000000000002 , Average DDR cost =  7.079964624480898

lambda =  1.0 , mu =  0.2 , Average DDR cost =  7.032639886159901

lambda =  1.0 , mu =  0.25 , Average DDR cost =  7.025937627051826

lambda =  1.0 , mu =  0.30000000000000004 , Average DDR cost =  6.970496689135427

lambda =  1.0 , mu =  0.35000000000000003 , Average DDR cost =  6.938943328842942

lambda =  1.0 , mu =  0.4 , Average DDR cost =  6.952062741260699

lambda =  1.0 , mu =  0.45 , Average DDR cost =  6.957734427072762

lambda =  1.0 , mu =  0.5 , Average DDR cost =  6.949432899580002

lambda =  1.5 , mu =  0.0 , Average DDR cost =  7.027236059626594

lambda = 

100%|██████████| 100/100 [00:00<00:00, 1401.45it/s]


Test
Optimizing for optDataset...


100%|██████████| 1000/1000 [00:00<00:00, 2093.74it/s]


Num of cores: 2
Average Oracle Cost =  4.764735116481781
Average SPO Cost =  7.4849032407247575

Average OLS Cost =  7.097674512511439

lambda =  1.0 , mu =  0.0 , Average DDR cost =  7.303526296744108

lambda =  1.0 , mu =  0.05 , Average DDR cost =  7.289071025361176

lambda =  1.0 , mu =  0.1 , Average DDR cost =  7.260269376232697

lambda =  1.0 , mu =  0.15000000000000002 , Average DDR cost =  7.249284467085555

lambda =  1.0 , mu =  0.2 , Average DDR cost =  7.196833095191614

lambda =  1.0 , mu =  0.25 , Average DDR cost =  7.192028785358517

lambda =  1.0 , mu =  0.30000000000000004 , Average DDR cost =  7.1891892250543314

lambda =  1.0 , mu =  0.35000000000000003 , Average DDR cost =  7.200947316182181

lambda =  1.0 , mu =  0.4 , Average DDR cost =  7.160120027051293

lambda =  1.0 , mu =  0.45 , Average DDR cost =  7.1610027939568095

lambda =  1.0 , mu =  0.5 , Average DDR cost =  7.129911018173589

lambda =  1.5 , mu =  0.0 , Average DDR cost =  7.329601553775763

lambda 

100%|██████████| 100/100 [00:00<00:00, 1385.20it/s]


Test
Optimizing for optDataset...


100%|██████████| 1000/1000 [00:00<00:00, 2109.46it/s]


Num of cores: 2
Average Oracle Cost =  4.7275598778724675
Average SPO Cost =  7.146225646285748

Average OLS Cost =  6.97560201691481

lambda =  1.0 , mu =  0.0 , Average DDR cost =  7.158044443351275

lambda =  1.0 , mu =  0.05 , Average DDR cost =  7.13196452060825

lambda =  1.0 , mu =  0.1 , Average DDR cost =  7.143901545050729

lambda =  1.0 , mu =  0.15000000000000002 , Average DDR cost =  7.108893817371864

lambda =  1.0 , mu =  0.2 , Average DDR cost =  7.093131474409892

lambda =  1.0 , mu =  0.25 , Average DDR cost =  7.068724381672859

lambda =  1.0 , mu =  0.30000000000000004 , Average DDR cost =  7.073839374388277

lambda =  1.0 , mu =  0.35000000000000003 , Average DDR cost =  7.041937547985174

lambda =  1.0 , mu =  0.4 , Average DDR cost =  7.023874642640934

lambda =  1.0 , mu =  0.45 , Average DDR cost =  7.004882731745136

lambda =  1.0 , mu =  0.5 , Average DDR cost =  7.001248354409996

lambda =  1.5 , mu =  0.0 , Average DDR cost =  7.207403082178906

lambda =  1

100%|██████████| 100/100 [00:00<00:00, 1383.62it/s]


Test
Optimizing for optDataset...


100%|██████████| 1000/1000 [00:00<00:00, 1960.67it/s]


Num of cores: 2
Average Oracle Cost =  4.6718733323812485
Average SPO Cost =  7.154671416052908

Average OLS Cost =  6.958839252466183

lambda =  1.0 , mu =  0.0 , Average DDR cost =  7.0252581600034265

lambda =  1.0 , mu =  0.05 , Average DDR cost =  7.018601312009276

lambda =  1.0 , mu =  0.1 , Average DDR cost =  7.012046851384936

lambda =  1.0 , mu =  0.15000000000000002 , Average DDR cost =  6.968276532062962



# Results

In [None]:
# For every seed, pick the (lambda, mu) pair with the lowest average DDR cost and
# report how much of the OLS-to-Oracle gap it closes:
#     ddr_ols_ratio = (OLS - DDR) / (OLS - Oracle)
for seed in seed_all:
    # Each mean is computed once per seed / grid point instead of repeatedly.
    ols_mean = np.mean(cost_OLS_all[seed])
    oracle_mean = np.mean(cost_Oracle_all[seed])

    # Start from +inf rather than the magic value 10000, so the first candidate
    # always wins regardless of the cost scale.
    cost_ddr_opt = np.inf
    lamb_opt = 0
    mu_opt = 0
    ddr_ols_ratio = 0
    for lamb in lamb_arr:
        for mu in mu_arr:
            cost_ddr = np.mean(cost_DDR_all[seed][lamb,mu]["cost"])
            if cost_ddr < cost_ddr_opt:
                cost_ddr_opt = cost_ddr
                lamb_opt = lamb
                mu_opt = mu
                ddr_ols_ratio = (ols_mean - cost_ddr_opt)/(ols_mean - oracle_mean)
    print("Seed = ",seed,", lamb_opt = ",lamb_opt,",mu_opt = ",mu_opt,", Average DRR Cost = ", np.round(cost_ddr_opt,4), "DDR_OLS_ratio = ",ddr_ols_ratio)
