In [1]:
import random
import numpy as np
import torch
import os
import pathlib
import pickle

In [2]:
# data_generation_process = "SPO_Data_Generation"
data_generation_process = "DDR_Generation"

In [3]:
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
grandparent_directory = os.path.dirname(parent_directory)
DataPath = os.path.dirname(grandparent_directory) + '/Data/' + data_generation_process + "/"
pathlib.Path(DataPath).mkdir(parents=True, exist_ok=True)
print("grandparent_directory:", grandparent_directory)
print("DataPath:", DataPath)

grandparent_directory: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Code_MacBook
DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data/DDR_Generation/


# Parameters

In [4]:
# import pyepo
# generate data
grid = (5,5) # grid size
num_data = 100 # number of training data
num_feat = 5 # size of feature
num_test = 1000
deg = 1.0 # polynomial degree
e = 1.0 # noise width

In [5]:
DataPath = DataPath + "data_size="+str(num_data)+"_deg="+str(deg)+"_e="+str(e)+"/"
pathlib.Path(DataPath).mkdir(parents=True, exist_ok=True)

In [6]:
def obtain_data(data_generation_process,num_data,num_test, num_feat, grid, deg, e, seed):
    from Data import data_generation
    data_gen = data_generation()
    if data_generation_process == "SPO_Data_Generation":
        feats, costs = data_gen.generate_Shortest_Path_Data(num_data+num_test, num_feat, grid, deg, e, seed=seed)
        # split train test data
        from sklearn.model_selection import train_test_split
        x_train, x_test, c_train, c_test = train_test_split(feats, costs, test_size=num_test, random_state=42)

    if data_generation_process == "DDR_Generation":
        lower = 0
        upper = 1
        p = 5
        d = 40
        alpha = 1.0
        mis = deg
        n_epsilon = 1
        W_star = data_gen.generate_truth("",lower, upper, p, d, seed,version = 0) 
        # print("W_star = ",W_star[0,:])
        x_test, z_test_ori, c_test, x_train, z_train_ori, c_train, W_star = data_gen.generate_samples("",p, d, num_test, num_data, alpha, W_star, n_epsilon, mis, thres = 10, 
                                version = 1, x_dist = 'normal', e_dist = 'normal', x_low = 0, x_up = 2, x_mean = 2, x_var = 0.25, bump = 0) 

    return x_train, x_test, c_train, c_test

In [7]:
seed_all = np.arange(1,2)
cost_Oracle_all = {}; cost_SPO_all = {}; cost_OLS_all = {}; cost_DDR_all = {}

for seed in seed_all:
    DataPath_seed = DataPath +"Seed="+str(seed)+"/"
    pathlib.Path(DataPath_seed).mkdir(parents=True, exist_ok=True)

    # #  ****** Data generation *********
    x_train, x_test, c_train, c_test = obtain_data(data_generation_process,num_data,num_test, num_feat, grid, deg, e, seed)

    raw_data = {}
    raw_data["x_train"] = x_train; raw_data["x_test"] = x_test; raw_data["c_train"] = c_train; raw_data["c_test"] = c_test
    with open(DataPath_seed +'raw_data.pkl', "wb") as tf:
        pickle.dump(raw_data,tf)

    #  ****** SPO *********
    print("*** seed = ",seed,": Run SPO ========")
    from SPO_Plus import run_SPO_Shortest_Path
    SPO_runner = run_SPO_Shortest_Path()
    batch_size = 1
    num_epochs = 1
    arcs,loader_train,loader_test,cost_Oracle_all[seed],cost_SPO_all[seed] = SPO_runner.run(DataPath_seed,x_train,c_train,x_test,c_test,batch_size,num_feat,grid,num_epochs,True)
    print("Average Oracle Cost = ",np.mean(cost_Oracle_all[seed]))
    print("Average SPO Cost = ",np.mean(cost_SPO_all[seed]))
    print()

    #  ****** OLS *********
    print("*** seed = ",seed,": Run OLS ========")
    from OLS import run_OLS_Shortest_Path
    OLS_runner = run_OLS_Shortest_Path()
    cost_OLS_all[seed] = OLS_runner.run(DataPath_seed,arcs,x_train,c_train,grid,loader_test,loader_train)
    print("Average OLS Cost = ",np.mean(cost_OLS_all[seed]))
    print()

    #  ****** DDR *********
    print("*** seed = ",seed,": Run DDR ========")
    from DDR import run_DDR_Shortest_Path
    DDR_runner = run_DDR_Shortest_Path()
    mu_arr = np.arange(-1.0,1.0,0.1)
    lamb_arr = np.arange(-1,1,0.1)
    # mu_arr = [0.5]
    lamb_arr = [0.1,1.0,10]
    minimum_value = 1000000000

    cost_DDR_all[seed] = DDR_runner.run(DataPath_seed,lamb_arr,mu_arr,arcs,x_train, c_train, grid,loader_test,num_nodes=25)
    print()

Set parameter Username

--------------------------------------------
--------------------------------------------

Academic license - for non-commercial use only - expires 2025-03-25
Test
Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 912.78it/s]


Test
Optimizing for optDataset...


100%|██████████| 1000/1000 [00:00<00:00, 2112.45it/s]


Num of cores: 2
Average Oracle Cost =  29.145387698173522
Average SPO Cost =  30.582061832024717

Average OLS Cost =  30.51923964820255

lambda =  0.1 , mu =  -1.0 , Average DDR cost =  30.523408426612733

lambda =  0.1 , mu =  -0.9 , Average DDR cost =  30.52216194412124

lambda =  0.1 , mu =  -0.8 , Average DDR cost =  30.52036291763198

lambda =  0.1 , mu =  -0.7000000000000001 , Average DDR cost =  30.52131504902255

lambda =  0.1 , mu =  -0.6000000000000001 , Average DDR cost =  30.522275850027917

lambda =  0.1 , mu =  -0.5000000000000001 , Average DDR cost =  30.520380322188256

lambda =  0.1 , mu =  -0.40000000000000013 , Average DDR cost =  30.518313771933432

lambda =  0.1 , mu =  -0.30000000000000016 , Average DDR cost =  30.516211866468307

lambda =  0.1 , mu =  -0.20000000000000018 , Average DDR cost =  30.523287116676684

lambda =  0.1 , mu =  -0.1000000000000002 , Average DDR cost =  30.529007207105053

lambda =  0.1 , mu =  -2.220446049250313e-16 , Average DDR cost =  3

# Results

In [8]:
for seed in seed_all:
    print("Seed = ",seed,", Average Oracle Cost = ",np.round(np.mean(cost_Oracle_all[seed]),4),"Std = ", np.round(np.std(cost_Oracle_all[seed]),4))
    print("Seed = ",seed,", Average SPO Cost = ", np.round(np.mean(cost_SPO_all[seed]),4),"Std = ", np.round(np.std(cost_SPO_all[seed]),4))
    print("Seed = ",seed,", Average OLS Cost = ", np.round(np.mean(cost_OLS_all[seed]),4),"Std = ", np.round(np.std(cost_OLS_all[seed]),4))
    cost_ddr_lowest = 10000; lamb_opt = 0; mu_opt = 0
    for lamb in lamb_arr:
        # print("======== lambda = ",lamb,"============")
        for mu in mu_arr:
            # print("Seed = ",seed,", lamb = ",lamb,",mu = ",mu,", Average DRR Cost = ", np.round(np.mean(cost_DDR_all[seed][lamb,mu]["cost"]),4),"Std = ", np.round(np.std(cost_DDR_all[seed][lamb,mu]["cost"]),4))
            if np.mean(cost_DDR_all[seed][lamb,mu]["cost"]) < cost_ddr_lowest:
                cost_ddr_lowest = np.mean(cost_DDR_all[seed][lamb,mu]["cost"])
                lamb_opt = lamb
                mu_opt = mu
    print("Seed = ",seed,", lamb_opt = ",lamb_opt,",mu_opt = ",mu_opt,", Average DRR Cost = ", np.round(cost_ddr_lowest,4))
    print("Seed = ",seed,", opt ratio = ",(np.mean(cost_OLS_all[seed]) - cost_ddr_lowest)/np.mean(cost_OLS_all[seed]))
    print()

Seed =  1 , Average Oracle Cost =  29.1454 Std =  4.42
Seed =  1 , Average SPO Cost =  30.5821 Std =  4.6044
Seed =  1 , Average OLS Cost =  30.5192 Std =  4.6149
Seed =  1 , lamb_opt =  0.1 ,mu_opt =  0.6999999999999996 , Average DRR Cost =  30.5143
Seed =  1 , opt ratio =  0.00016035850661909995

