In [35]:
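# Imports. Global RNG seeding (random / numpy / torch) is left disabled below; the experiment loop passes an explicit per-run seed to data generation instead.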
import random
# random.seed(42)
import numpy as np
# np.random.seed(42)
import torch
# torch.manual_seed(42)
# torch.cuda.manual_seed(42)
import os
import pathlib
import pickle

In [36]:
# Resolve the shared data root relative to where the notebook is running:
# climb three directory levels from the working directory, then descend into
# Data/SPO_Data_Generation/. DataPath stays a plain string with a trailing
# slash because later cells build sub-folders by string concatenation.
current_directory = os.getcwd()
parent_directory = os.path.split(current_directory)[0]
grandparent_directory = os.path.split(parent_directory)[0]
great_grandparent_directory = os.path.split(grandparent_directory)[0]
DataPath = great_grandparent_directory + '/Data/SPO_Data_Generation/'

# Create the folder (and any missing parents) up front; a no-op if it exists.
data_root_dir = pathlib.Path(DataPath)
data_root_dir.mkdir(parents=True, exist_ok=True)

print("grandparent_directory:", grandparent_directory)
print("DataPath:", DataPath)

grandparent_directory: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Code_MacBook
DataPath: /Users/zhangxun/Dropbox/Research/Decision_Driven_Regularization/Data/SPO_Data_Generation/


# Parameters

In [37]:
# Settings for the synthetic shortest-path data set.
grid = (5, 5)    # grid size
num_feat = 5     # size of feature

# Sample counts: training set and held-out test set.
num_data = 50    # number of training data
num_test = 1000  # number of test data

# Cost-model shape passed to the data generator.
deg = 0.8        # polynomial degree
e = 0.5          # noise width

In [38]:
# Per-experiment output folder, named after the data-generation settings.
#
# The folder is rebuilt from the shared "/Data/SPO_Data_Generation/" root
# instead of being appended to whatever DataPath currently holds. Appending
# made this cell non-idempotent: every re-run (with the same or with new
# parameters) nested one more "data_size=..." level inside the previous one.
_root_head, _root_marker, _stale_suffix = DataPath.rpartition("/Data/SPO_Data_Generation/")
# If the marker is missing (DataPath was set some other way), fall back to the
# original behaviour of extending the current value.
_data_root = _root_head + _root_marker if _root_marker else DataPath
DataPath = _data_root + "data_size=" + str(num_data) + "_deg=" + str(deg) + "_e=" + str(e) + "/"
pathlib.Path(DataPath).mkdir(parents=True, exist_ok=True)

In [None]:
# Run the full experiment once per seed: generate data, then evaluate the SPO,
# OLS and DDR pipelines on the same train/test split. Per-seed results are
# collected in the cost_*_all dictionaries (keyed by seed) for the results cell.

# Imported once here rather than on every loop iteration.
from sklearn.model_selection import train_test_split

from Data import data_generation
from SPO_Plus import run_SPO_Shortest_Path
from OLS import run_OLS_Shortest_Path
from DDR import run_DDR_Shortest_Path

seed_all = np.arange(1,10)  # seeds 1..9 (upper bound is exclusive)
cost_Oracle_all = {}; cost_SPO_all = {}; cost_OLS_all = {}; cost_DDR_all = {}

# ---- Loop-invariant settings (hoisted out of the per-seed loop) ----
batch_size = 20
num_epochs = 30
# DDR hyper-parameter grid. The results cell iterates over these same two
# objects to look results up by (lamb, mu), so keep them defined at cell level.
mu_arr = np.arange(0.1,1,0.05)
lamb_arr = [0.05,0.1,0.15,0.2]
# Previously hard-coded as 25, which equals grid[0]*grid[1] for the 5x5 grid.
# Derived from `grid` so it cannot drift if the grid size changes.
# NOTE(review): presumes num_nodes means the number of grid nodes -- confirm
# against run_DDR_Shortest_Path.run.
num_nodes = grid[0] * grid[1]

for seed in seed_all:
    DataPath_seed = DataPath +"Seed="+str(seed)+"/"
    pathlib.Path(DataPath_seed).mkdir(parents=True, exist_ok=True)
    data_gen = data_generation()

    #  ****** Data generation *********
    # Generate train and test samples in one call, then carve out the test set.
    feats, costs = data_gen.generate_Shortest_Path_Data(num_data+num_test, num_feat, grid, deg, e, seed=seed)
    # The split itself uses a fixed random_state; only the generated data
    # varies with `seed`.
    x_train, x_test, c_train, c_test = train_test_split(feats, costs, test_size=num_test, random_state=42)
    # Persist the exact split used for this seed next to its results.
    raw_data = {"x_train": x_train, "x_test": x_test, "c_train": c_train, "c_test": c_test}
    with open(DataPath_seed +'raw_data.pkl', "wb") as raw_data_file:
        pickle.dump(raw_data, raw_data_file)

    #  ****** SPO *********
    print("*** seed = ",seed,": Run SPO ========")
    SPO_runner = run_SPO_Shortest_Path()
    # The returned arcs and test loader are reused by the OLS and DDR runs below.
    # The meaning of the trailing positional True is defined by
    # run_SPO_Shortest_Path.run and is not visible from this notebook.
    arcs,loader_train,loader_test,cost_Oracle_all[seed],cost_SPO_all[seed] = SPO_runner.run(DataPath_seed,x_train,c_train,x_test,c_test,batch_size,num_feat,grid,num_epochs,True)

    #  ****** OLS *********
    print("*** seed = ",seed,": Run OLS ========")
    OLS_runner = run_OLS_Shortest_Path()
    cost_OLS_all[seed] = OLS_runner.run(DataPath_seed,arcs,x_train,c_train,grid,loader_test)

    #  ****** DDR *********
    print("*** seed = ",seed,": Run DDR ========")
    DDR_runner = run_DDR_Shortest_Path()
    cost_DDR_all[seed] = DDR_runner.run(DataPath_seed,lamb_arr,mu_arr,arcs,x_train, c_train, grid,loader_test,num_nodes=num_nodes)


Test
Optimizing for optDataset...


100%|██████████| 50/50 [00:00<00:00, 1153.30it/s]


Test
Optimizing for optDataset...


100%|██████████| 1000/1000 [00:00<00:00, 2061.33it/s]


Num of cores: 2


# Results

In [None]:
# Per-seed summary: mean and standard deviation of the cost for the Oracle,
# SPO and OLS runs, followed by the best DDR setting, i.e. the (lamb, mu) pair
# with the lowest mean cost, and its relative improvement over OLS.
#
# NOTE(review): the DDR costs were produced with loader_test, so this picks
# (lamb, mu) on the same data it reports -- confirm this is intended.
for seed in seed_all:
    print("Seed = ",seed,", Average Oracle Cost = ",np.round(np.mean(cost_Oracle_all[seed]),4),"Std = ", np.round(np.std(cost_Oracle_all[seed]),4))
    print("Seed = ",seed,", Average SPO Cost = ", np.round(np.mean(cost_SPO_all[seed]),4),"Std = ", np.round(np.std(cost_SPO_all[seed]),4))
    print("Seed = ",seed,", Average OLS Cost = ", np.round(np.mean(cost_OLS_all[seed]),4),"Std = ", np.round(np.std(cost_OLS_all[seed]),4))

    # Start from +inf rather than an arbitrary large number so the search is
    # correct whatever the cost scale; None marks "no setting found".
    cost_ddr_lowest = np.inf; lamb_opt = None; mu_opt = None
    for lamb in lamb_arr:
        for mu in mu_arr:
            cost_ddr_mean = np.mean(cost_DDR_all[seed][lamb,mu]["cost"])
            # Strict '<' keeps the first setting encountered on ties.
            if cost_ddr_mean < cost_ddr_lowest:
                cost_ddr_lowest = cost_ddr_mean
                lamb_opt = lamb
                mu_opt = mu
    print("Seed = ",seed,", lamb_opt = ",lamb_opt,",mu_opt = ",mu_opt,", Average DDR Cost = ", np.round(cost_ddr_lowest,4))

    # Relative cost reduction of the best DDR setting versus OLS.
    cost_ols_mean = np.mean(cost_OLS_all[seed])
    print("Seed = ",seed,", opt ratio = ",(cost_ols_mean - cost_ddr_lowest)/cost_ols_mean)
    print()