In [12]:
import os
import sys
from tqdm import tqdm
import re
from itertools import combinations
import pandas as pd
import numpy as np

Read the results of the optimization. Because we ran the optimization with multiple seeds, we need to merge the per-seed results while enforcing epsilon dominance among them. First, let's read all of the per-seed CSV files:

In [13]:
# Read every per-seed optimization result file into two dicts keyed by seed:
# `d_vars` (decision variables) and `objectives` (objective values).

RESULTS_DIR = "../outputs/"
# The dot before "csv" is escaped and the pattern is anchored at the end, so
# only names ending in "seed_<integer>.csv" are accepted.
SEED_FILE_PATTERN = re.compile(r"seed_(-?\d+)\.csv$")


def read_seed_results(file_name, seed):
    """Load one result CSV and index its rows by (seed_index, old_index).

    The unnamed first CSV column is kept as `old_index`, so every row stays
    traceable to its position in the original per-seed file.
    """
    df = pd.read_csv(os.path.join(RESULTS_DIR, file_name)).rename(
        columns={"Unnamed: 0": "old_index"}
    )
    df["seed_index"] = seed
    return df.set_index(["seed_index", "old_index"])


d_vars = dict()
objectives = dict()

# os.listdir returns names in arbitrary order; sorting keeps runs reproducible.
for file in sorted(os.listdir(RESULTS_DIR)):
    if "baseline_opt_dvs" in file:
        target = d_vars
    elif "baseline_opt_objs" in file:
        target = objectives
    else:
        continue

    seed_search = SEED_FILE_PATTERN.search(file)
    if seed_search is None:
        # Name carries no parsable seed (e.g. a backup copy); skip it instead
        # of failing on `None.group(...)`.
        continue

    seed = int(seed_search.group(1))
    target[seed] = read_seed_results(file, seed)

# Each seed must provide both files; otherwise the merged decision variables
# and merged objectives would describe different sets of solutions.
unpaired_seeds = set(d_vars) ^ set(objectives)
if unpaired_seeds:
    raise ValueError(
        f"Seeds without both a dvs and an objs file: {sorted(unpaired_seeds)}"
    )

all_seeds = list(objectives.keys())

Now, the roadmap for checking for epsilon dominance is:
1) Merge all dataframes to get a single long one. Use hierarchical indexing [(seed, old_index)]
2) Divide every solution's objective values by epsilons and get the floor. Record these values side-by-side to the same dataframe as new columns
3) Generate an empty list of dominated indices
4) Go through all two-way combinations of indices in our dataframe. Check each pair: if one solution is dominated, eliminate it (put it in the dominated list)
5) If not, check if their floors are exactly the same (in the same epsilon box). If not, continue. If yes, keep the one that is closest to the ideal corner and eliminate the other one.



In [14]:
# Concatenate the per-seed frames using one shared, sorted seed sequence.
# The two dicts are filled independently, so their insertion orders can differ;
# concatenating each in its own order could leave row i of `full_dvs` and row i
# of `full_objectives` describing different solutions.
seed_order = sorted(objectives)
full_dvs = pd.concat([d_vars[seed_key] for seed_key in seed_order])
full_objectives = pd.concat([objectives[seed_key] for seed_key in seed_order])
full_objectives

Unnamed: 0_level_0,Unnamed: 1_level_0,Egypt_irr_def,HAD_min_level,Sudan_irr_def,Ethiopia_hydroenergy
seed_index,old_index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10,0,3.169425,178.101013,0.0,13.520777
10,1,4.394575,177.009079,0.0,14.200665
10,2,0.890999,174.851746,0.0,13.913525
10,3,1.268295,175.973740,0.0,13.901584
10,4,2.951919,178.100632,0.0,13.404197
10,...,...,...,...,...
10,281,0.899015,176.846756,0.0,13.377584
10,282,0.675063,176.555389,0.0,13.234435
10,283,2.761871,177.238708,0.0,14.000350
10,284,1.138742,176.993881,0.0,13.507268


In [15]:
# Display the concatenated decision-variable frame (rows indexed by
# (seed_index, old_index)) as this cell's output.
full_dvs

Unnamed: 0_level_0,Unnamed: 1_level_0,v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,...,v195,v196,v197,v198,v199,v200,v201,v202,v203,v204
seed_index,old_index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
10,0,0.060325,0.877317,0.561054,0.119365,0.000000,1.000000,1.000000,0.701520,0.698278,-0.012731,...,0.438421,0.569075,0.062209,0.882537,0.264937,0.333443,0.528249,0.342959,0.984255,1.000000
10,1,0.055795,0.237636,0.441866,0.088090,0.065339,0.907105,0.248029,0.578255,0.755401,-0.254503,...,0.978921,0.618624,0.716298,0.551495,0.353163,0.970628,0.778533,0.246930,0.869173,0.567703
10,2,0.058701,0.707590,0.275926,0.124304,0.095253,0.793470,0.956274,0.696490,0.701188,0.102418,...,0.469593,0.802860,0.286315,0.609391,0.600699,0.236794,0.673422,0.407574,0.400413,0.132143
10,3,0.055258,0.189708,0.289355,0.114021,0.099022,0.868064,0.460202,0.325692,0.653089,0.684467,...,0.330390,0.698901,0.582098,0.371123,0.935251,0.776950,0.627426,0.588914,0.470923,0.459146
10,4,0.056138,0.760261,0.670834,0.103302,0.027076,0.899806,0.946472,0.664383,0.778911,0.114547,...,0.571411,0.532694,0.120599,0.593334,0.323594,0.802325,0.816070,0.324663,0.585280,0.802974
10,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,281,0.057373,0.511340,0.291586,0.102654,0.090517,0.788240,0.931789,0.647895,0.670454,0.255518,...,0.324308,0.807524,0.275691,0.659449,0.669455,0.776529,0.664361,0.529134,0.625523,0.202523
10,282,0.057913,0.581047,0.301028,0.106575,0.094237,0.753820,0.994166,0.677162,0.671265,0.194900,...,0.338524,0.822356,0.240501,0.691977,0.646794,0.726920,0.670127,0.558425,0.624273,0.191900
10,283,0.055426,0.147217,0.276137,0.100616,0.080826,0.868489,0.299402,0.361504,0.680132,0.647343,...,0.429739,0.566586,0.772523,0.476751,0.896427,0.770106,0.632127,0.373011,0.617232,0.454654
10,284,0.057225,0.600214,0.366474,0.099447,0.086899,0.756079,0.972816,0.659160,0.673030,0.261658,...,0.248638,0.817769,0.238858,0.715231,0.644497,0.614045,0.696484,0.501364,0.555823,0.234096


In [16]:
# Epsilon (box width) per objective, in objective-column order.
epsilons = [0.1, 0.1, 0.1, 0.1]
# Sign applied to each objective before the dominance comparison.
directions = [1, -1, 1, -1]

# Only the raw objective columns get a floor column. Excluding already existing
# "*_floor" columns makes this cell safe to re-run: iterating over every current
# column would also hit the floor columns and run past the end of `epsilons`.
objective_columns = [
    column for column in full_objectives.columns if not str(column).endswith("_floor")
]
if len(objective_columns) != len(epsilons):
    raise ValueError(
        f"Expected {len(epsilons)} objective columns, found {len(objective_columns)}: "
        f"{objective_columns}"
    )

# Floor-divide each objective by its epsilon to get the epsilon-box coordinate.
for column, epsilon in zip(objective_columns, epsilons):
    full_objectives[f"{column}_floor"] = full_objectives[column] // epsilon

full_objectives.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Egypt_irr_def,HAD_min_level,Sudan_irr_def,Ethiopia_hydroenergy,Egypt_irr_def_floor,HAD_min_level_floor,Sudan_irr_def_floor,Ethiopia_hydroenergy_floor
seed_index,old_index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
10,0,3.169425,178.101013,0.0,13.520777,31.0,1781.0,0.0,135.0
10,1,4.394575,177.009079,0.0,14.200665,43.0,1770.0,0.0,142.0
10,2,0.890999,174.851746,0.0,13.913525,8.0,1748.0,0.0,139.0
10,3,1.268295,175.97374,0.0,13.901584,12.0,1759.0,0.0,139.0
10,4,2.951919,178.100632,0.0,13.404197,29.0,1781.0,0.0,134.0


In [17]:
def compare_two_solutions(index1, index2, objectives_df=None, objective_directions=None):
    """Return the index of the Pareto-dominated solution of a pair, or None.

    Each objective is multiplied by its direction sign so that smaller is
    better for every objective. A solution dominates the other when it is no
    worse in all objectives and strictly better in at least one. Requiring a
    strict improvement in every objective would miss dominance whenever the
    two solutions tie on any single objective.

    Parameters
    ----------
    index1, index2 : row labels of the two solutions in `objectives_df`.
    objectives_df : DataFrame whose leading columns are the objectives.
        Defaults to the notebook-level `full_objectives`.
    objective_directions : sequence of signs, one per objective.
        Defaults to the notebook-level `directions`.

    Returns
    -------
    `index2` if the first solution dominates, `index1` if the second one
    dominates, otherwise None.
    """
    if objectives_df is None:
        objectives_df = full_objectives
    if objective_directions is None:
        objective_directions = directions

    signs = np.asarray(objective_directions, dtype=float)
    n_objectives = signs.size

    # Plain arrays avoid copying the whole frame on every call and avoid
    # positional integer indexing on a label-indexed Series.
    first = objectives_df.loc[index1, :].to_numpy(dtype=float)[:n_objectives] * signs
    second = objectives_df.loc[index2, :].to_numpy(dtype=float)[:n_objectives] * signs

    if np.all(first <= second) and np.any(first < second):
        return index2
    if np.all(second <= first) and np.any(second < first):
        return index1
    return None


def epsilon_comparison(index1, index2, reference_point, objectives_df=None):
    """Return the solution to eliminate when two share an epsilon box.

    The first `len(reference_point)` columns of a row are read as objective
    values and all remaining columns as epsilon-box coordinates. If the box
    coordinates differ, nothing is eliminated. Otherwise the solution with the
    larger Euclidean distance to `reference_point` is eliminated.

    Parameters
    ----------
    index1, index2 : row labels of the two solutions in `objectives_df`.
    reference_point : array-like with one value per objective.
    objectives_df : DataFrame with objective columns followed by box columns.
        Defaults to the notebook-level `full_objectives`.

    Returns
    -------
    The label of the solution to drop, or None when the solutions are in
    different boxes. On an exact distance tie `index2` is returned, so that
    only one solution remains per box.
    """
    if objectives_df is None:
        objectives_df = full_objectives

    reference = np.asarray(reference_point, dtype=float)
    n_objectives = reference.size

    # Work on arrays rather than copying the whole frame on every call.
    first = objectives_df.loc[index1, :].to_numpy(dtype=float)
    second = objectives_df.loc[index2, :].to_numpy(dtype=float)

    if not np.array_equal(first[n_objectives:], second[n_objectives:]):
        return None

    distance1 = np.linalg.norm(first[:n_objectives] - reference)
    distance2 = np.linalg.norm(second[:n_objectives] - reference)

    if distance2 < distance1:
        return index1
    # index1 is closer, or both are equally far: drop index2.
    return index2

In [18]:
# Labels of eliminated solutions, in elimination order (used by later cells).
dominated_indices = []
# Set mirror of `dominated_indices` for constant-time membership tests; a list
# lookup here would add a linear scan to every one of the O(n^2) pairs.
dominated_lookup = set()
# Point against which solutions sharing an epsilon box are ranked by distance;
# one value per objective, in objective-column order.
reference_point = np.array([0, 185, 0, 20])

indices = list(full_objectives.index)
n_pairs = len(indices) * (len(indices) - 1) // 2

for item1, item2 in tqdm(combinations(indices, 2), total=n_pairs):
    # A solution that is already eliminated takes no further part.
    if item1 in dominated_lookup or item2 in dominated_lookup:
        continue

    # Dominance first; the epsilon-box check only runs if neither dominates.
    eliminated = compare_two_solutions(item1, item2)
    if eliminated is None:
        eliminated = epsilon_comparison(item1, item2, reference_point)

    # Compare against None explicitly so a falsy label is never discarded.
    if eliminated is not None:
        dominated_indices.append(eliminated)
        dominated_lookup.add(eliminated)

40755it [00:40, 1005.37it/s]


Now, we have all the dominated indices. After dropping these, we can generate two CSV files: one for decision variables, one for objective values.

In [20]:
# Solutions that survived the elimination, identified by (seed_index, old_index).
kept_mask = ~full_objectives.index.isin(dominated_indices)
kept_index = full_objectives.index[kept_mask]

# The floor columns were only needed for the epsilon-box comparison.
floor_columns = [column for column in full_objectives.columns if "floor" in column]
final_objs = (
    full_objectives.loc[kept_mask]
    .drop(columns=floor_columns)
    .reset_index(drop=True)
)

# Select decision variables by the kept objective labels, in that order.
# The index is dropped before writing, so row position is the only link
# between the two files; this keeps row i of both describing one solution.
final_dvs = full_dvs.loc[kept_index].reset_index(drop=True)

final_dvs.to_csv("merged_dvs.csv", index=False)
final_objs.to_csv("merged_objectives.csv", index=False)