In [1]:
import json

In [2]:
import os

In [3]:
import pandas as pd

In [4]:
# Directories that are scanned for result files; each one is expected to
# contain a "results" sub-directory (that sub-directory name is appended
# where the files are loaded).
result_directories = [
    "baseline_results/baseline_results_10_dims",
    "baseline_results/baseline_results_50_dims",
    "baseline_results/baseline_results_100_dims",
    "baseline_results/baseline_results_gurobi"
]

In [5]:
def extract_function(file_name):
    """Return the benchmark function name contained in ``file_name``.

    The known names are tried in a fixed order and the first one that occurs
    as a substring of ``file_name`` wins.

    Raises:
        AssertionError: if none of the known names occurs in ``file_name``.
    """
    candidates = ["Michalewicz", "Levy", "Ackley"]
    # First candidate that appears anywhere in the file name, or None.
    hit = next((name for name in candidates if name in file_name), None)
    assert hit is not None, "Function is not recognized from candidates {} for file {}.".format(candidates, file_name)
    return hit

In [6]:
def extract_dimension(file_name):
    """Return the problem dimension stored in ``file_name``.

    The dimension is read from the second "_"-separated token of the name.

    Raises:
        IndexError: if ``file_name`` contains no "_" at all.
        AssertionError: if the second token does not consist of digits only.
    """
    tokens = file_name.split("_")
    raw_dimension = tokens[1]
    assert raw_dimension.isdigit(), "Dimension {} is not an integer.".format(raw_dimension)
    return int(raw_dimension)

In [7]:
def extract_solver(file_name):
    """Return the solver name contained in ``file_name``.

    "scipy" is checked before "gurobi"; the first name that occurs as a
    substring of ``file_name`` is returned.

    Raises:
        AssertionError: if neither solver name occurs in ``file_name``.
    """
    candidates = ["scipy", "gurobi"]
    found = [name for name in candidates if name in file_name]
    if found:
        # Candidate order is preserved, so the first entry has priority.
        return found[0]

    assert False, "Solver is not recognized from candidates {} for file {}.".format(candidates, file_name)

In [8]:
def extract_algorithm(file_name):
    """Return the optimisation algorithm name contained in ``file_name``.

    The known algorithm names are tried in list order and the first one that
    occurs as a substring of ``file_name`` is returned.

    Raises:
        AssertionError: if no known algorithm name occurs in ``file_name``.
    """
    candidates = ["direct", "shgo", "differential_evolution", "dual_annealing", "brute", "basinhopping"]
    matching = (name for name in candidates if name in file_name)
    algorithm = next(matching, None)
    assert algorithm is not None, "Algorithm is not recognized from candidates {} for file {}.".format(candidates, file_name)
    return algorithm

In [9]:
def extract_seed(file_name):
    """Return the integer seed stored in ``file_name``.

    A trailing ".json" extension is removed (if present) and the last
    "_"-separated token of the remainder is interpreted as the seed.

    Raises:
        AssertionError: if the last token does not consist of digits only.
    """
    extension = ".json"
    # str.strip(".json") removes any of the characters '.', 'j', 's', 'o', 'n'
    # from BOTH ends rather than the literal suffix, which can eat parts of
    # the name itself. Cut off exactly the extension instead.
    if file_name.endswith(extension):
        stem = file_name[:-len(extension)]
    else:
        stem = file_name
    seed = stem.split("_")[-1]
    assert seed.isdigit(), "Seed {} is not an integer.".format(seed)
    return int(seed)

In [10]:
# Collect one record per result file: metadata parsed from the file name
# merged with the JSON payload stored in the file.
results = []

for result_dir in result_directories:
    path = os.path.join(result_dir, "results")
    # os.listdir returns entries in arbitrary order; sort them so the order
    # of `results` (and any row index derived from it) is reproducible.
    for fn in sorted(os.listdir(path)):
        function = extract_function(fn)
        solver = extract_solver(fn)
        # No algorithm name is parsed for gurobi files; it is left as None here.
        algorithm = None if solver == "gurobi" else extract_algorithm(fn)
        dimension = extract_dimension(fn)
        seed = extract_seed(fn)

        info_dict = {
            "function": function,
            "solver": solver,
            "algorithm": algorithm,
            "dimension": dimension,
            "seed": seed,
        }
        with open(os.path.join(path, fn)) as f:
            result_dict = json.load(f)

        # Metadata first, then the file contents; keys present in the JSON
        # payload take precedence over the parsed metadata on a name clash.
        entry_dict = {}
        entry_dict.update(info_dict)
        entry_dict.update(result_dict)

        results.append(entry_dict)

In [11]:
# Records whose algorithm is None get the placeholder label 'gurobi', so the
# 'algorithm' field is never null in the records.
for result in results:
    # Identity comparison is the correct test for None (PEP 8); `== None`
    # can be fooled by objects that override __eq__.
    if result['algorithm'] is None:
        result['algorithm'] = 'gurobi'

In [12]:
df = pd.DataFrame(results)
# Records without a 'timeout_reached' entry show up as NaN; treat them as
# "no timeout". The explicit cast guarantees a real boolean column: relying on
# fillna to downcast an object column is deprecated in recent pandas releases.
df['timeout_reached'] = df['timeout_reached'].fillna(False).astype(bool)

In [13]:
# Sorted copy of the per-run table; `df` itself is left in load order.
result_df = df.sort_values(
    by=['dimension', 'function', 'solver', 'algorithm', 'seed'],
)

In [14]:
# Show the sorted per-run table as this cell's output.
result_df

Unnamed: 0,function,solver,algorithm,dimension,seed,x,obj_val,time_elapsed,timeout_reached
247,Ackley,gurobi,gurobi,10,0,"[0.23199999999999776, 0.23199999999999776, 0.2...",1.042645,0.013975,False
264,Ackley,gurobi,gurobi,10,1,"[0.23199999999999776, 0.23199999999999776, 0.2...",1.042645,0.017745,False
230,Ackley,gurobi,gurobi,10,2,"[0.23199999999999776, 0.23199999999999776, 0.2...",1.042645,0.067047,False
236,Ackley,gurobi,gurobi,10,3,"[0.23199999999999776, 0.23199999999999776, 0.2...",1.042645,0.039329,False
225,Ackley,gurobi,gurobi,10,4,"[0.23199999999999776, 0.23199999999999776, 0.2...",1.042645,0.071417,False
...,...,...,...,...,...,...,...,...,...
184,Michalewicz,scipy,shgo,100,0,,,300.179816,True
180,Michalewicz,scipy,shgo,100,1,,,300.140381,True
151,Michalewicz,scipy,shgo,100,2,,,300.163152,True
166,Michalewicz,scipy,shgo,100,3,,,300.185741,True


In [15]:
# Per-configuration summary: one row per (dimension, function, solver,
# algorithm) combination, aggregated over all runs in that group.
statistics_df = (
    df.groupby(['dimension', 'function', 'solver', 'algorithm'])
    .agg(
        # Objective value statistics.
        obj_mean=pd.NamedAgg(column='obj_val', aggfunc='mean'),
        obj_min=pd.NamedAgg(column='obj_val', aggfunc='min'),
        obj_max=pd.NamedAgg(column='obj_val', aggfunc='max'),
        # Statistics of the 'time_elapsed' column.
        time_mean=pd.NamedAgg(column='time_elapsed', aggfunc='mean'),
        time_min=pd.NamedAgg(column='time_elapsed', aggfunc='min'),
        time_max=pd.NamedAgg(column='time_elapsed', aggfunc='max'),
        # Number of runs in the group and how many of them hit the timeout.
        num_seeds=pd.NamedAgg(column='seed', aggfunc='count'),
        num_timeout=pd.NamedAgg(column='timeout_reached', aggfunc='sum'),
    )
    .reset_index()
)

In [16]:
# Show the aggregated statistics table as this cell's output.
statistics_df

Unnamed: 0,dimension,function,solver,algorithm,obj_mean,obj_min,obj_max,time_mean,time_min,time_max,num_seeds,num_timeout
0,10,Ackley,gurobi,gurobi,1.042645,1.042645,1.042645,0.041903,0.013975,0.071417,5,0
1,10,Ackley,scipy,basinhopping,20.35082,19.59414,20.88108,2.863764,2.354912,3.241648,5,0
2,10,Ackley,scipy,differential_evolution,9.536743e-07,9.536743e-07,9.536743e-07,35.762184,20.978366,43.802227,5,0
3,10,Ackley,scipy,direct,9.536743e-07,9.536743e-07,9.536743e-07,2.037106,1.872341,2.143688,5,0
4,10,Ackley,scipy,dual_annealing,0.0004240036,0.0002775192,0.0006971359,19.568395,14.712697,32.816207,5,0
5,10,Ackley,scipy,shgo,9.536743e-07,9.536743e-07,9.536743e-07,139.647766,118.605968,165.371123,5,0
6,10,Levy,gurobi,gurobi,-2.501127e-06,-2.501127e-06,-2.501127e-06,1.444201,1.354378,1.624628,5,0
7,10,Levy,scipy,basinhopping,22.41383,7.975127,34.31707,4.138999,3.700006,4.351144,5,0
8,10,Levy,scipy,differential_evolution,7.684822e-15,7.66532e-15,7.713235e-15,51.508605,48.664711,54.14178,5,0
9,10,Levy,scipy,direct,0.1335935,0.1335935,0.1335935,2.693021,2.582871,2.814984,5,0


In [18]:
# Persist both tables in the working directory; the DataFrame index is not
# written to the files.
result_df.to_csv(path_or_buf="result.csv", index=False)
statistics_df.to_csv(path_or_buf="stats.csv", index=False)