In [1]:
import pandas as pd
import numpy as np
import copy
import os
from tqdm import tqdm
import json

In [2]:
import matplotlib.pyplot as plt

In [3]:
from pandarallel import pandarallel

pandarallel.initialize(progress_bar=False)

INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


## For case studies

In [4]:
benchmark_directory = "/projects/mcmm/results_final/"

In [5]:
def get_info_from_filename(np_fname):
    splitted = np_fname.lower().split("_")
    func = splitted[0]
    dim = int(splitted[1])
    
    algorithm = None
    if "basinhopping" in np_fname:
        assert algorithm is None; algorithm = "basinhopping"
    elif "differential_evolution" in np_fname:
        assert algorithm is None; algorithm = "differential_evolution"
    elif "direct" in np_fname:
        assert algorithm is None; algorithm = "direct"
    elif "dual_annealing" in np_fname:
        assert algorithm is None; algorithm = "dual_annealing"
    elif "shgo" in np_fname:
        assert algorithm is None; algorithm = "shgo"
    elif "simulated_annealing" in np_fname:
        assert algorithm is None; algorithm = "simulated_annealing"
    elif "mcir" in np_fname:
        assert algorithm is None; algorithm = "mcir"
    
    assert algorithm is not None, "algorithm not found in {}".format(np_fname)
    
    info = {
        "function": func,
        "dimension": dim,
        "algorithm": algorithm,
    }
    return info

In [6]:
all_results = []
already_run_file_prefix = []

for filename in os.listdir(benchmark_directory):
    if not filename.endswith("_Y.npy"):
        print("Skipping {}".format(filename))
        continue

    result_dict = get_info_from_filename(filename)
    result_dict['obj_list'] = np.load(os.path.join(benchmark_directory, filename))
    
    already_run_file_prefix.append(filename[:-6])
    
    stats_filename = filename[:-6] + ".json"
    stats_filepath = os.path.join(benchmark_directory, "stats", stats_filename)
    
    if os.path.exists(stats_filepath):
        with open(stats_filepath) as f:
            stats = json.load(f)

        result_dict['total_time'] = stats['time_elapsed']
        result_dict['timeout_reached'] = stats['timeout_reached']
    else:
        result_dict['total_time'] = -1
        result_dict['timeout_reached'] = None
    
    all_results.append(result_dict)

Skipping stats


In [7]:
# Process gurobi runs
for filename in os.listdir(os.path.join(benchmark_directory, "stats")):
    if not filename.endswith(".json"):
        print("Skipping {}".format(filename))
        continue
    
    if 'gurobi' in filename:
        print("Processing {}".format(filename))
        
        func = filename.lower().split("_")[0]
        dim = int(filename.lower().split("_")[1])
        algorithm = filename.lower().split("_")[2]
        
        with open(os.path.join(benchmark_directory, "stats", filename)) as f:
            stats = json.load(f)
        
        result_dict = {
            "function": func,
            "dimension": dim,
            "algorithm": algorithm,
            "obj_list": [stats['obj_val']],
            "total_time": stats['time_elapsed'],
            "timeout_reached": None,
        }
        
        all_results.append(result_dict)

Processing Levy_200_gurobi_4.json
Processing Michalewicz_200_gurobi_4.json
Processing Ackley_50_gurobi_1.json
Processing Levy_100_gurobi_0.json
Processing Michalewicz_100_gurobi_3.json
Processing Levy_200_gurobi_3.json
Processing Michalewicz_100_gurobi_0.json
Processing Ackley_200_gurobi_2.json
Processing Levy_200_gurobi_2.json
Processing Michalewicz_200_gurobi_2.json
Processing Ackley_100_gurobi_2.json
Processing Ackley_100_gurobi_1.json
Processing Michalewicz_100_gurobi_4.json
Processing Michalewicz_200_gurobi_3.json
Processing Michalewicz_10_gurobi_0.json
Processing Levy_100_gurobi_3.json
Processing Michalewicz_200_gurobi_1.json
Processing Michalewicz_10_gurobi_1.json
Processing Ackley_100_gurobi_0.json
Processing Ackley_10_gurobi_2.json
Processing Ackley_50_gurobi_0.json
Processing Levy_100_gurobi_1.json
Processing Michalewicz_50_gurobi_4.json
Processing Levy_100_gurobi_2.json
Processing Michalewicz_10_gurobi_3.json
Processing Ackley_10_gurobi_1.json
Processing Levy_200_gurobi_0.js

In [8]:
def find_first_reach_sample(nums):
    first_reach_sample = len(nums)
    best_sample_val = float('inf')
    for idx, val in enumerate(nums):
        if val < best_sample_val:
            best_sample_val = val
            first_reach_sample = idx
    
    return (best_sample_val, first_reach_sample)

In [9]:
result_df = pd.DataFrame(all_results)
result_df

Unnamed: 0,function,dimension,algorithm,obj_list,total_time,timeout_reached
0,nn-ackley,50,basinhopping,"[21.189709, 21.189709, 21.189709, 21.189709, 2...",108.239230,False
1,ackley,50,basinhopping,"[20.977924, 20.977924, 20.977924, 20.977924, 2...",5.943726,False
2,levy,50,mcir,"[479.7550964355469, 479.7550964355469, 479.755...",-1.000000,
3,harkerp,101,mcir,"[12967036.0, 12967036.0, 12967036.0, 248121.71...",-1.000000,
4,michalewicz,200,mcir,"[-23.66192054748535, -23.66192054748535, -23.6...",-1.000000,
...,...,...,...,...,...,...
681,levy,200,gurobi,[-0.000163713388551661],24.009706,
682,michalewicz,50,gurobi,[-49.85208419858109],4.966436,
683,ackley,100,gurobi,[2.021709219484489],0.156366,
684,michalewicz,200,gurobi,[-202.46349824889242],26.289410,


In [10]:
result_df['obj_stat'] = result_df['obj_list'].parallel_apply(find_first_reach_sample)

In [11]:
result_df['best_y'] = result_df['obj_stat'].apply(lambda x: x[0])
result_df['first_reach_sample'] = result_df['obj_stat'].apply(lambda x: x[1])
result_df['total_sample'] = result_df['obj_list'].parallel_apply(len)

In [12]:
result_df = result_df.drop(['obj_list', 'obj_stat'], axis=1)

In [13]:
result_df = result_df.sort_values(["function", "dimension", "algorithm", "best_y", "first_reach_sample", "total_time"])

In [14]:
result_df = result_df[[
    'function', 'dimension', 'algorithm', 'best_y', 'total_time', 
    'total_sample', 'first_reach_sample', 'timeout_reached']]

In [15]:
result_df.to_pickle("appen_raw_results.pkl")
result_df

Unnamed: 0,function,dimension,algorithm,best_y,total_time,total_sample,first_reach_sample,timeout_reached
351,ackley,10,basinhopping,19.079865,1.612064,11517,5159,False
59,ackley,10,basinhopping,19.804897,1.954477,11363,5929,False
436,ackley,10,basinhopping,19.835838,1.707414,11440,4224,False
339,ackley,10,basinhopping,20.304733,1.725310,11407,1243,False
105,ackley,10,basinhopping,20.540028,1.602051,11671,8294,False
...,...,...,...,...,...,...,...,...
54,watson,32,shgo,inf,3601.669902,0,0,True
288,watson,32,shgo,inf,3601.854003,0,0,True
148,watson,32,shgo,inf,3601.968753,0,0,True
559,watson,32,shgo,inf,3601.974911,0,0,True


In [16]:
result_df[result_df['algorithm'] != 'mcir'].to_csv('results_no_mcir.csv', index=False)

In [17]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
result_df

Unnamed: 0,function,dimension,algorithm,best_y,total_time,total_sample,first_reach_sample,timeout_reached
351,ackley,10,basinhopping,19.07986,1.612064,11517,5159,False
59,ackley,10,basinhopping,19.8049,1.954477,11363,5929,False
436,ackley,10,basinhopping,19.83584,1.707414,11440,4224,False
339,ackley,10,basinhopping,20.30473,1.72531,11407,1243,False
105,ackley,10,basinhopping,20.54003,1.602051,11671,8294,False
591,ackley,10,differential_evolution,9.536743e-07,34.481256,28811,23468,False
268,ackley,10,differential_evolution,9.536743e-07,28.754371,28661,24270,False
182,ackley,10,differential_evolution,9.536743e-07,11.60809,31961,26085,False
410,ackley,10,differential_evolution,9.536743e-07,35.90161,31061,27164,False
404,ackley,10,differential_evolution,9.536743e-07,38.496363,32111,27478,False
