# Exact Method
Using Gurobi solver

In [1]:
%config Completer.use_jedi = False

In [2]:
import math
import numpy as np
import pandas as pd
import gurobipy as gp
from gurobipy import GRB
import seaborn as sns
from matplotlib import pyplot as plt
from scipy.spatial.distance import pdist

In [3]:
import os
DATASETS_BASEPATH = '../datasets/'
synthetic_datasets = os.listdir(f'{DATASETS_BASEPATH}/sklearn-datasets/meta-features/')
real_datasets = os.listdir(f'{DATASETS_BASEPATH}/real-datasets')

In [20]:
pd.read_csv('../datasets/real-datasets/compas_metadata.csv')

Unnamed: 0,instances,feature_CL,feature_CLD,feature_DCP,feature_F1,feature_Harmfulness,feature_N1,feature_N2,feature_TD_P,feature_TD_U,feature_kDN,algo_bagging,algo_gradient_boosting,algo_logistic_regression,algo_mlp,algo_random_forest,algo_svc_linear,algo_svc_rbf
0,1,0.817262,0.817262,0.645463,0.076923,0.002014,1.0,0.000000,0.75,0.346154,0.6,0.783429,1.294225,1.086574,1.138194,1.152911,1.098590,1.218375
1,2,0.061321,0.061321,0.310160,0.230769,0.000403,0.0,1.000000,0.75,0.576923,0.2,1.038007,0.228349,0.327910,0.300339,0.204690,0.313141,0.219436
2,3,0.144556,0.144556,0.348606,0.153846,0.000000,0.0,0.408284,0.75,0.692308,0.2,0.437654,0.395576,0.268317,0.245394,0.490364,0.268453,0.328142
3,4,0.004853,0.004853,0.234506,0.076923,0.000000,0.0,0.000000,0.75,0.500000,0.2,0.316959,0.248446,0.233864,0.229492,0.219892,0.227627,0.327807
4,5,0.075165,0.075165,0.354537,0.076923,0.000000,1.0,0.000000,0.75,0.500000,0.2,0.648114,0.352047,0.527617,0.512862,0.428196,0.522380,0.371585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5273,5274,0.927173,0.927173,0.645463,0.076923,0.001611,1.0,0.000000,0.75,0.461538,0.6,0.978772,1.316957,1.040314,1.087224,1.245815,1.043946,1.207007
5274,5275,0.406834,0.406834,0.518750,0.000000,0.000000,1.0,0.000000,1.00,0.423077,0.8,0.703149,0.871755,0.787233,0.819198,0.774348,0.827964,0.816926
5275,5276,0.404776,0.404776,0.281915,0.076923,0.000000,0.0,0.000000,1.00,0.384615,0.4,0.505306,0.452444,0.732361,0.748449,0.632225,0.763739,0.997945
5276,5277,0.404776,0.404776,0.281915,0.076923,0.000716,0.0,0.000000,1.00,0.384615,0.4,0.475926,0.446878,0.710063,0.724876,0.627801,0.757990,0.459586


In [16]:
datasets_dictionary = dict()
for f in synthetic_datasets:
    try:
        df = pd.read_csv(f'{DATASETS_BASEPATH}/sklearn-datasets/meta-features/{f}')
        target_columns = [col for col in df.columns if col.startswith('target')]
        datasets_dictionary[f] = (df, target_columns)
    except:
        pass

for f in real_datasets:
    try:
        df = pd.read_csv(f'{DATASETS_BASEPATH}/real-datasets{f}').drop(columns = ['instances'])
        target_columns = [col for col in df.columns if col.startswith('algo')]
        datasets_dictionary[f] = (df, target_columns)
    except:
        pass

In [5]:
df = pd.read_csv('../datasets/real-datasets/compas_metadata.csv').drop(columns = ['instances'])
performance_columns = [col for col in df.columns if col.startswith('algo')]
X = df.drop(columns = performance_columns)
Y = df[performance_columns][['algo_bagging']]
# F = X.to_numpy()
# Y = Y.to_numpy()

# # Como pelo jeito não consigo modelar nosso problema matricial, vamos pegar uma parte de F e Y
# F = np.array(F[0,0])
# y = np.array(Y[0,0])

In [6]:
from dataclasses import dataclass

@dataclass
class OptimizationOutput:
    A: np.ndarray
    B: np.ndarray
    C: np.ndarray
    Z: np.ndarray
    error: float
    execution_time: float
    n_tries: int
    search_space: float
    method_name: str

In [7]:
import time

def exact_method(X, Y):
    F_matrix = X.to_numpy()
    Y_matrix = Y.to_numpy()
    
    A = list()
    B = list()
    C = list()
    Z = list() 
    execution_time = list() #end_time - start_time
    ntries = 1
    search_space = 0
    for i in range(F_matrix[:10].shape[0]):
        # Como pelo jeito não consigo modelar nosso problema matricial, vamos pegar uma parte de F e Y
        F = F_matrix[i, 0]
        y = Y_matrix[i, 0]
        try:

            # Create a new model
            mod = gp.Model("mod1") 

            # Create variables
            Ar = mod.addVar(vtype=GRB.CONTINUOUS, name="Ar") 
            Br = mod.addVar(vtype=GRB.CONTINUOUS, name="Br")
            cr = mod.addVar(vtype=GRB.CONTINUOUS, name="cr")

            # Set objective
            mod.setObjective((F - Br*Ar*F) + (y - cr*Ar*F), 
                             GRB.MINIMIZE)

            # Adjust parameters
            mod.params.NonConvex = 2

            start_time = time.time()
            mod.optimize()
            end_time = time.time()

            A.append(Ar.X)
            B.append(Br.X)
            C.append(cr.X)
            Z.append(mod.objVal)
            execution_time.append(end_time - start_time)
            ntries = 1
            search_space = 0

    #         # print variables
    #         for v in mod.getVars():
    #             print('%s %g' % (v.varName, v.x))

    #         #print optimized objective function
    #         print('Obj: %g' % mod.objVal)

        except gp.GurobiError as e:
            pass
            print('Error code ' + str(e.errno) + ": " + str(e))

        except AttributeError:
            pass
            print('Encountered an attribute error')
            print(e.message)
            
    out = OptimizationOutput(A, 
                             B, 
                             C, 
                             Z, 0, np.sum(execution_time), ntries, search_space, 'exact-method')
    
    return out

In [8]:
from experimentation import run_optimization

In [17]:
for filename, item in datasets_dictionary.items():
    print(item[1])

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]


In [14]:
%%time
experiments = dict()
for filename, item in datasets_dictionary.items():
    experiments[filename] = run_optimization(df = item[0], 
                                             target_columns = item[1][0], 
                                             optimization_method = exact_method)

IndexError: list index out of range

In [10]:
def plot_scatter_plot(x, y, title = None, x_label = None, y_label = None, hue_data = None):
    fig, ax = plt.subplots(figsize = (10, 8))
    n_colors = len(hue_data.unique())
    sns.scatterplot(x = x, y = y, hue = hue_data, palette = sns.color_palette("vlag", n_colors), legend = False)
    ax.set_title(title)
    sns.despine()

In [11]:
# for dataset_filename in experiments.keys():
#     plot_scatter_plot(x = experiments[dataset_filename].Z[0],
#                      y = experiments[dataset_filename].Z[1],
#                       hue_data = datasets_dictionary[dataset_filename][0].iloc[:, 0],
#                      title = dataset_filename)

In [12]:
from experimentation import create_results_dataframe

exact_method_results_df = create_results_dataframe(experiments)
exact_method_results_df

Unnamed: 0,dataset,error,execution_time,method,n_tries,search_space


In [13]:
exact_method_results_df.to_excel(f'../results/exact-method-results.xlsx')