# Exact Method
Using Gurobi solver

In [1]:
%config Completer.use_jedi = False

In [25]:
import math
import numpy as np
import pandas as pd
import gurobipy as gp
from gurobipy import GRB
import seaborn as sns
from matplotlib import pyplot as plt
from scipy.spatial.distance import pdist

In [3]:
import os
DATASETS_BASEPATH = '../datasets/'
synthetic_datasets = os.listdir(f'{DATASETS_BASEPATH}/sklearn-datasets')
real_datasets = os.listdir(f'{DATASETS_BASEPATH}/sklearn-datasets')

In [4]:
datasets_dictionary = dict()
for f in synthetic_datasets:
    try:
        df = pd.read_csv(f'{DATASETS_BASEPATH}/sklearn-datasets/{f}')
        target_columns = [col for col in df.columns if col.startswith('target')]
        datasets_dictionary[f] = (df, target_columns)
    except:
        pass

for f in real_datasets:
    try:
        df = pd.read_csv(f'{DATASETS_BASEPATH}/real-datasets{f}').drop(columns = ['instances'])
        target_columns = [col for col in df.columns if col.startswith('algo')]
        datasets_dictionary[f] = (df, target_columns)
    except:
        pass

In [5]:
df = pd.read_csv('../datasets/real-datasets/compas_metadata.csv').drop(columns = ['instances'])
performance_columns = [col for col in df.columns if col.startswith('algo')]
X = df.drop(columns = performance_columns)
Y = df[performance_columns][['algo_bagging']]
# F = X.to_numpy()
# Y = Y.to_numpy()

# # Como pelo jeito não consigo modelar nosso problema matricial, vamos pegar uma parte de F e Y
# F = np.array(F[0,0])
# y = np.array(Y[0,0])

In [6]:
from dataclasses import dataclass

@dataclass
class OptimizationOutput:
    A: np.ndarray
    B: np.ndarray
    C: np.ndarray
    Z: np.ndarray
    error: float
    execution_time: float
    n_tries: int
    search_space: float
    method_name: str

In [15]:
import time

def exact_method(X, Y):
    F_matrix = X.to_numpy()
    Y_matrix = Y.to_numpy()
    
    A = list()
    B = list()
    C = list()
    Z = list() 
    execution_time = list() #end_time - start_time
    ntries = 1
    search_space = 0
    for i in range(F_matrix[:10].shape[0]):
        print(f"optimizing {i} data point")
        # Como pelo jeito não consigo modelar nosso problema matricial, vamos pegar uma parte de F e Y
        F = F_matrix[i, 0]
        y = Y_matrix[i, 0]
        try:

            # Create a new model
            mod = gp.Model("mod1") 

            # Create variables
            Ar = mod.addVar(vtype=GRB.CONTINUOUS, name="Ar") 
            Br = mod.addVar(vtype=GRB.CONTINUOUS, name="Br")
            cr = mod.addVar(vtype=GRB.CONTINUOUS, name="cr")

            # Set objective
            mod.setObjective((F - Br*Ar*F) + (y - cr*Ar*F), 
                             GRB.MINIMIZE)

            # Adjust parameters
            mod.params.NonConvex = 2

            start_time = time.time()
            mod.optimize()
            end_time = time.time()

            A.append(Ar.X)
            B.append(Br.X)
            C.append(cr.X)
            Z.append(mod.objVal)
            execution_time.append(end_time - start_time)
            ntries = 1
            search_space = 0

    #         # print variables
    #         for v in mod.getVars():
    #             print('%s %g' % (v.varName, v.x))

    #         #print optimized objective function
    #         print('Obj: %g' % mod.objVal)

        except gp.GurobiError as e:
            pass
            print('Error code ' + str(e.errno) + ": " + str(e))

        except AttributeError:
            pass
            print('Encountered an attribute error')
            print(e.message)
            
    out = OptimizationOutput(A, 
                             B, 
                             C, 
                             Z, 0, np.sum(execution_time), ntries, search_space, 'exact-method')
    
    return out

In [16]:
from experimentation import run_optimization

In [17]:
%%time
experiments = dict()
for filename, item in datasets_dictionary.items():
    experiments[filename] = run_optimization(df = item[0], 
                                             target_columns = item[1], 
                                             optimization_method = exact_method)

optimizing 0 data point
Changed value of parameter NonConvex to 2
   Prev: -1  Min: -1  Max: 2  Default: -1
Gurobi Optimizer version 9.1.2 build v9.1.2rc0 (mac64)
Thread count: 2 physical cores, 4 logical processors, using up to 4 threads
Optimize a model with 0 rows, 3 columns and 0 nonzeros
Model fingerprint: 0xe72a96bc
Model has 2 quadratic objective terms
Coefficient statistics:
  Matrix range     [0e+00, 0e+00]
  Objective range  [0e+00, 0e+00]
  QObjective range [6e-01, 6e-01]
  Bounds range     [0e+00, 0e+00]
  RHS range        [0e+00, 0e+00]

Continuous model is non-convex -- solving as a MIP.

Found heuristic solution: objective 1.3231222
Found heuristic solution: objective -2.58498e+18
Presolve time: 0.00s
Presolved: 5 rows, 7 columns, 11 nonzeros
Presolved model has 2 bilinear constraint(s)
Variable types: 7 continuous, 0 integer (0 binary)

Root relaxation: unbounded, 3 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl 

In [19]:
def plot_scatter_plot(x, y, title = None, x_label = None, y_label = None, hue_data = None):
    fig, ax = plt.subplots(figsize = (10, 8))
    n_colors = len(hue_data.unique())
    sns.scatterplot(x = x, y = y, hue = hue_data, palette = sns.color_palette("vlag", n_colors), legend = False)
    ax.set_title(title)
    sns.despine()

In [27]:
# for dataset_filename in experiments.keys():
#     plot_scatter_plot(x = experiments[dataset_filename].Z[0],
#                      y = experiments[dataset_filename].Z[1],
#                       hue_data = datasets_dictionary[dataset_filename][0].iloc[:, 0],
#                      title = dataset_filename)

In [21]:
from experimentation import create_results_dataframe

exact_method_results_df = create_results_dataframe(experiments)
exact_method_results_df

Unnamed: 0,dataset,error,execution_time,method,n_tries,search_space
0,100_features_0.1_error.csv,0,11.397221,exact-method,1,0
1,10_features_0.1_error.csv,0,5.601737,exact-method,1,0
2,5_features_0.01_error.csv,0,14.331114,exact-method,1,0
3,10_features_0.01_error.csv,0,3.960193,exact-method,1,0
4,5_features_0.1_error.csv,0,9.244771,exact-method,1,0
5,2_features_0.01_error.csv,0,9.936663,exact-method,1,0
6,2_features_0.1_error.csv,0,8.382743,exact-method,1,0
7,10_features_0.5_error.csv,0,16.197673,exact-method,1,0
8,100_features_0.5_error.csv,0,6.750462,exact-method,1,0
9,5_features_0.5_error.csv,0,12.726465,exact-method,1,0


In [23]:
exact_method_results_df.to_excel(f'../results/exact-method-results.xlsx')