In [1]:
import os
import sys

# Get the current working directory
cwd = os.getcwd()

# Add the path to the electre_tree module to the Python path.
# Guarded so that re-running this cell does not pile up duplicate entries.
electre_tree_dir = os.path.join(cwd, 'ELECTRE_Tree')
if electre_tree_dir not in sys.path:
    sys.path.append(electre_tree_dir)

from electre_tree import tree_e_tri_b , util_e_tri_b


In [2]:
# Required Libraries
import numpy as np
import pandas as pd

In [3]:
# Load the normalized dataset and preview its first three rows
df = pd.read_csv(filepath_or_buffer = '/project/data_normalized.csv')
df.head(n = 3)

Unnamed: 0,Storefront,Visible Sign of Business,Business WiFi,Business tables,Business Plugs,Business Internal CCTV,External CCTV,Number of female employees,Accepts credit card,Incline,...,Deaths,Injuries,Run over,Metro,Metrobus,Trolleybus,RTP,Concessioned,e-bike,Total coverage of transport
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,...,0.333333,0.866667,0.0,1.0,1.0,0.0,1.0,1.0,0.5,0.4
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.75,1.0,...,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.6
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,1.0,1.0,1.0,1.0,1.0,0.0,0.5,1.0,1.0,0.4


In [4]:
# Turn the DataFrame into a plain NumPy array (the form passed to the
# tree_e_tri_b / util_e_tri_b calls in the cells below) and display it
arr = df.to_numpy()
arr

array([[0.        , 0.        , 0.        , ..., 1.        , 0.5       ,
        0.4       ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        0.6       ],
       [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
        0.4       ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.66666667,
        0.2       ],
       [0.        , 0.        , 0.        , ..., 1.        , 0.5       ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 1.        , 0.5       ,
        0.        ]])

In [None]:
# ELECTRE Tree settings (forwarded to tree_electre_tri_b in the training cell)
rule    = 'pc'
classes = 3
models  = 10

# Left empty here; each name is bound to its own, separate list object
target, Q, P, V, W, B = [], [], [], [], [], []

# Genetic algorithm settings (forwarded to tree_electre_tri_b in the training cell)
elite, eta, mu    = 30, 1, 2
size, generations = 60, 250
rate, samples     = 0.05, 0.10

In [None]:
# Train Model
# `models` starts out as the requested ensemble size (an int set in the parameters
# cell) and is rebound below to the trained ensemble. Remember the requested size
# while it is still an int, so that re-running this cell on its own does not pass
# the already-trained ensemble back in as `number_of_models`.
if isinstance(models, int):
    requested_models = models

models = tree_e_tri_b.tree_electre_tri_b(arr,
                                         target_assignment = target,
                                         W = W,
                                         Q = Q,
                                         P = P,
                                         V = V,
                                         B = B,
                                         rule = rule,
                                         number_of_classes = classes,
                                         elite = elite,
                                         eta = eta,
                                         mu = mu,
                                         population_size = size,
                                         mutation_rate = rate,
                                         generations = generations,
                                         samples = samples,
                                         number_of_models = requested_models)

In [None]:
# Predict
# Runs the trained ensemble on the same array it was trained on, with the same
# `rule` that was passed to training. Returns two values; only `prediction` is
# used by the later cells visible in this notebook.
prediction, solutions = tree_e_tri_b.predict(models, arr, verbose = False, rule = rule)

The `plot_points` function used below plots the data using principal component analysis (PCA) when the data has more than two dimensions.

In [None]:
# Plot - Tree Model
# Hands the data array and the ensemble's predictions to the plotting helper.
util_e_tri_b.plot_points(arr, prediction)

In [None]:
from collections import Counter

# Classification: tally how many records received each predicted class
counter = Counter(prediction)
for cls in counter:
    count = counter[cls]
    print(f'Class: {cls}, Number of Records: {count}')


In [None]:
# Elicited Parameters
# metrics() returns 14 values: a mean/std pair for each of W, Q, P, V, B,
# the cut level and the accuracy, in that order.
(w_mean, w_std, q_mean, q_std, p_mean, p_std, v_mean, v_std,
 b_mean, b_std, cut_mean, cut_std, acc_mean, acc_std) = tree_e_tri_b.metrics(models, number_of_classes = classes)

print('Criteria Weights: ', np.around(w_mean, decimals = 2))
print('Q: ', np.around(q_mean, decimals = 2))
print('P: ', np.around(p_mean, decimals = 2))
print('V: ', np.around(v_mean, decimals = 2))
for i in range(0, len(b_mean)):
    print('b'+str(i+1)+': ', np.around(b_mean[i], decimals = 2))
# The cutoff and accuracy are single values for the whole ensemble, so they are
# reported once after the loop instead of being repeated for every b profile.
print('Lambda Cutoff: ', round(cut_mean,  2))
print('Accuracy: ',      round(acc_mean,  2))

In [None]:
# Plot - Elicited Parameters
# Re-run ELECTRE Tri-B once with the averaged parameters and draw its graph
e_tri = util_e_tri_b.electre_tri_b(
    arr,
    W         = w_mean,
    Q         = q_mean,
    P         = p_mean,
    V         = v_mean,
    B         = b_mean,
    cut_level = cut_mean,
    rule      = rule,
    verbose   = False,
    graph     = True,
)

In [None]:
# Plot Tree Model - Decision Boundaries
# Uses the full trained ensemble (`models`) together with the data array.
tree_e_tri_b.plot_decision_boundaries(arr, models)  

In [None]:
# Plot Mean Model - Decision Boundaries
# Wrap the averaged parameters in a one-entry "ensemble" so the same plotting
# routine used for the tree model can draw it. The slot order is kept exactly as
# originally written.
# NOTE(review): the slot layout is presumably the one plot_decision_boundaries
# expects for each model entry - confirm against the library.
model_mean = [[w_mean, acc_mean, [], [], [], b_mean, cut_mean, [], [], q_mean, p_mean, v_mean]]

# The data array in this notebook is `arr`; `X` is never defined, so the
# original call raised NameError on a fresh kernel.
tree_e_tri_b.plot_decision_boundaries(arr, model_mean)

In [12]:
import random

def random_search(arr, classes, cut_level, rule, W, Q, P, V, B,model, iterations):
    """Random search over the genetic-algorithm settings of the ELECTRE tree.

    Each trial draws one value per GA setting from a fixed candidate list,
    trains an ensemble with ``tree_e_tri_b.tree_electre_tri_b``, and reads the
    mean accuracy reported by ``tree_e_tri_b.metrics``. The best-scoring draw
    is kept.

    Parameters
    ----------
    arr : data passed unchanged to training and prediction.
    classes : forwarded as ``number_of_classes``.
    cut_level, rule, W, Q, P, V, B : forwarded unchanged to training;
        ``rule`` is also forwarded to prediction.
    model : forwarded as ``number_of_models``.
    iterations : number of random trials to run.

    Returns
    -------
    (best_params, best_accuracy)
        ``best_params`` is a dict with keys ``elite``, ``eta``, ``mu``,
        ``size``, ``rate``, ``generations`` and ``samples`` holding native
        Python numbers. It is ``{}`` (with ``best_accuracy == 0``) when no
        trial scored above 0, including when ``iterations`` is 0.

    Trials that raise ``ZeroDivisionError`` are reported and skipped.
    """
    # Candidate values per setting. Converted to plain lists so that
    # random.choice works regardless of how it tests for emptiness and so the
    # sampled values are native ints/floats rather than numpy scalars.
    elite_space       = np.arange(15, 25).tolist()
    eta_space         = np.arange(2, 5).tolist()
    mu_space          = np.arange(5, 8).tolist()
    size_space        = np.arange(25, 35).tolist()
    rate_space        = np.linspace(0.01, 0.1, num=10).tolist()
    generations_space = np.arange(100, 200, step=50).tolist()
    samples_space     = np.linspace(0.2, 0.3, num=5).tolist()

    # Best result seen so far
    best_accuracy = 0
    best_params = {}

    for _ in range(iterations):
        # Sample one random configuration (same draw order as before)
        elite = random.choice(elite_space)
        eta = random.choice(eta_space)
        mu = random.choice(mu_space)
        size = random.choice(size_space)
        rate = random.choice(rate_space)
        generations = random.choice(generations_space)
        samples = random.choice(samples_space)

        try:
            # Train an ensemble with the sampled configuration
            models = tree_e_tri_b.tree_electre_tri_b(arr,
                                                     W = W,
                                                     Q = Q,
                                                     P = P,
                                                     V = V,
                                                     B = B,
                                                     cut_level = cut_level,
                                                     rule = rule,
                                                     number_of_classes = classes,
                                                     elite = elite,
                                                     eta = eta,
                                                     mu = mu,
                                                     population_size = size,
                                                     mutation_rate = rate,
                                                     generations = generations,
                                                     samples = samples,
                                                     number_of_models = model)

            # Prediction is kept from the original flow, now using the caller's
            # rule instead of a hard-coded 'pc'. Its result does not feed the score.
            tree_e_tri_b.predict(models, arr, verbose = False, rule = rule)

            # Mean accuracy is the second-to-last value returned by metrics()
            *_, acc_mean, _ = tree_e_tri_b.metrics(models, number_of_classes = classes)
        except ZeroDivisionError:
            print('Zero division error occurred, skipping this set of parameters.')
            continue

        # Keep this configuration only if it strictly improves on the best so far
        if acc_mean > best_accuracy:
            best_accuracy = acc_mean
            best_params = {'elite': elite, 'eta': eta, 'mu': mu, 'size': size, 'rate': rate, 'generations': generations, 'samples': samples}

    return best_params, best_accuracy