In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [None]:
def binary2decimal(chromosome, n_genes, n_alleles, scale, offset):
    """Decode a binary chromosome into one real value per gene.

    The chromosome is reversed first and then read as ``n_genes`` consecutive
    groups of ``n_alleles`` entries. Inside a group, the entry at position
    ``k`` (counted from the start of the group in the reversed sequence) is
    weighted by ``2**k``. The weighted sum of each group is mapped to
    ``(sum - offset) / scale``.

    Parameters
    ----------
    chromosome : sequence
        Bit sequence supporting ``[::-1]`` slicing and integer indexing.
        Entries are used as numbers; they are not checked to be 0 or 1.
    n_genes : int
        Number of groups to decode.
    n_alleles : int
        Number of entries per group.
    scale : number
        Divisor applied to every decoded value.
    offset : number
        Value subtracted from every group sum before dividing.

    Returns
    -------
    numpy.ndarray
        Decoded values, one per gene, in the order the groups appear in the
        reversed chromosome.
    """
    bits = chromosome[::-1]
    decoded = []
    for gene in range(n_genes):
        start = gene * n_alleles
        # Weight of each bit depends only on its position inside the group.
        weighted_bits = [bits[start + k] * 2**k for k in range(n_alleles)]
        decoded.append((np.sum(weighted_bits) - offset) / scale)
    return np.array(decoded)

In [None]:
def gaussian(x, mu, sigma):
    """Evaluate the normal probability density with mean ``mu`` and standard
    deviation ``sigma`` at ``x``.

    Computes ``1 / (sigma * sqrt(2*pi)) * exp(-(x - mu)**2 / (2 * sigma**2))``
    using NumPy functions. ``sigma`` is not validated; a value of zero
    results in a division by zero.
    """
    normalisation = 1 / (sigma * np.sqrt(2 * np.pi))
    exponent = (-(x - mu)**2) / (2 * sigma**2)
    return normalisation * np.exp(exponent)

In [None]:
def prepare_data(name_file):
    """Load the CSV, min-max scale the features and create the train/test split.

    The result is published through the module-level names ``x_train``,
    ``x_test``, ``y_train``, ``y_test`` and ``scaler``; the function itself
    returns ``None``.

    Parameters
    ----------
    name_file : str or path-like
        CSV file readable by ``pandas.read_csv``. It must contain the seven
        feature columns listed in ``feature_columns`` below plus ``Outcome``.

    Raises
    ------
    KeyError
        If one of the required columns is missing from the file.
    """
    global x_train, x_test, y_train, y_test, scaler

    # Select features by name rather than by position. The previous
    # ``iloc[:, 0:6]`` slice silently dropped 'Age', leaving six columns
    # while the model code indexes seven (columns 0..6).
    feature_columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
                       'Insulin', 'BMI', 'Age']

    data = pd.read_csv(name_file)
    # DataFrame.info() prints its own report and returns None, so wrapping it
    # in print() only added a stray "None" line.
    data.info()

    x = data[feature_columns].values
    print(x)
    y = data['Outcome'].values

    # NOTE(review): the scaler is fitted on the full dataset before the split,
    # so test rows influence the scaling. Fit on the training split only if a
    # strictly held-out evaluation is required.
    scaler = MinMaxScaler()
    x = scaler.fit_transform(x)

    # Fixed random_state keeps the 80/20 split reproducible across runs.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
    
    

In [None]:
def fitness_evaluation(x, n_genes, n_alleles, scale, offset):
    """Score a chromosome by the mean absolute error of its Gaussian model.

    The chromosome is decoded with ``binary2decimal`` into 14 values read as
    seven ``(mu, sigma)`` pairs, one pair per feature column of ``x_train``.
    For every training row the seven Gaussian densities are multiplied and
    the absolute difference to the row's label in ``y_train`` is taken. The
    fitness is the mean of those differences (lower is better).

    The training data is read from the module-level ``x_train`` / ``y_train``.
    If ``x_train`` has not been created yet, ``prepare_data('diabetes.csv')``
    is called first.

    Parameters
    ----------
    x : sequence
        Binary chromosome, forwarded to ``binary2decimal``.
    n_genes, n_alleles, scale, offset
        Decoding parameters, forwarded unchanged to ``binary2decimal``.

    Returns
    -------
    list
        One-element list holding the mean absolute error.

    Raises
    ------
    IndexError
        If fewer than 14 values are decoded or ``x_train`` has fewer than
        seven columns.
    """
    n_features = 7

    params = binary2decimal(x, n_genes, n_alleles, scale, offset)
    # Integer-array indexing (unlike slicing) raises IndexError when an index
    # is out of range, so a too-short chromosome still fails loudly.
    mu_index = np.arange(0, 2 * n_features, 2)
    mus = params[mu_index]
    sigmas = params[mu_index + 1]
    # NOTE(review): a decoded sigma of 0 makes the density divide by zero and
    # the fitness becomes nan; confirm the scale/offset used by the caller
    # excludes that value.

    # Load the dataset lazily on first use. An explicit check replaces the
    # former bare ``except:``, which also swallowed unrelated errors such as
    # KeyboardInterrupt and answered them by re-reading the CSV.
    if globals().get('x_train') is None:
        prepare_data('diabetes.csv')

    features = x_train[:, np.arange(n_features)]
    labels = y_train

    # Broadcasting evaluates every (row, feature) density in one call; the
    # product over axis 1 is the per-row joint value compared with the label.
    densities = gaussian(features, mus, sigmas)
    joint = np.prod(densities, axis=1)
    error = np.abs(joint - labels)

    fitness_value = [np.sum(error) / len(error)]
    print(f"Fitness value: {fitness_value}")

    return fitness_value

In [None]:
def _gaussian_predictions(features, mus, sigmas):
    """Label each row 1 when the product of its per-feature Gaussian densities exceeds 0.5, else 0."""
    # Integer-array indexing raises IndexError if a required column is missing.
    columns = np.arange(len(mus))
    joint = np.prod(gaussian(features[:, columns], mus, sigmas), axis=1)
    return (joint > 0.5).astype(int)


def final_result(best_chromosome, array_best_fitness, array_worst_fitness, n_genes, n_alleles, scale, offset):
    """Print a summary of the genetic-algorithm run and plot the fitness history.

    The best chromosome is decoded with ``binary2decimal`` into seven
    ``(mu, sigma)`` pairs. Rows of the module-level ``x_train`` and ``x_test``
    are classified with those parameters and the accuracy against ``y_train``
    and ``y_test`` is reported. The data must already have been loaded
    (see ``prepare_data``); this function does not load it.

    Parameters
    ----------
    best_chromosome : sequence
        Binary chromosome of the best individual.
    array_best_fitness, array_worst_fitness : sequence
        Per-generation best and worst fitness values; the last entry of
        ``array_best_fitness`` is reported as the final fitness.
    n_genes, n_alleles, scale, offset
        Decoding parameters, forwarded unchanged to ``binary2decimal``.

    Returns
    -------
    None
    """
    n_features = 7

    best_chromosome = binary2decimal(best_chromosome, n_genes, n_alleles, scale, offset)

    # Even positions hold the means, odd positions the standard deviations.
    mu_index = np.arange(0, 2 * n_features, 2)
    mus = best_chromosome[mu_index]
    sigmas = best_chromosome[mu_index + 1]

    accuracy = np.mean(_gaussian_predictions(x_train, mus, sigmas) == y_train)
    # The held-out split was created but never scored before; report it so
    # the result is not judged on training data alone.
    test_accuracy = np.mean(_gaussian_predictions(x_test, mus, sigmas) == y_test)

    print("########################################################################")
    print("#")
    print("# GENETIC ALGORITHM RESULTS")
    print(f"#   Best fitness: {array_best_fitness[-1]}")
    print(f"#   Best candidate solution: {best_chromosome}")
    print("#")
    print("# GAUSSIAN PARAMETERS OBTAINED")
    for number, (mu, sigma) in enumerate(zip(mus, sigmas), start=1):
        print(f"#   Feature {number} - mu: {mu}, sigma: {sigma}")
    print("#")
    print("# PERFORMANCE METRICS")
    print(f"#   Average Classification Error: {array_best_fitness[-1]}")
    print(f"#   Training Data Size: {len(x_train)}")
    print(f"#   Test Data Size: {len(x_test)}")
    print(f"#   Training Accuracy: {accuracy:.4f}")
    print(f"#   Test Accuracy: {test_accuracy:.4f}")
    print("#")
    print("########################################################################")

    plt.figure(figsize=(10, 10))
    plt.plot(array_worst_fitness, label='Worst Fitness', color='red')
    plt.plot(array_best_fitness, label='Best Fitness', color='green')
    plt.title('Fitness Evolution - Gaussian Classification')
    plt.xlabel('Generation')
    plt.ylabel('Fitness (Average Error)')
    plt.legend()
    plt.show()