In [None]:
import pandas as pd
import numpy as np
import os
import glob
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import ttest_rel, wilcoxon, shapiro
from sklearn.model_selection import GridSearchCV
import gc
import itertools
from sklearn.utils import resample
import ast
import json
import re

import utils 
import model_train
from constants import *
import particle_swarm

%load_ext autoreload
%autoreload 2

In [None]:
# Load the input dataset from a single CSV file, then show it as the cell output.
df = pd.read_csv("/PATH/data.csv")
df

In [None]:
# Run the project's preprocessing step. `df` is rebound to the result, so
# re-running this cell passes already-prepared data back into data_prep.
# NOTE(review): confirm utils.data_prep is safe to apply twice, or re-run the
# load cell first.
df = utils.data_prep(df)

# Inputs handed to the particle-swarm run further down. NQIs and CQIs are not
# defined in this notebook; presumably they are column-name lists provided by
# the star import of `constants` — TODO confirm.
bounds = utils.get_nqi_bounds(df, NQIs)
levels = utils.get_cqi_levels(df, CQIs)
# Mean of the prepared data over the NQI selection (per column, assuming NQIs
# is a list of column labels).
nqi_means = df[NQIs].mean()

In [None]:
# Candidate classifiers as (label, estimator) pairs. This list is passed as
# `models` to particle_swarm.run_particle_swarm_experiment in a later cell.
# Only logistic regression is active; the commented-out entries are
# alternative models that can be re-enabled.
models = [
    #("RF",RandomForestClassifier(criterion='entropy', random_state=42)),
    #("DT",DecisionTreeClassifier(criterion='entropy', random_state=42)),
    ("LR",LogisticRegression(solver='lbfgs', max_iter=1000,random_state=42,n_jobs=-1)),
    # ("NB",GaussianNB()),
    #("SVM",SGDClassifier(loss='hinge',n_jobs=-1,max_iter=1000,random_state=42)),
    #("NN",MLPClassifier(random_state=42))   
]

In [None]:
# Parameter grid: each key names a tunable setting and maps to the list of
# values to try for it. A single-element list pins that setting to one value.
parameters_dic = dict(
    gamma=[1],
    k=[20],
    n_cluster=[50],
    # l_multi_IL=[1],
    l_multi_k=[0],
    l_multi_ML=[1],  # alternative sweep: np.linspace(0, 1, 200)
)

# Cartesian product of the value lists, taken in key insertion order; each
# tuple is one full parameter combination.
param_combinations = [
    combo for combo in itertools.product(*parameters_dic.values())
]

In [None]:
# Launch the particle-swarm experiment using the prepared data, the model list,
# the parameter combinations and the NQI/CQI helpers built in the cells above.
# NOTE(review): the directory passed here is "/PATH/Folder", while the cell that
# saves the results uses '/PATH/FOLDER' — confirm whether both are meant to be
# the same location (they differ on case-sensitive filesystems).
results = particle_swarm.run_particle_swarm_experiment(
    df=df, 
    models=models, 
    param_combinations=param_combinations, 
    NQIs=NQIs, 
    CQIs=CQIs, 
    # presumably swarm size and iteration cap — TODO confirm against particle_swarm
    n_population=100,
    maxIter=100,
    n_bootstrap=1,
    bounds=bounds, 
    levels=levels, 
    nqi_means=nqi_means, 
    filedirectory="/PATH/Folder"
)

In [None]:
# Show the raw object returned by the experiment as the cell output.
results

In [None]:
# Persist the experiment results as a CSV file so they can be reloaded later
# without re-running the experiment.
results_df = pd.DataFrame(results)
filedirectory = '/PATH/FOLDER'
# Create the target directory up front: DataFrame.to_csv does not create
# missing parent directories and would otherwise raise only after the
# experiment has already finished.
os.makedirs(filedirectory, exist_ok=True)
# `filename` holds the full output path (directory + file name).
filename = os.path.join(filedirectory, "results.csv")
# index=False keeps the DataFrame's row index out of the file.
results_df.to_csv(filename, index=False)
        

In [None]:
# Reload the results from the CSV path written by the save cell above, rebinding
# `results_df`. Presumably this lets the plotting cell run from a fresh kernel
# without repeating the experiment — the path is hardcoded, so keep it in sync
# with the save location.
results_df = pd.read_csv("/PATH/FOLDER/results.csv")

In [None]:
# Plot the 'Entropy-Loss' metric from the results table using the project's
# helper. NOTE(review): the second string looks like a display label (title or
# axis text) — confirm against utils.plot_metric_trend_with_mean.
utils.plot_metric_trend_with_mean(results_df, 'Entropy-Loss', 'Average Entropy Loss')