In [1]:
import pandas as pd
import numpy as np
import os
import glob
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import ttest_rel, wilcoxon, shapiro
from sklearn.model_selection import GridSearchCV
import gc
import itertools
from sklearn.utils import resample
import ast
import json
import re
from sklearn.base import clone
import copy

import utils 
import model_train
from constants import *
import particle_swarm


%load_ext autoreload
%autoreload 2

In [2]:
# List of file paths
df = pd.read_csv(path)
df

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,credit_risk
0,<0,6,critical/other existing credit,radio/tv,1169,no known savings,>=7,4,male single,none,...,real estate,67,none,own,2,skilled,1,yes,yes,good
1,0<=X<200,48,existing paid,radio/tv,5951,<100,1<=X<4,2,female div/dep/mar,none,...,real estate,22,none,own,1,skilled,1,none,yes,bad
2,no checking,12,critical/other existing credit,education,2096,<100,4<=X<7,2,male single,none,...,real estate,49,none,own,1,unskilled resident,2,none,yes,good
3,<0,42,existing paid,furniture/equipment,7882,<100,4<=X<7,2,male single,guarantor,...,life insurance,45,none,for free,1,skilled,2,none,yes,good
4,<0,24,delayed previously,new car,4870,<100,1<=X<4,3,male single,none,...,no known property,53,none,for free,2,skilled,2,none,yes,bad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,no checking,12,existing paid,furniture/equipment,1736,<100,4<=X<7,3,female div/dep/mar,none,...,real estate,31,none,own,1,unskilled resident,1,none,yes,good
996,<0,30,existing paid,used car,3857,<100,1<=X<4,4,male div/sep,none,...,life insurance,40,none,own,1,high qualif/self emp/mgmt,1,yes,yes,good
997,no checking,12,existing paid,radio/tv,804,<100,>=7,4,male single,none,...,car,38,none,own,1,skilled,1,none,yes,good
998,<0,45,existing paid,radio/tv,1845,<100,1<=X<4,4,male single,none,...,no known property,23,none,for free,1,skilled,1,yes,yes,bad


In [3]:
df = utils.data_prep(df)

bounds = utils.get_nqi_bounds(df, NQIs)
levels = utils.get_cqi_levels(df, CQIs)
nqi_means = df[NQIs].mean()

In [4]:
# Define a list of ML models
models = [
    ("DT", DecisionTreeClassifier(criterion='entropy', random_state=42)),
    # ("LR", LogisticRegression(solver='lbfgs', max_iter=100, random_state=42, n_jobs=-1)),
    # ("NB", GaussianNB()),
    # ("NN", MLPClassifier(random_state=42)),
    # ("RF", RandomForestClassifier(criterion='entropy', random_state=42)),
    # ("SVM", LinearSVC(random_state=42))   
]

In [5]:
# Define the dictionary with all parameter values
parameters_dic = {'gamma': [1],
                  'k': [20],
                  'n_cluster': [3],
                  'initial_violation_threshold': [10],
                  'violation_decay_rate': [0.5],
                  'penalty_weight': [1]
                  }

# Generate all combinations of parameters using itertools.product
param_combinations = list(itertools.product(*parameters_dic.values()))

In [9]:
results = particle_swarm.run_particle_swarm_experiment(
    df=df, 
    models=models, 
    param_combinations=param_combinations, 
    NQIs=NQIs, 
    CQIs=CQIs, 
    n_population=20,
    maxIter=20,
    n_bootstrap=2,
    bounds=bounds, 
    levels=levels, 
    nqi_means=nqi_means, 
    filedirectory='/Users/yusiwei/Library/CloudStorage/OneDrive-Personal/research/Fourth Year Paper/Experiments/2nd experiments/Experiment results/V2/Anonymized Data'
)

Running with k = 20, n_cluster = 3,  initial_violation_threshold = 10, violation_decay_rate = 0.5, penalty_weight = 1
Training model: DT
Iteration: 0
In iteration_info, Iteration 0 Particel 0 results: [11.53396908 10.09222295]
In iteration_info, Iteration 0 Particel 1 results: [11.89440562  9.91200468]
In iteration_info, Iteration 0 Particel 2 results: [12.07462389 10.45265948]
In iteration_info, Iteration 0 Particel 3 results: [12.25484215  9.19113161]
In iteration_info, Iteration 0 Particel 4 results: [13.15593349 12.61527869]
In iteration_info, Iteration 0 Particel 5 results: [11.53396908 11.35375082]
In iteration_info, Iteration 0 Particel 6 results: [12.43506042 10.63287775]
In iteration_info, Iteration 0 Particel 7 results: [11.17353255 10.99331428]
In iteration_info, Iteration 0 Particel 8 results: [10.09222295  9.91200468]
In iteration_info, Iteration 0 Particel 9 results: [10.09222295  8.83069508]
In iteration_info, Iteration 0 Particel 10 results: [11.71418735  9.01091335]
In

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 1 Particel 3 results: [13.87680655 12.07462389]
In iteration_info, Iteration 1 Particel 4 results: [13.15593349 12.61527869]
In iteration_info, Iteration 1 Particel 5 results: [10.81309602 13.87680655]
In iteration_info, Iteration 1 Particel 6 results: [12.43506042 10.81309602]
In iteration_info, Iteration 1 Particel 7 results: [11.17353255 10.99331428]
In iteration_info, Iteration 1 Particel 8 results: [10.09222295  9.91200468]
In iteration_info, Iteration 1 Particel 9 results: [10.09222295  8.83069508]
In iteration_info, Iteration 1 Particel 10 results: [11.71418735  9.01091335]
In iteration_info, Iteration 1 Particel 11 results: [11.17353255 10.81309602]
In iteration_info, Iteration 1 Particel 12 results: [10.99331428  9.37134988]
In iteration_info, Iteration 1 Particel 13 results: [10.45265948  9.55156815]
In iteration_info, Iteration 1 Particel 14 results: [10.99331428 11.17353255]
In iteration_info, Iteration 1 Particel 15 results: [9.55156815 9.37134

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 2 Particel 7 results: [11.17353255 10.99331428]
In iteration_info, Iteration 2 Particel 8 results: [10.09222295  9.91200468]
In iteration_info, Iteration 2 Particel 9 results: [10.09222295  8.83069508]
In iteration_info, Iteration 2 Particel 10 results: [11.71418735  9.01091335]
In iteration_info, Iteration 2 Particel 11 results: [11.17353255 10.81309602]
In iteration_info, Iteration 2 Particel 12 results: [10.99331428  9.37134988]
In iteration_info, Iteration 2 Particel 13 results: [10.45265948  9.55156815]
In iteration_info, Iteration 2 Particel 14 results: [11.35375082 10.27244122]
In iteration_info, Iteration 2 Particel 15 results: [9.55156815 9.37134988]
In iteration_info, Iteration 2 Particel 16 results: [11.35375082 10.27244122]
In iteration_info, Iteration 2 Particel 17 results: [10.09222295 11.17353255]
In iteration_info, Iteration 2 Particel 18 results: [ 9.91200468 10.27244122]
In iteration_info, Iteration 2 Particel 19 results: [13.15593349 12.0

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 3 Particel 2 results: [11.89440562 10.99331428]
In iteration_info, Iteration 3 Particel 3 results: [13.87680655 12.07462389]
In iteration_info, Iteration 3 Particel 4 results: [11.17353255  9.73178642]
In iteration_info, Iteration 3 Particel 5 results: [14.05702482 10.81309602]
In iteration_info, Iteration 3 Particel 6 results: [12.07462389 11.17353255]
In iteration_info, Iteration 3 Particel 7 results: [11.17353255 10.99331428]
In iteration_info, Iteration 3 Particel 8 results: [10.09222295  9.91200468]
In iteration_info, Iteration 3 Particel 9 results: [ 9.37134988 10.99331428]
In iteration_info, Iteration 3 Particel 10 results: [11.71418735  9.01091335]
In iteration_info, Iteration 3 Particel 11 results: [11.17353255 10.81309602]
In iteration_info, Iteration 3 Particel 12 results: [12.61527869 10.45265948]
In iteration_info, Iteration 3 Particel 13 results: [ 8.29004028 12.61527869]
In iteration_info, Iteration 3 Particel 14 results: [10.63287775  9.1911

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 4 Particel 4 results: [11.17353255  9.73178642]
In iteration_info, Iteration 4 Particel 5 results: [10.99331428 10.09222295]
In iteration_info, Iteration 4 Particel 6 results: [12.07462389 11.17353255]
In iteration_info, Iteration 4 Particel 7 results: [ 9.37134988 10.81309602]
In iteration_info, Iteration 4 Particel 8 results: [ 9.73178642 11.71418735]
In iteration_info, Iteration 4 Particel 9 results: [11.17353255 12.25484215]
In iteration_info, Iteration 4 Particel 10 results: [11.71418735 10.81309602]
In iteration_info, Iteration 4 Particel 11 results: [12.07462389 12.07462389]
In iteration_info, Iteration 4 Particel 12 results: [10.99331428 11.53396908]
In iteration_info, Iteration 4 Particel 13 results: [10.99331428 11.71418735]
In iteration_info, Iteration 4 Particel 14 results: [11.35375082  9.19113161]
In iteration_info, Iteration 4 Particel 15 results: [12.25484215  9.19113161]
In iteration_info, Iteration 4 Particel 16 results: [12.43506042 10.09

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 5 Particel 0 results: [10.99331428 10.81309602]
In iteration_info, Iteration 5 Particel 1 results: [12.61527869 10.45265948]
In iteration_info, Iteration 5 Particel 2 results: [12.61527869 12.61527869]
In iteration_info, Iteration 5 Particel 3 results: [12.79549695  9.91200468]
In iteration_info, Iteration 5 Particel 4 results: [11.17353255  9.73178642]
In iteration_info, Iteration 5 Particel 5 results: [10.45265948 11.71418735]
In iteration_info, Iteration 5 Particel 6 results: [12.07462389 11.17353255]
In iteration_info, Iteration 5 Particel 7 results: [ 9.37134988 10.81309602]
In iteration_info, Iteration 5 Particel 8 results: [10.99331428 10.63287775]
In iteration_info, Iteration 5 Particel 9 results: [11.53396908 11.53396908]
In iteration_info, Iteration 5 Particel 10 results: [11.17353255 12.43506042]
In iteration_info, Iteration 5 Particel 11 results: [12.07462389 12.07462389]
In iteration_info, Iteration 5 Particel 12 results: [ 7.74938548 11.533969

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 6 Particel 3 results: [10.63287775 10.99331428]
In iteration_info, Iteration 6 Particel 4 results: [ 9.91200468 10.63287775]
In iteration_info, Iteration 6 Particel 5 results: [12.25484215  9.19113161]
In iteration_info, Iteration 6 Particel 6 results: [12.07462389 11.17353255]
In iteration_info, Iteration 6 Particel 7 results: [11.71418735  9.73178642]
In iteration_info, Iteration 6 Particel 8 results: [12.07462389 12.25484215]
In iteration_info, Iteration 6 Particel 9 results: [12.07462389 10.81309602]
In iteration_info, Iteration 6 Particel 10 results: [11.17353255 12.43506042]
In iteration_info, Iteration 6 Particel 11 results: [10.27244122 10.27244122]
In iteration_info, Iteration 6 Particel 12 results: [12.07462389 12.43506042]
In iteration_info, Iteration 6 Particel 13 results: [11.71418735 11.89440562]
In iteration_info, Iteration 6 Particel 14 results: [ 9.73178642 10.27244122]
In iteration_info, Iteration 6 Particel 15 results: [12.25484215  9.191

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 7 Particel 7 results: [12.43506042 12.07462389]
In iteration_info, Iteration 7 Particel 8 results: [12.25484215 12.25484215]
In iteration_info, Iteration 7 Particel 9 results: [12.97571522  9.55156815]
In iteration_info, Iteration 7 Particel 10 results: [11.35375082 10.09222295]
In iteration_info, Iteration 7 Particel 11 results: [11.53396908 10.09222295]
In iteration_info, Iteration 7 Particel 12 results: [12.07462389 12.25484215]
In iteration_info, Iteration 7 Particel 13 results: [ 8.83069508 10.63287775]
In iteration_info, Iteration 7 Particel 14 results: [ 9.73178642 10.27244122]
In iteration_info, Iteration 7 Particel 15 results: [11.35375082 10.63287775]
In iteration_info, Iteration 7 Particel 16 results: [12.25484215 11.89440562]
In iteration_info, Iteration 7 Particel 17 results: [10.63287775 10.99331428]
In iteration_info, Iteration 7 Particel 18 results: [10.63287775 12.97571522]
In iteration_info, Iteration 7 Particel 19 results: [10.45265948 10

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 8 Particel 3 results: [12.07462389 11.89440562]
In iteration_info, Iteration 8 Particel 4 results: [10.27244122 11.35375082]
In iteration_info, Iteration 8 Particel 5 results: [10.45265948 11.71418735]
In iteration_info, Iteration 8 Particel 6 results: [10.99331428  9.73178642]
In iteration_info, Iteration 8 Particel 7 results: [11.35375082  9.55156815]
In iteration_info, Iteration 8 Particel 8 results: [12.25484215 12.97571522]
In iteration_info, Iteration 8 Particel 9 results: [12.97571522  9.55156815]
In iteration_info, Iteration 8 Particel 10 results: [11.35375082 10.09222295]
In iteration_info, Iteration 8 Particel 11 results: [11.71418735 10.45265948]
In iteration_info, Iteration 8 Particel 12 results: [12.07462389 11.89440562]
In iteration_info, Iteration 8 Particel 13 results: [10.09222295 12.07462389]
In iteration_info, Iteration 8 Particel 14 results: [11.53396908 10.81309602]
In iteration_info, Iteration 8 Particel 15 results: [11.35375082 10.632

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 9 Particel 7 results: [10.81309602 11.89440562]
In iteration_info, Iteration 9 Particel 8 results: [11.17353255  9.91200468]
In iteration_info, Iteration 9 Particel 9 results: [10.45265948 12.43506042]
In iteration_info, Iteration 9 Particel 10 results: [ 9.73178642 11.53396908]
In iteration_info, Iteration 9 Particel 11 results: [9.73178642 8.83069508]
In iteration_info, Iteration 9 Particel 12 results: [10.99331428  9.37134988]
In iteration_info, Iteration 9 Particel 13 results: [11.89440562 10.81309602]
In iteration_info, Iteration 9 Particel 14 results: [11.71418735 10.63287775]
In iteration_info, Iteration 9 Particel 15 results: [11.35375082 10.63287775]
In iteration_info, Iteration 9 Particel 16 results: [11.35375082 10.99331428]
In iteration_info, Iteration 9 Particel 17 results: [9.55156815 9.37134988]
In iteration_info, Iteration 9 Particel 18 results: [12.07462389  9.91200468]
In iteration_info, Iteration 9 Particel 19 results: [10.81309602 11.894

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 10 Particel 1 results: [11.53396908 12.25484215]
In iteration_info, Iteration 10 Particel 2 results: [10.99331428 10.27244122]
In iteration_info, Iteration 10 Particel 3 results: [11.89440562 12.61527869]
In iteration_info, Iteration 10 Particel 4 results: [ 9.73178642 11.17353255]
In iteration_info, Iteration 10 Particel 5 results: [11.71418735 11.71418735]
In iteration_info, Iteration 10 Particel 6 results: [10.45265948 11.89440562]
In iteration_info, Iteration 10 Particel 7 results: [12.61527869 12.43506042]
In iteration_info, Iteration 10 Particel 8 results: [12.43506042 11.71418735]
In iteration_info, Iteration 10 Particel 9 results: [11.17353255 10.81309602]
In iteration_info, Iteration 10 Particel 10 results: [12.79549695 11.89440562]
In iteration_info, Iteration 10 Particel 11 results: [11.71418735 10.27244122]
In iteration_info, Iteration 10 Particel 12 results: [11.71418735 10.45265948]
In iteration_info, Iteration 10 Particel 13 results: [ 9.9120

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 11 Particel 5 results: [12.43506042 11.71418735]
In iteration_info, Iteration 11 Particel 6 results: [11.17353255 11.53396908]
In iteration_info, Iteration 11 Particel 7 results: [11.17353255 11.71418735]
In iteration_info, Iteration 11 Particel 8 results: [11.71418735 11.35375082]
In iteration_info, Iteration 11 Particel 9 results: [11.17353255 11.35375082]
In iteration_info, Iteration 11 Particel 10 results: [11.89440562 11.53396908]
In iteration_info, Iteration 11 Particel 11 results: [12.25484215 11.35375082]
In iteration_info, Iteration 11 Particel 12 results: [10.09222295 10.81309602]
In iteration_info, Iteration 11 Particel 13 results: [10.63287775 12.97571522]
In iteration_info, Iteration 11 Particel 14 results: [10.45265948 10.63287775]
In iteration_info, Iteration 11 Particel 15 results: [11.35375082 10.63287775]
In iteration_info, Iteration 11 Particel 16 results: [11.89440562 10.45265948]
In iteration_info, Iteration 11 Particel 17 results: [10.

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 12 Particel 1 results: [13.15593349 12.61527869]
In iteration_info, Iteration 12 Particel 2 results: [12.43506042 11.17353255]
In iteration_info, Iteration 12 Particel 3 results: [ 9.91200468 10.81309602]
In iteration_info, Iteration 12 Particel 4 results: [10.45265948 13.15593349]
In iteration_info, Iteration 12 Particel 5 results: [11.71418735 12.25484215]
In iteration_info, Iteration 12 Particel 6 results: [11.71418735 11.89440562]
In iteration_info, Iteration 12 Particel 7 results: [ 9.91200468 11.17353255]
In iteration_info, Iteration 12 Particel 8 results: [12.25484215 11.35375082]
In iteration_info, Iteration 12 Particel 9 results: [11.71418735 11.89440562]
In iteration_info, Iteration 12 Particel 10 results: [11.35375082 12.07462389]
In iteration_info, Iteration 12 Particel 11 results: [11.35375082 10.99331428]
In iteration_info, Iteration 12 Particel 12 results: [ 9.19113161 14.05702482]
In iteration_info, Iteration 12 Particel 13 results: [13.1559

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 13 Particel 5 results: [11.17353255 12.43506042]
In iteration_info, Iteration 13 Particel 6 results: [10.45265948 11.71418735]
In iteration_info, Iteration 13 Particel 7 results: [11.53396908 13.33615175]
In iteration_info, Iteration 13 Particel 8 results: [11.17353255 12.07462389]
In iteration_info, Iteration 13 Particel 9 results: [12.97571522 11.89440562]
In iteration_info, Iteration 13 Particel 10 results: [10.99331428 10.09222295]
In iteration_info, Iteration 13 Particel 11 results: [13.87680655 10.63287775]
In iteration_info, Iteration 13 Particel 12 results: [ 9.55156815 11.17353255]
In iteration_info, Iteration 13 Particel 13 results: [10.45265948 12.61527869]
In iteration_info, Iteration 13 Particel 14 results: [ 9.19113161 11.35375082]
In iteration_info, Iteration 13 Particel 15 results: [11.35375082 10.63287775]
In iteration_info, Iteration 13 Particel 16 results: [12.43506042 12.07462389]
In iteration_info, Iteration 13 Particel 17 results: [10.

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 14 Particel 0 results: [10.09222295 11.89440562]
In iteration_info, Iteration 14 Particel 1 results: [12.97571522 12.25484215]
In iteration_info, Iteration 14 Particel 2 results: [11.53396908  9.73178642]
In iteration_info, Iteration 14 Particel 3 results: [12.43506042  9.73178642]
In iteration_info, Iteration 14 Particel 4 results: [11.17353255 11.17353255]
In iteration_info, Iteration 14 Particel 5 results: [ 8.29004028 11.17353255]
In iteration_info, Iteration 14 Particel 6 results: [10.63287775 11.35375082]
In iteration_info, Iteration 14 Particel 7 results: [11.53396908 13.33615175]
In iteration_info, Iteration 14 Particel 8 results: [10.45265948 11.53396908]
In iteration_info, Iteration 14 Particel 9 results: [ 9.73178642 10.99331428]
In iteration_info, Iteration 14 Particel 10 results: [11.35375082 12.07462389]
In iteration_info, Iteration 14 Particel 11 results: [12.43506042 10.45265948]
In iteration_info, Iteration 14 Particel 12 results: [10.45265

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 15 Particel 4 results: [11.35375082 13.15593349]
In iteration_info, Iteration 15 Particel 5 results: [11.71418735 13.51637002]
In iteration_info, Iteration 15 Particel 6 results: [11.89440562 14.41746136]
In iteration_info, Iteration 15 Particel 7 results: [10.27244122  9.91200468]
In iteration_info, Iteration 15 Particel 8 results: [12.43506042 11.89440562]
In iteration_info, Iteration 15 Particel 9 results: [10.99331428 11.89440562]
In iteration_info, Iteration 15 Particel 10 results: [14.41746136 12.97571522]
In iteration_info, Iteration 15 Particel 11 results: [10.99331428 11.89440562]
In iteration_info, Iteration 15 Particel 12 results: [13.15593349 12.07462389]
In iteration_info, Iteration 15 Particel 13 results: [10.27244122 11.71418735]
In iteration_info, Iteration 15 Particel 14 results: [10.63287775  9.37134988]
In iteration_info, Iteration 15 Particel 15 results: [11.35375082 10.63287775]
In iteration_info, Iteration 15 Particel 16 results: [11.5

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 16 Particel 0 results: [11.53396908 11.17353255]
In iteration_info, Iteration 16 Particel 1 results: [10.63287775 10.99331428]
In iteration_info, Iteration 16 Particel 2 results: [11.89440562 10.81309602]
In iteration_info, Iteration 16 Particel 3 results: [12.25484215 10.27244122]
In iteration_info, Iteration 16 Particel 4 results: [12.43506042 10.45265948]
In iteration_info, Iteration 16 Particel 5 results: [11.35375082  9.55156815]
In iteration_info, Iteration 16 Particel 6 results: [10.81309602 12.97571522]
In iteration_info, Iteration 16 Particel 7 results: [12.07462389 13.33615175]
In iteration_info, Iteration 16 Particel 8 results: [12.07462389 12.07462389]
In iteration_info, Iteration 16 Particel 9 results: [10.99331428 11.89440562]
In iteration_info, Iteration 16 Particel 10 results: [ 9.91200468 10.63287775]
In iteration_info, Iteration 16 Particel 11 results: [10.09222295 10.81309602]
In iteration_info, Iteration 16 Particel 12 results: [11.71418

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 17 Particel 4 results: [14.05702482 10.63287775]
In iteration_info, Iteration 17 Particel 5 results: [13.33615175 11.89440562]
In iteration_info, Iteration 17 Particel 6 results: [12.61527869 10.63287775]
In iteration_info, Iteration 17 Particel 7 results: [12.07462389 13.33615175]
In iteration_info, Iteration 17 Particel 8 results: [12.61527869 12.25484215]
In iteration_info, Iteration 17 Particel 9 results: [13.51637002 11.53396908]
In iteration_info, Iteration 17 Particel 10 results: [13.69658829 10.81309602]
In iteration_info, Iteration 17 Particel 11 results: [13.15593349  9.37134988]
In iteration_info, Iteration 17 Particel 12 results: [ 9.91200468 11.35375082]
In iteration_info, Iteration 17 Particel 13 results: [12.07462389  7.56916721]
In iteration_info, Iteration 17 Particel 14 results: [12.61527869 11.71418735]
In iteration_info, Iteration 17 Particel 15 results: [11.35375082 10.63287775]
In iteration_info, Iteration 17 Particel 16 results: [10.4

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 18 Particel 0 results: [11.53396908  9.91200468]
In iteration_info, Iteration 18 Particel 1 results: [11.89440562 13.33615175]
In iteration_info, Iteration 18 Particel 2 results: [12.25484215 11.71418735]
In iteration_info, Iteration 18 Particel 3 results: [12.25484215 12.07462389]
In iteration_info, Iteration 18 Particel 4 results: [12.07462389 11.71418735]
In iteration_info, Iteration 18 Particel 5 results: [11.89440562 11.71418735]
In iteration_info, Iteration 18 Particel 6 results: [10.09222295 12.25484215]
In iteration_info, Iteration 18 Particel 7 results: [10.63287775 13.15593349]
In iteration_info, Iteration 18 Particel 8 results: [11.35375082 12.25484215]
In iteration_info, Iteration 18 Particel 9 results: [10.81309602  9.55156815]
In iteration_info, Iteration 18 Particel 10 results: [12.25484215 11.35375082]
In iteration_info, Iteration 18 Particel 11 results: [10.63287775 12.97571522]
In iteration_info, Iteration 18 Particel 12 results: [12.97571

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In iteration_info, Iteration 19 Particel 4 results: [12.61527869 11.71418735]
In iteration_info, Iteration 19 Particel 5 results: [11.35375082 10.63287775]
In iteration_info, Iteration 19 Particel 6 results: [11.53396908 10.09222295]
In iteration_info, Iteration 19 Particel 7 results: [10.27244122 12.79549695]
In iteration_info, Iteration 19 Particel 8 results: [12.07462389  9.73178642]
In iteration_info, Iteration 19 Particel 9 results: [10.99331428 11.35375082]
In iteration_info, Iteration 19 Particel 10 results: [12.43506042 11.89440562]
In iteration_info, Iteration 19 Particel 11 results: [10.63287775 12.97571522]
In iteration_info, Iteration 19 Particel 12 results: [10.09222295 10.45265948]
In iteration_info, Iteration 19 Particel 13 results: [10.27244122 13.51637002]
In iteration_info, Iteration 19 Particel 14 results: [10.63287775  9.37134988]
In iteration_info, Iteration 19 Particel 15 results: [11.35375082 10.63287775]
In iteration_info, Iteration 19 Particel 16 results: [10.4

  particle_numeric[mask_low, col_idx] = column_means[col_idx]
  particle_numeric[mask_high, col_idx] = column_means[col_idx]


In [10]:
results

[[{'ML model': 'DT',
   'Iteration': 0,
   'Particle': 0,
   'Particle centroid': array([[44, 'male mar/wid', 'unemp/unskilled non res'],
          [44, 'male div/sep', 'skilled'],
          [30, 'female div/dep/mar', 'unskilled resident']], dtype=object),
   'Accuracy': array([0.68, 0.72]),
   'Precision': array([0.71014493, 0.81203008]),
   'Recall': array([0.80327869, 0.77697842]),
   'F1 score': array([0.75384615, 0.79411765]),
   'AUC': array([0.64522909, 0.68357118]),
   'Entropy-Loss': array([11.53396908, 10.09222295]),
   'TP': array([ 98., 108.]),
   'TN': array([38., 36.]),
   'FP': array([40., 25.]),
   'FN': array([24., 31.]),
   'num_clusters': 3,
   'num_valid_clusters': 3,
   'num_violated_clusters': 0,
   'num_violating_records_before_adjusting': 0,
   'num_retained_records': 60,
   'num_excess_records': 940,
   'num_fixed_clusters': 0,
   'num_used_excess': 0,
   'num_remaining_excess': 940,
   'num_unfixed_clusters': 0,
   'num_total_violating_records_after_adjusting'

In [11]:
# save the results to a file
results_df = pd.DataFrame(results)
filedirectory = '/Users/yusiwei/Library/CloudStorage/OneDrive-Personal/research/Fourth Year Paper/Experiments/2nd experiments/Experiment results/V2/Iteration Tracking Info'
filename = f"DT_Anonymization_16.csv"
filename = os.path.join(filedirectory, filename)
results_df.to_csv(filename, index=False)
        