In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from scipy.spatial.distance import euclidean
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.base import BaseEstimator, ClusterMixin
from sklearn.pipeline import make_pipeline
from sklearn.neighbors import KNeighborsClassifier
from scipy.spatial.distance import euclidean, cityblock, minkowski


from sklearn.datasets import load_iris
from sklearn.datasets import load_wine

import seaborn as sns
import matplotlib.pyplot as plt

from typing import Dict, Literal, Optional, Union
import numpy.typing as npt

from collections import namedtuple


from sklearn.base import BaseEstimator, ClusterMixin

import optuna
from optuna.samplers import TPESampler



### Инициализация нескольких датасетов

Датасет "WineQuality" (не из sklearn)

In [2]:
df_wine = pd.read_csv("datasets/winequality-red.csv")
X_wine = df_wine.drop("quality", axis = 1 )
y_wine = df_wine["quality"]

scaler_wine = MinMaxScaler()
scalered_wine = scaler_wine.fit_transform(X_wine)

x_train_wine, x_test_wine, y_train_wine, y_test_wine = train_test_split(np.array(scalered_wine), np.array(y_wine), test_size= 0.3, random_state= 42) # Нормализованные данные
x_train_wines, x_test_wines, y_train_wines, y_test_wines = train_test_split(np.array(X_wine), np.array(y_wine), test_size= 0.3, random_state= 42) 

Датасет "Ирисы"

In [3]:
df_iris = load_iris()
X_iris = df_iris.data
y_iris = df_iris.target

scaler_iris = MinMaxScaler()
scalered_iris = scaler_iris.fit_transform(X_iris)

x_train_iris, x_test_iris, y_train_iris, y_test_iris = train_test_split(scalered_iris, y_iris, test_size= 0.3, random_state= 42) # Нормализованные данные
x_train_iriss, x_test_iriss, y_train_iriss, y_test_iriss = train_test_split(X_iris, y_iris, test_size= 0.3, random_state= 42) 


Датасет "Исследование заболеваний сердца"

In [4]:
df_heart = pd.read_csv("datasets/heart_failure_clinical_records.csv")

X_heart = np.array(df_heart.drop("DEATH_EVENT", axis = 1))
y_heart = np.array(df_heart["DEATH_EVENT"])

scaler_heart = MinMaxScaler()
scalered_heart = scaler_heart.fit_transform(X_heart)

x_train_heart, x_test_heart, y_train_heart, y_test_heart = train_test_split(np.array(scalered_heart),np.array(y_heart), test_size= 0.3, random_state= 42)
x_train_hearts, x_test_hearts, y_train_hearts, y_test_hearts = train_test_split(np.array(X_heart),np.array(y_heart), test_size= 0.3, random_state= 42)


Датасет "Исследования заболевания сердца"

In [59]:
df_cardi = pd.read_csv("datasets/Cardiovascular_Disease_Dataset.csv")

X_cardi = np.array(df_cardi.drop("target", axis = 1))
y_cardi = np.array(df_cardi["target"])

scaler_cardi = MinMaxScaler()
scalered_cardi = scaler_cardi.fit_transform(X_cardi)

x_train_cardi, x_test_cardi, y_train_cardi, y_test_cardi = train_test_split(np.array(scalered_cardi),np.array(y_cardi), test_size= 0.3, random_state= 42)
x_train_cardis, x_test_cardis, y_train_cardis, y_test_cardis = train_test_split(np.array(X_cardi),np.array(y_cardi), test_size= 0.3, random_state= 42)



### Реализация алгоритма Искусственной иммунной системы с отрицательным отбором

In [6]:
class RNSA:
    
    def __init__(self, N = 100, r = 0.05, r_s = 0.0001, k = 1, metric: Literal["manhattan", "minkowski", "euclidean"] = "euclidean", 
                 max_discards = 1000, seed = None, p = 2, **kwargs: Dict[str, Union[bool, str, float]]):
        
        if metric == "manhattan" or metric == "minkowski":
            self.metric: str = metric
        else:
            self.metric: str = "euclidean"
            self.p: float = p
        
        if seed is not None and isinstance(seed, int):
            np.random.seed(seed)
            self.seed: int = seed
        else:
            self.seed: int = seed
        
        self._Detector = namedtuple("Detector", "position radius")

        if max_discards > 0:
            self.max_discards: int = max_discards
        else:
            self.max_discards: int = 1000

        if k < 1:
            self.k: int = 1
        else:
            self.k: int = k

        if N < 1:
            self.N: int = 100
        else:
            self.N: int = N

        if r < 0:
            self.r: float = 0.05
        else:
            self.r: float = r

        if r_s > 0:
            self.r_s: float = r_s
        else:
            self.r_s: float = 0

        self.p = kwargs.get("p", 2)
        self.cell_bounds = kwargs.get("cell_bounds", False)
        self.non_self_label = kwargs.get("non_self_label", "non-self")
        self.detectors: Union[dict, None] = None
        self.classes: npt.NDArray = None
    
    def _check_and_raise_exception_fit(self, X: npt.NDArray = None, y: npt.NDArray = None, 
                                       _class_ = "RNSA"):
        
        if not isinstance(X, np.ndarray):
            if isinstance(X, list):
                X = np.array(X)
            else:
                raise TypeError("X is not an ndarray or list.")
        elif not isinstance(y, np.ndarray):
            if isinstance(y, list):
                y = np.array(y)
            else:
                raise TypeError("y is not an ndarray or list.")
        if X.shape[0] != y.shape[0]:
            raise TypeError(
                "X does not have the same amount of sample for the output classes in y."
            )

    def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True):


        self.classes = np.unique(y)
        list_detectors_by_class = dict()
        sample_index = self.__slice_index_list_by_class(y)

        if verbose:
            progress = tqdm(total=int(self.N * (len(self.classes))), 
                            bar_format="{desc} ┇{bar}┇ {n}/{total} detectors", postfix="\n",)
            
        for _class_ in self.classes:
            valid_detectors_set = []
            discard_count = 0

            if verbose:
                progress.set_description_str(f"Generating the detectors for the {_class_} class:")

            while len(valid_detectors_set) < self.N:

                vector_x = np.random.random_sample(size = X.shape[1])
                valid_detector = self.__checks_valid_detector(X, vector_x, sample_index_class= sample_index[_class_])
                
                if valid_detector is not False:

                    discard_count = 0
                    valid_detectors_set.append(self._Detector(vector_x, valid_detector[1]))

                    if verbose:
                        progress.update(1)
                elif valid_detector:
                    discard_count = 0
                    valid_detectors_set.append(self._Detector(vector_x))
                    if verbose:
                        progress.update(1)
                else:
                    discard_count += 1
                    if discard_count == self.max_discards:
                        raise Exception(
                            "An error has been identified:\n"
                            f"the maximum number of discards of detectors for the {_class_} class "
                            "has been reached.\nIt is recommended to check the defined radius and "
                            "consider reducing its value."
                        )
            list_detectors_by_class[_class_] = valid_detectors_set
        if verbose:
            progress.set_description(
                f'\033[92m✔ Non-self detectors for classes ({", ".join(map(str, self.classes))}) '
                f'successfully generated\033[0m'
            )
        self.detectors = list_detectors_by_class
        return self
    
    def predict(self, X):

        if self.detectors is None:
            return None
        elif not isinstance(X, (np.ndarray, list)):
            raise TypeError("X is not an ndarray or list")
        elif len(self.detectors[self.classes[0]][0].position) != len(X[0]):
            raise Exception(
                "X does not have {} features to make the prediction".format(
                    len(self.detectors[self.classes[0]][0])
                )
            )
        
        C = np.empty(shape = 0)
        for line in X:
            class_found: bool
            _class_ = self.__compare_sample_to_detectors(line)
            if _class_ is None:
                class_found = False
            else:
                C = np.append(C, [_class_])
                class_found = True

            if not class_found and len(self.classes) == 1:
                C = np.append(C, [self.non_self_label])
            elif not class_found:
                average_distance = {}
                for _class_ in self.classes:
                    detectores = list(map(lambda x: x.position, self.detectors[_class_]))
                    average_distance[_class_] = np.average([self.__distance(detector, line) for detector in detectores])
                C = np.append(C, [max(average_distance, key = average_distance.get)])
        return C

    def __compare_sample_to_detectors(self, line):
        possible_classes = []
        for _class_ in self.classes:
            class_found: bool = True
            sum_distance = 0
            for detector in self.detectors[_class_]:
                distance = self.__distance(detector.position, line)

                sum_distance += distance
                if distance < detector.radius:
                    class_found = False
                    break
            if class_found:
                possible_classes.append([_class_, sum_distance / self.N])
        if len(possible_classes) == 1:
            return possible_classes[0][0]
        elif len(possible_classes) > 1:
            return max(possible_classes, key = lambda x: x[1])[0]
        else:
            return None

                
      
    def __slice_index_list_by_class(self, y) -> Dict:

        position_samples = dict()
        for _class_ in self.classes:
            position_samples[_class_] = list(np.where(y == _class_)[0])

        return position_samples            
    
    def __checks_valid_detector(self, X = None, vector_x = None, sample_index_class = None):

        if (np.size(sample_index_class) == 0 or np.size(X) == 0 or np.size(vector_x) == 0):
            return False
        
        if self.k > 1:
            knn_list = np.empty(shape = 0)
            for i in sample_index_class:
                knn_list = self.__compare_KnearestNeighbors_List(knn_list, self.__distance(X[i], vector_x))

            distance_mean = np.mean(knn_list)

            return self.__detector_is_valid_to_Vdetector(distance_mean, vector_x)
        
        else:
            distance: Union[float, None] = None
            for i in sample_index_class:
                new_distance = self.__distance(X[i], vector_x)
                if distance is None:
                    distance = new_distance
                elif distance > new_distance:
                    distance = new_distance
                
            return self.__detector_is_valid_to_Vdetector(distance, vector_x), True
        
        # return False
        

    def __compare_KnearestNeighbors_List(self, knn, distance) -> npt.NDArray:
        
        if len(knn) < self.k:
             knn = np.append(knn, distance)
             knn.sort()
        else:
            if knn[self.k - 1] > distance:
                knn[self.k - 1] = distance
                knn.sort()

        return knn
    
    def __distance(self, u, v):

        if self.metric == "manhattan":
            return cityblock(u, v)
        elif self.metric == "minkowski":
            return minkowski(u, v, self.p)
        else:
            return euclidean(u, v)
        
    def __detector_is_valid_to_Vdetector(self, distance, vector_x):

        new_detector_r = float(distance - self.r_s)
        if self.r > new_detector_r:
            return False
        else:
            return True, new_detector_r
        
    def get_params(self, deep: bool = True) -> dict:
        return {
            "N": self.N,
            "r": self.r,
            "k": self.k,
            "metric": self.metric,
            "seed": self.seed,
            # "algorithm": self._algorithm,
            "r_s": self.r_s,
            # "cell_bounds": self._cell_bounds,
            "p": self.p,
        }


#### Инициализация модели

In [14]:
rnsa_wine = RNSA()
rnsa_iris = RNSA()
rnsa_heart = RNSA()
rnsa_norm_cardi = RNSA()
rnsa_non_norm_cardi = RNSA()


### Тестирование модели с нормализованными данными на гиперпараметрах (по умолчанию)

In [15]:
def check_models(model, X_tr, y_tr, X_ts, y_ts):
    model.fit(X_tr,y_tr)
    model_pred = model.predict(X_ts)
    model_acc = accuracy_score(y_ts, model_pred)

    return model_acc

In [16]:
acc_wine = check_models(rnsa_wine, x_train_wine, y_train_wine, x_test_wine, y_test_wine)
acc_iris = check_models(rnsa_iris, x_train_iris, y_train_iris, x_test_iris, y_test_iris)
acc_heart = check_models(rnsa_heart, x_train_heart, y_train_heart, x_test_heart, y_test_heart)
acc_norm_cardi = check_models(rnsa_norm_cardi, x_train_cardi, y_train_cardi, x_test_cardi, y_test_cardi)


[92m✔ Non-self detectors for classes (3, 4, 5, 6, 7, 8) successfully generated[0m:  ┇██████████┇ 600/600 detectors
[92m✔ Non-self detectors for classes (0, 1, 2) successfully generated[0m:  ┇██████████┇ 300/300 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 200/200 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 200/200 detectors


### Точность моделей на нормализованных данных

In [17]:
print(f"Точность модели на датасете wine: {acc_wine}")
print(f"Точность модели на датасете iris: {acc_iris}")
print(f"Точность модели на датасете heart: {acc_heart}")
print(f"Точность модели на датасете cardi: {acc_norm_cardi}")


Точность модели на датасете wine: 0.3104166666666667
Точность модели на датасете iris: 0.28888888888888886
Точность модели на датасете heart: 0.42133333333333334
Точность модели на датасете cardi: 0.44333333333333336


### Тестирование модели с ненормализованными данными на гиперпараметрах (по умолчанию)

In [18]:
acc_wines = check_models(rnsa_wine, x_train_wines, y_train_wines, x_test_wines, y_test_wines)
acc_iriss = check_models(rnsa_iris, x_train_iriss, y_train_iriss, x_test_iriss, y_test_iriss)
acc_hearts = check_models(rnsa_heart, x_train_hearts, y_train_hearts, x_test_hearts, y_test_hearts)
acc_non_norm_cardi = check_models(rnsa_non_norm_cardi, x_train_cardis, y_train_cardis, x_test_cardis, y_test_cardis)

[92m✔ Non-self detectors for classes (3, 4, 5, 6, 7, 8) successfully generated[0m:  ┇██████████┇ 600/600 detectors
[92m✔ Non-self detectors for classes (0, 1, 2) successfully generated[0m:  ┇██████████┇ 300/300 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 200/200 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 200/200 detectors


### Точность моделей на ненормализованных данных

In [19]:
print(f"Точность модели на датасете wine: {acc_wines}")
print(f"Точность модели на датасете iris: {acc_iriss}")
print(f"Точность модели на датасете heart: {acc_hearts}")
print(f"Точность модели на датасете cardi: {acc_non_norm_cardi}")


Точность модели на датасете wine: 0.22083333333333333
Точность модели на датасете iris: 0.28888888888888886
Точность модели на датасете heart: 0.30666666666666664
Точность модели на датасете cardi: 0.46


### Реализация функции - обертки для запуска оптимизации гиперпараметров модели на разных датасетах

In [20]:
def objective(trial, X, y):
    N = trial.suggest_int('N', 10, 400)
    r = trial.suggest_float('r', 0.05, 0.7)
    r_s = trial.suggest_float('r_s', 1e-6, 1e-3, log=True)
    k = trial.suggest_int('k', 1, 7)
    metric = trial.suggest_categorical('metric', ['manhattan', 'euclidean'])
    
    model = RNSA(N=N, r=r, r_s=r_s, k=k, metric= metric, seed= 123)
    
    pipeline = make_pipeline(MinMaxScaler(), model)
    score = cross_val_score(pipeline, X, y, cv=5, scoring='accuracy').mean()
    return score

# Функция для запуска оптимизации
def run_optimization(X, y, n_trials=100):
    study = optuna.create_study(direction='maximize', sampler=TPESampler())
    study.optimize(lambda trial: objective(trial, X, y), n_trials=n_trials)
    return study.best_params, study.best_value

In [41]:
params_wine, score_wine = run_optimization(x_train_wine, y_train_wine)

[I 2024-05-21 01:51:43,400] A new study created in memory with name: no-name-0eefaa3f-c5c7-425e-8b51-be9f951b3432
[92m✔ Non-self detectors for classes (3, 4, 5, 6, 7, 8) successfully generated[0m:  ┇██████████┇ 2004/2004 detectors
[92m✔ Non-self detectors for classes (3, 4, 5, 6, 7, 8) successfully generated[0m:  ┇██████████┇ 2004/2004 detectors
[92m✔ Non-self detectors for classes (3, 4, 5, 6, 7, 8) successfully generated[0m:  ┇██████████┇ 2004/2004 detectors
[92m✔ Non-self detectors for classes (3, 4, 5, 6, 7, 8) successfully generated[0m:  ┇██████████┇ 2004/2004 detectors
[92m✔ Non-self detectors for classes (3, 4, 5, 6, 7, 8) successfully generated[0m:  ┇██████████┇ 2004/2004 detectors
[I 2024-05-21 01:52:10,944] Trial 0 finished with value: 0.43257126841768095 and parameters: {'N': 334, 'r': 0.5502511778543719, 'r_s': 4.580353489781831e-05, 'k': 5, 'metric': 'euclidean'}. Best is trial 0 with value: 0.43257126841768095.
[92m✔ Non-self detectors for classes (3, 4, 5, 6, 

In [40]:
params_iris, score_iris = run_optimization(x_train_iris, y_train_iris)

[I 2024-05-21 01:47:41,829] A new study created in memory with name: no-name-32c553e5-02e3-4578-b43f-e630b055075b
[92m✔ Non-self detectors for classes (0, 1, 2) successfully generated[0m:  ┇██████████┇ 342/342 detectors
[92m✔ Non-self detectors for classes (0, 1, 2) successfully generated[0m:  ┇██████████┇ 342/342 detectors
[92m✔ Non-self detectors for classes (0, 1, 2) successfully generated[0m:  ┇██████████┇ 342/342 detectors
[92m✔ Non-self detectors for classes (0, 1, 2) successfully generated[0m:  ┇██████████┇ 342/342 detectors
[92m✔ Non-self detectors for classes (0, 1, 2) successfully generated[0m:  ┇██████████┇ 342/342 detectors
[I 2024-05-21 01:47:44,731] Trial 0 finished with value: 0.8476190476190476 and parameters: {'N': 114, 'r': 0.647152486337575, 'r_s': 0.0005563202017324864, 'k': 5, 'metric': 'euclidean'}. Best is trial 0 with value: 0.8476190476190476.
[92m✔ Non-self detectors for classes (0, 1, 2) successfully generated[0m:  ┇██████████┇ 654/654 detectors


In [42]:
params_heart, score_heart = run_optimization(x_train_heart, y_train_heart)

[I 2024-05-21 02:08:58,690] A new study created in memory with name: no-name-824291d1-e275-413c-aefc-4c80ad8f7c03
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 96/96 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 96/96 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 96/96 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 96/96 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 96/96 detectors
[I 2024-05-21 02:09:08,305] Trial 0 finished with value: 0.5085714285714286 and parameters: {'N': 48, 'r': 0.6442901460099906, 'r_s': 4.197406575524472e-05, 'k': 6, 'metric': 'euclidean'}. Best is trial 0 with value: 0.5085714285714286.
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 618/618 detectors
[92m✔ Non-self detectors for

In [22]:
params_cardi, score_cardi = run_optimization(x_train_cardi, y_train_cardi)

[I 2024-05-21 14:28:58,077] A new study created in memory with name: no-name-aff6ea47-c05b-4fe6-9dc9-34b44abca46f
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 308/308 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 308/308 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 308/308 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 308/308 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 308/308 detectors
[I 2024-05-21 14:29:01,668] Trial 0 finished with value: 0.45999999999999996 and parameters: {'N': 154, 'r': 0.13515875023114876, 'r_s': 2.2750159360808404e-06, 'k': 1, 'metric': 'manhattan'}. Best is trial 0 with value: 0.45999999999999996.
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 566/566 detectors
[92m✔ Non-sel

In [23]:
params_cardis, score_cardis = run_optimization(x_train_cardis, y_train_cardis)

[I 2024-05-21 14:33:37,383] A new study created in memory with name: no-name-39f95794-cdf2-4b2b-84e3-8f0df1fcda60
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 118/118 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 118/118 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 118/118 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 118/118 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 118/118 detectors
[I 2024-05-21 14:33:39,920] Trial 0 finished with value: 0.6971428571428572 and parameters: {'N': 59, 'r': 0.5749680033208119, 'r_s': 7.627890980253285e-05, 'k': 2, 'metric': 'euclidean'}. Best is trial 0 with value: 0.6971428571428572.
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 738/738 detectors
[92m✔ Non-self det

In [None]:
print(f" Wine {params_wine}, {score_wine}")
print(f" Iris {params_iris}, {score_iris}")
print(f" Heart{params_heart}, {score_heart}")

 Wine {'N': 78, 'r': 0.6006509714553963, 'r_s': 1.784083452802209e-05, 'k': 3, 'metric': 'manhattan'}, 0.4673886931454196
 Iris {'N': 345, 'r': 0.546667519968013, 'r_s': 1.2925313570472918e-06, 'k': 3, 'metric': 'manhattan'}, 0.9523809523809523
 Heart{'N': 388, 'r': 0.23450562473448786, 'r_s': 0.0003052541298962591, 'k': 3, 'metric': 'manhattan'}, 0.8868571428571428


In [24]:
print(f" Cardi{params_cardi}, {score_cardi}")
print(f" Cardi{params_cardis}, {score_cardis}")

 Cardi{'N': 16, 'r': 0.5727666954055186, 'r_s': 2.4656090628037976e-06, 'k': 7, 'metric': 'manhattan'}, 0.8271428571428571
 Cardi{'N': 11, 'r': 0.496833746955271, 'r_s': 0.0006555269711389238, 'k': 2, 'metric': 'euclidean'}, 0.8171428571428571


In [50]:
rnsa_wine_hyper = RNSA(N = 78, r = 0.6006509714553963, r_s = 1.784083452802209e-05, k = 3, metric="euclidean", seed= 123)
rnsa_iris_hyper = RNSA(N = 345, r = 0.546667519968013, r_s = 1.2925313570472918e-06, k = 3, metric="manhattan", seed= 123)
rnsa_heart_hyper = RNSA(N = 388, r = 0.23450562473448786, r_s = 0.0003052541298962591, k = 3, metric="manhattan", seed= 123)
rnsa_cardi_hyper = RNSA(N = 16, r = 0.5727666954055186, r_s = 2.4656090628037976e-06, k = 7, metric= "manhattan", seed = 123 )
rnsa_cardis_hyper = RNSA(N = 11, r = 0.496833746955271, r_s = 0.0006555269711389238, k = 2, metric= "manhattan", seed = 123 )

acc_wines_hyper = check_models(rnsa_wine_hyper, x_train_wine, y_train_wine, x_test_wine, y_test_wine)
acc_iriss_hyper = check_models(rnsa_iris_hyper, x_train_iris, y_train_iris, x_test_iris, y_test_iris)
acc_hearts_hyper = check_models(rnsa_heart_hyper, x_train_heart, y_train_heart, x_test_heart, y_test_heart)
acc_cardi_opti = check_models(rnsa_cardi_hyper, x_train_cardi, y_train_cardi, x_test_cardi, y_test_cardi)
acc_cardis_opti = check_models(rnsa_cardis_hyper, x_train_cardis, y_train_cardis, x_test_cardis, y_test_cardis)


[92m✔ Non-self detectors for classes (3, 4, 5, 6, 7, 8) successfully generated[0m:  ┇██████████┇ 468/468 detectors
[92m✔ Non-self detectors for classes (0, 1, 2) successfully generated[0m:  ┇██████████┇ 1035/1035 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 776/776 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 32/32 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 22/22 detectors


In [45]:
print(acc_wines_hyper)
print(acc_iriss_hyper)
print(acc_hearts_hyper)

print(acc_cardi_opti)
print(acc_cardis_opti)

0.40625
0.9333333333333333
0.8186666666666667
0.71
0.6633333333333333


In [48]:
model_knn = KNeighborsClassifier(n_neighbors=1)

acc_knn_wine = check_models(model_knn, x_train_wine, y_train_wine, x_test_wine, y_test_wine)
acc_knn_iris = check_models(model_knn, x_train_iris, y_train_iris, x_test_iris, y_test_iris)
acc_knn_heart = check_models(model_knn, x_train_heart, y_train_heart, x_test_heart, y_test_heart)
acc_knn_cardi = check_models(model_knn, x_train_cardi, y_train_cardi, x_test_cardi, y_test_cardi)

acc_knn_wines = check_models(model_knn, x_train_wines, y_train_wines, x_test_wines, y_test_wines)
acc_knn_iriss = check_models(model_knn, x_train_iriss, y_train_iriss, x_test_iriss, y_test_iriss)
acc_knn_hearts = check_models(model_knn, x_train_hearts, y_train_hearts, x_test_hearts, y_test_hearts)
acc_knn_cardis = check_models(model_knn, x_train_cardis, y_train_cardis, x_test_cardis, y_test_cardis)

print(f"Точность модели kNN на датасете wine: {acc_knn_wine}")
print(f"Точность модели kNN на датасете iris: {acc_knn_iris}")
print(f"Точность модели kNN на датасете heart: {acc_knn_heart}")
print(f"Точность модели kNN на датасете cardi: {acc_knn_cardi} \n")

print(f"Точность модели kNN на не нормализованных на датасете wine: {acc_knn_wines}")
print(f"Точность модели kNN на не нормализованных данных на датасете iris: {acc_knn_iriss}")
print(f"Точность модели kNN на не нормализованных данных на датасете heart: {acc_knn_hearts}")
print(f"Точность модели kNN на не нормализованных данных на датасете cardi: {acc_knn_cardis}")


Точность модели kNN на датасете wine: 0.60625
Точность модели kNN на датасете iris: 1.0
Точность модели kNN на датасете heart: 0.9713333333333334
Точность модели kNN на датасете cardi: 0.93 

Точность модели kNN на не нормализованных на датасете wine: 0.525
Точность модели kNN на не нормализованных данных на датасете iris: 1.0
Точность модели kNN на не нормализованных данных на датасете heart: 0.9433333333333334
Точность модели kNN на не нормализованных данных на датасете cardi: 0.51


### Тестирование алгоритма Иммунной системы на зашумленных данных

In [28]:
def add_gaussian_noise(data, mean=0, std_dev=1):
    noise = np.random.normal(mean, std_dev, data.shape)
    noisy_data = data + noise
    return noisy_data

In [49]:
x_test_wine_noisy = add_gaussian_noise(x_test_wine)
x_test_iris_noisy = add_gaussian_noise(x_test_iris)
x_test_heart_noisy = add_gaussian_noise(x_test_heart)
x_test_cardi_noisy = add_gaussian_noise(x_test_cardi)

In [51]:
acc_wine_noisy = check_models(rnsa_wine_hyper, x_train_wine, y_train_wine, x_test_wine_noisy, y_test_wine)
acc_iris_noisy = check_models(rnsa_iris_hyper, x_train_iris, y_train_iris, x_test_iris_noisy, y_test_iris)
acc_heart_noisy = check_models(rnsa_heart_hyper, x_train_heart, y_train_heart, x_test_heart_noisy, y_test_heart)
acc_cardi_noisy = check_models(rnsa_cardi_hyper, x_train_cardi, y_train_cardi, x_test_cardi_noisy, y_test_cardi)

[92m✔ Non-self detectors for classes (3, 4, 5, 6, 7, 8) successfully generated[0m:  ┇██████████┇ 468/468 detectors
[92m✔ Non-self detectors for classes (0, 1, 2) successfully generated[0m:  ┇██████████┇ 1035/1035 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 776/776 detectors
[92m✔ Non-self detectors for classes (0, 1) successfully generated[0m:  ┇██████████┇ 32/32 detectors


In [52]:
acc_knn_wine_noisy = check_models(model_knn, x_train_wine, y_train_wine, x_test_wine_noisy, y_test_wine)
acc_knn_iris_noisy = check_models(model_knn, x_train_iris, y_train_iris, x_test_iris_noisy, y_test_iris)
acc_knn_heart_noisy = check_models(model_knn, x_train_heart, y_train_heart, x_test_heart_noisy, y_test_heart)
acc_knn_cardi_noisy = check_models(model_knn, x_train_cardi, y_train_cardi, x_test_cardi_noisy, y_test_cardi)

In [53]:
print(f"RNSA на шум.данных wine: {acc_wine_noisy}")
print(f"RNSA на шум.данных iris: {acc_iris_noisy}")
print(f"RNSA на шум.данных heart: {acc_heart_noisy}")
print(f"RNSA на шум.данных cardi: {acc_cardi_noisy}")
print(f"KNN на шум.данных wine: {acc_knn_wine_noisy}")
print(f"KNN на шум.данных iris: {acc_knn_iris_noisy}")
print(f"KNN на шум.данных heart: {acc_knn_heart_noisy}")
print(f"KNN на шум.данных cardi: {acc_knn_cardi_noisy}")


RNSA на шум.данных wine: 0.15208333333333332
RNSA на шум.данных iris: 0.5555555555555556
RNSA на шум.данных heart: 0.49333333333333335
RNSA на шум.данных cardi: 0.5
KNN на шум.данных wine: 0.28541666666666665
KNN на шум.данных iris: 0.5777777777777777
KNN на шум.данных heart: 0.582
KNN на шум.данных cardi: 0.6666666666666666


### Сводка данных:

In [61]:
dataframes = ["WINE", "IRIS", "HEART","CARDI"]
rnsa_norm = [acc_wine, acc_iris, acc_heart, acc_norm_cardi]
rnsa_non_norm = [acc_wines, acc_iriss, acc_hearts, acc_non_norm_cardi]
rnsa_noisy = [acc_wine_noisy, acc_iris_noisy, acc_heart_noisy, acc_cardi_noisy]
rnsa_optimiized = [acc_wines_hyper, acc_iriss_hyper, acc_hearts_hyper, acc_cardi_opti]
rnsa_cardi_non_opt = [None,None,None, acc_cardis_opti]
knn_norm = [acc_knn_wine, acc_knn_iris, acc_knn_heart, acc_knn_cardi]
knn_non_norm = [acc_knn_wines, acc_knn_iriss, acc_knn_hearts, acc_knn_cardis]
knn_noisy = [acc_knn_wine_noisy, acc_knn_iris_noisy, acc_knn_heart_noisy, acc_knn_cardi_noisy]

data = {
    "DataFrame": dataframes,
    "RNSA normalized data": rnsa_norm,
    "KNN normalized data": knn_norm,
    "RNSA non-normalized data": rnsa_non_norm,
    "KNN non-normalized data": knn_non_norm,
    "RNSA с оптимизированными гиперпараметрами noisy data": rnsa_noisy,
    "KNN noisy data": knn_noisy,
    "RNSA after optimized (norm-data)": rnsa_optimiized,
    "RNSA after non-norm cardi optimized": rnsa_cardi_non_opt
}

da = pd.DataFrame(data)
da.head()


Unnamed: 0,DataFrame,RNSA normalized data,KNN normalized data,RNSA non-normalized data,KNN non-normalized data,RNSA с оптимизированными гиперпараметрами noisy data,KNN noisy data,RNSA after optimized (norm-data),RNSA after non-norm cardi optimized
0,WINE,0.310417,0.60625,0.220833,0.525,0.152083,0.285417,0.40625,
1,IRIS,0.288889,1.0,0.288889,1.0,0.555556,0.577778,0.933333,
2,HEART,0.421333,0.971333,0.306667,0.943333,0.493333,0.582,0.818667,
3,CARDI,0.443333,0.93,0.46,0.51,0.5,0.666667,0.71,0.663333
