In [None]:
from defense.generate_dataset import generate_defense_set, generate_testset
import tensorflow as tf
import pandas as pd
import numpy as np
from art.estimators.classification import TensorFlowV2Classifier
from evaluation_metric import evaluate_metric

# TRAIN DETECTOR

In [None]:
def _read_without_saved_index(csv_path):
    """Read a CSV file and discard its "Unnamed: 0" column."""
    return pd.read_csv(csv_path).drop(columns=["Unnamed: 0"])


# Training samples and their labels.
samples = _read_without_saved_index("NSL-KDD/samples/X_train.csv")
y_samples = _read_without_saved_index("NSL-KDD/samples/y_train.csv")

# One frame per file in the attack/ directory (FSGM, BIM, JSMA, DeepFool).
fsgm_samples = _read_without_saved_index("NSL-KDD/samples/attack/X_FSGM_train.csv")
bim_samples = _read_without_saved_index("NSL-KDD/samples/attack/X_BIM_train.csv")
jsma_samples = _read_without_saved_index("NSL-KDD/samples/attack/X_JSMA_train.csv")
deepfool_samples = _read_without_saved_index("NSL-KDD/samples/attack/X_DeepFool_train.csv")

full_features = samples.columns

# Keep only the features whose SHAP importance is strictly positive; the
# filter is evaluated once and reused for both the names and the values.
shap_importance_df = _read_without_saved_index("NSL-KDD/samples/shap_importance.csv")
positive_shap = shap_importance_df.loc[shap_importance_df['shap_importance'] > 0]
selected_features = positive_shap['column_name'].values
shap_importance_feature_values = positive_shap['shap_importance'].values

In [None]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

# Fixed seed so the split and the SMOTE oversampling give the same result on
# every Restart & Run All.
DEFENSE_SEED = 42

# The trailing 50 is forwarded as-is; its meaning is defined by
# generate_defense_set, which is not visible in this notebook.
defense_data = generate_defense_set(
    samples,
    fsgm_samples,
    bim_samples,
    jsma_samples,
    deepfool_samples,
    selected_features,
    shap_importance_feature_values,
    50
)

# Stratified 70/30 split on the 'label' column.
x_defense_train, x_defense_test, y_defense_train, y_defense_test = train_test_split(
    defense_data.drop(columns=['label']),
    defense_data['label'],
    test_size=0.3,
    stratify=defense_data['label'],
    random_state=DEFENSE_SEED,
)

# Oversample class 0 of the training split to 1750 rows; the test split is
# left untouched so it is evaluated on un-resampled data.
balance_defense = SMOTE(sampling_strategy={0: 1750}, random_state=DEFENSE_SEED)
x_defense_train, y_defense_train = balance_defense.fit_resample(x_defense_train, y_defense_train)

# Convert to plain ndarrays before adding the trailing channel axis expected by
# the Conv1D input: np.reshape applied directly to a DataFrame can try to wrap
# the 3-D result back into a DataFrame and fail with "Must pass 2-d input"
# on some pandas/NumPy versions.
x_defense_train = np.asarray(x_defense_train)
x_defense_train = x_defense_train.reshape(x_defense_train.shape[0], x_defense_train.shape[1], 1)
x_defense_test = np.asarray(x_defense_test)
x_defense_test = x_defense_test.reshape(x_defense_test.shape[0], x_defense_test.shape[1], 1)

# One-hot encode the labels; multiplying by 1 yields numeric 0/1 columns even
# when get_dummies returns booleans.
y_defense_train = pd.get_dummies(y_defense_train)*1
y_defense_test = pd.get_dummies(y_defense_test)*1

In [None]:
def create_cnn(n_features):
    """Build and compile the 1-D convolutional detector network.

    Architecture: two Conv1D (2048 then 512 filters, kernel size 3, 'same'
    padding, ReLU) each followed by max-pooling with pool size 4, then a
    flatten, a 512-unit ReLU dense layer and a 2-unit softmax output.

    Args:
        n_features: Length of the feature axis; the network input shape is
            ``(n_features, 1)``.

    Returns:
        The compiled ``tf.keras.Sequential`` model (categorical cross-entropy
        loss, Adam optimizer with default settings, accuracy metric).
    """
    keras_layers = tf.keras.layers

    detector = tf.keras.Sequential()
    # First convolution also declares the input shape of the model.
    detector.add(
        keras_layers.Convolution1D(
            2048, 3, padding='same', activation='relu', input_shape=(n_features, 1)
        )
    )
    detector.add(keras_layers.MaxPooling1D(pool_size=4))
    detector.add(keras_layers.Convolution1D(512, 3, padding='same', activation='relu'))
    detector.add(keras_layers.MaxPooling1D(pool_size=4))
    detector.add(keras_layers.Flatten())
    detector.add(keras_layers.Dense(512, activation='relu'))
    # Two-way softmax matches the two-column one-hot labels.
    detector.add(keras_layers.Dense(2, activation='softmax'))

    detector.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy'],
    )

    return detector

# Size the detector input to the feature axis of the training tensor.
n_detector_features = x_defense_train.shape[1]
ae_detector_cnn = create_cnn(n_detector_features)

# Train on the defense training split. fit() stays the last expression of the
# cell, so its return value is still what the cell displays.
ae_detector_cnn.fit(
    x_defense_train,
    y_defense_train,
    epochs=200,
    batch_size=16,
)

# EVALUATE

RQ1: Detector performance on the train and test splits of the defense data

In [None]:
# Score the detector on the same split it was fitted on.
train_predictions = ae_detector_cnn.predict(x_defense_train)
evaluate_metric(y_pred=train_predictions, y_true=y_defense_train)

In [None]:
# Count how often each distinct one-hot label row occurs in the training labels.
y_defense_train.value_counts()

In [None]:
# Score the detector on the held-out split of the defense data.
test_predictions = ae_detector_cnn.predict(x_defense_test)
evaluate_metric(y_pred=test_predictions, y_true=y_defense_test)

In [None]:
# Count how often each distinct one-hot label row occurs in the test labels.
y_defense_test.value_counts()

RQ2: Detector performance on a random sample of the test set

In [None]:
# Pre-trained classifier that is wrapped for ART and handed to generate_testset.
dnn = tf.keras.models.load_model("NSL-KDD/dnn.h5")

X_train = pd.read_csv("NSL-KDD/X_train.csv").drop(columns=['Unnamed: 0'])
y_train = pd.read_csv("NSL-KDD/y_train.csv").drop(columns=['Unnamed: 0'])
X_test = pd.read_csv("NSL-KDD/X_test.csv").drop(columns=['Unnamed: 0'])
y_test = pd.read_csv("NSL-KDD/y_test.csv").drop(columns=['Unnamed: 0'])


loss_object = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
dnn_model = TensorFlowV2Classifier(
    model=dnn,
    loss_object=loss_object,
    optimizer=optimizer,
    nb_classes=5,
    # ART expects the shape of ONE input instance, so the leading row count of
    # the training frame is dropped.
    input_shape=X_train.shape[1:],
)

# Seeded so the same 10000 rows are drawn on every run.
test_samples = X_test.sample(n=10000, random_state=42)
# sample() keeps the original index labels, so the matching targets are
# selected by label (.loc) rather than by position (.iloc).
y_test_samples = y_test.loc[test_samples.index]

# The trailing 50 is forwarded as-is; its meaning is defined by
# generate_testset, which is not visible in this notebook.
ae_data, label_ae_data = generate_testset(
    dnn_model,
    test_samples,
    y_test_samples,
    selected_features,
    50
)

# Persist the generated data and its labels.
ae_data.to_csv("NSL-KDD/defense/train-test/ae_x_train.csv")
pd.DataFrame(label_ae_data, columns=['label']).to_csv("NSL-KDD/defense/train-test/ae_y_train.csv")

In [None]:
# NOTE(review): the preceding cell writes its output under
# "NSL-KDD/defense/train-test/", while this cell reads from
# "NSL-KDD/defense/test/" - confirm these are the intended files.
ae_data = pd.read_csv("NSL-KDD/defense/test/data.csv").drop(columns=["Unnamed: 0"])
label_ae_data = pd.read_csv("NSL-KDD/defense/test/y_data.csv").drop(columns=["Unnamed: 0"])

# One-hot encode the 'label' column; multiplying by 1 yields numeric 0/1 values.
true_label = pd.get_dummies(label_ae_data, columns=['label'])*1

# Take as many leading selected features as the detector was built for (it was
# created with x_defense_train.shape[1] inputs) instead of a hard-coded count.
n_detector_features = x_defense_train.shape[1]
_ae_data = ae_data[selected_features[:n_detector_features]]

# Convert to an ndarray before adding the channel axis: np.reshape applied
# directly to a DataFrame can fail with "Must pass 2-d input" on some
# pandas/NumPy versions.
_ae_data = np.asarray(_ae_data)
_ae_data = _ae_data.reshape(_ae_data.shape[0], _ae_data.shape[1], 1)

evaluate_metric(y_pred=ae_detector_cnn.predict(_ae_data), y_true=true_label)

In [None]:
# Count how often each distinct one-hot label row occurs in the RQ2 labels.
true_label.value_counts()