In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers


# Load the combined IoT traffic capture.
# The path is a raw string on purpose: it contains backslash sequences
# (\F, \p, \d, \i) that are invalid escapes in a normal string literal and
# raise a SyntaxWarning on Python 3.12+. The resulting path is unchanged.
df = pd.read_csv(r"E:\Final year project\project\datapreprocessing\iot_combined.csv")

# Classes kept for the experiment, mapped to the integer ids used as targets.
top_labels = {
    'PartOfAHorizontalPortScan': 1,
    'Benign': 0,
    'Okiru': 2,
    'DDoS': 3,
}

# Resampling configuration: rows drawn per class, and the seed that makes the
# draw (and the shuffle below) repeatable across kernel restarts.
SAMPLES_PER_CLASS = 12500
SAMPLE_SEED = 42

# Keep only the rows whose label is one of the selected classes.
df = df[df['label'].isin(list(top_labels))]

# Draw the same number of rows from every class. replace=True allows a class
# with fewer than SAMPLES_PER_CLASS rows to be oversampled.
# NOTE(review): because rows are drawn with replacement here, before the
# train/test split performed later in the notebook, identical rows can land in
# both splits — confirm this is acceptable for the reported metrics.
balanced_df = pd.concat([
    df[df['label'] == label].sample(
        SAMPLES_PER_CLASS, replace=True, random_state=SAMPLE_SEED
    )
    for label in top_labels
])

# Shuffle so the classes are interleaved, then swap the string labels for
# their integer ids.
balanced_df = balanced_df.sample(frac=1, random_state=SAMPLE_SEED).reset_index(drop=True)
balanced_df['label'] = balanced_df['label'].map(top_labels)

In [5]:
from tensorflow.keras.utils import to_categorical


def build_cnn(input_shape, num_classes):
    """Build and compile the 1-D CNN classifier used by both training stages.

    Layers: Conv1D(32, kernel 3, ReLU) -> MaxPooling1D(2) ->
    Conv1D(64, kernel 3, ReLU) -> MaxPooling1D(2) -> Flatten ->
    Dense(128, ReLU) -> Dense(num_classes, softmax).
    Compiled with the Adam optimizer, categorical cross-entropy loss and the
    accuracy metric.

    Args:
        input_shape: (steps, channels) shape of a single sample.
        num_classes: width of the softmax output layer.

    Returns:
        The compiled keras.Sequential model.
    """
    model = keras.Sequential([
        keras.Input(shape=input_shape),
        layers.Conv1D(32, 3, activation='relu'),
        layers.MaxPooling1D(2),
        layers.Conv1D(64, 3, activation='relu'),
        layers.MaxPooling1D(2),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


# Extract features and labels
X = balanced_df.drop(columns=['label'])
y = balanced_df['label']

# Convert the labels to one-hot encoding
num_classes = 4
y_encoded = to_categorical(y, num_classes)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Reshape the data for CNN input: every row becomes a single-channel sequence
# with one step per feature column. The width is taken from the data instead
# of being hard-coded, so a change in column count no longer surfaces as an
# opaque reshape error.
input_shape = (X.shape[1], 1)
X_train_reshaped = X_train.values.reshape(X_train.shape[0], *input_shape)
X_test_reshaped = X_test.values.reshape(X_test.shape[0], *input_shape)

# Stage 1: train a CNN on the raw features.
# NOTE(review): the test split is also passed as validation_data, so the
# per-epoch validation metrics and the final evaluation use the same rows.
stage1_cnn = build_cnn(input_shape, num_classes)
stage1_cnn.fit(X_train_reshaped, y_train, epochs=10, validation_data=(X_test_reshaped, y_test))

# Extract features using the trained stage-1 model: the output of its
# second-to-last layer, i.e. the Dense(128) activations.
feature_extractor = keras.Model(inputs=stage1_cnn.input, outputs=stage1_cnn.layers[-2].output)
X_train_features = feature_extractor.predict(X_train_reshaped)
X_test_features = feature_extractor.predict(X_test_reshaped)

# Only the first `stage2_width` of the 128 extracted activations are passed on
# to the second stage.
# NOTE(review): the remaining activations are discarded — confirm this
# truncation is intended rather than a shortcut to reuse the (24, 1) shape.
stage2_width = 24
X_train_features_reshaped = X_train_features[:, :stage2_width].reshape(
    (X_train_features.shape[0], stage2_width, 1)
)
X_test_features_reshaped = X_test_features[:, :stage2_width].reshape(
    (X_test_features.shape[0], stage2_width, 1)
)

# Stage 2: train a fresh CNN of the same architecture on the extracted
# features. `cnn_model` is the model later cells evaluate.
cnn_model = build_cnn((stage2_width, 1), num_classes)
cnn_model.fit(X_train_features_reshaped, y_train, epochs=10, validation_data=(X_test_features_reshaped, y_test))

# Evaluate the CNN model
loss, accuracy = cnn_model.evaluate(X_test_features_reshaped, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 1.5307948589324951, Test Accuracy: 0.6807000041007996


In [6]:
from sklearn.metrics import classification_report

# Per-class scores predicted for the held-out feature tensors by the model
# currently bound to `cnn_model`.
y_pred = cnn_model.predict(X_test_features_reshaped)

# Collapse the one-hot targets and the predicted score vectors to integer
# class ids by taking the index of the largest entry in each row.
y_test_categorical = np.argmax(y_test, axis=1)
y_pred_categorical = np.argmax(y_pred, axis=1)

# Build the per-class precision / recall / F1 summary and show it.
class_report = classification_report(
    y_true=y_test_categorical,
    y_pred=y_pred_categorical,
)
print("Classification Report:\n", class_report)

Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.56      0.69      2492
           1       0.60      0.49      0.54      2491
           2       0.49      0.88      0.63      2463
           3       1.00      0.80      0.89      2554

    accuracy                           0.68     10000
   macro avg       0.75      0.68      0.69     10000
weighted avg       0.75      0.68      0.69     10000
