In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, GlobalAveragePooling1D, Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

# Load the combined IoT-23 CSV into a DataFrame.
# A raw string keeps the Windows path separators literal; in a normal string
# "\F" and "\i" are invalid escape sequences (SyntaxWarning on Python 3.12+).
data = pd.read_csv(r"E:\Final year project\iot23_combined.csv")


In [3]:

# Integer class id for every label string expected in the `label` column.
# NOTE(review): '-   benign   -' gets its own id (13), separate from
# 'Benign' (1) — confirm the two are really meant to be distinct classes.
label_mapping = {
    'PartOfAHorizontalPortScan': 0,
    'Benign': 1,
    'Okiru': 2,
    'DDoS': 3,
    'C&C': 4,
    'Attack': 5,
    'C&C-HeartBeat': 6,
    'C&C-FileDownload': 7,
    'C&C-Torii': 8,
    'FileDownload': 9,
    'C&C-HeartBeat-FileDownload': 10,
    'Okiru-Attack': 11,
    'C&C-Mirai': 12,
    '-   benign   -': 13,
}

# Series.map() turns every value that is missing from `label_mapping` into NaN,
# which would silently corrupt the targets. Fail loudly instead; this also
# catches an accidental second run of this cell on already-encoded labels.
encoded_labels = data['label'].map(label_mapping)
unmapped_mask = encoded_labels.isna()
if unmapped_mask.any():
    unknown_labels = data.loc[unmapped_mask, 'label'].unique().tolist()
    raise ValueError(f"Unmapped values in 'label' column: {unknown_labels}")

# Every row is mapped at this point, so the ids can be stored as plain integers.
data['label'] = encoded_labels.astype('int64')

In [4]:

# Numeric (non-indicator) columns that are passed through the scaler.
non_binary_feature_columns = [
    'duration',
    'orig_bytes',
    'resp_bytes',
    'missed_bytes',
    'orig_pkts',
    'orig_ip_bytes',
    'resp_pkts',
    'resp_ip_bytes',
]

# Fit the scaler on these columns and overwrite them with the scaled values.
# NOTE(review): the scaler is fitted on the full frame, i.e. before the
# train/validation/test split done further down — confirm that is intended.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(data[non_binary_feature_columns])
data[non_binary_feature_columns] = scaled_values


In [5]:
# Protocol and connection-state flag columns (treated as binary features).
binary_feature_columns = [
    'proto_icmp', 'proto_tcp', 'proto_udp',
    'conn_state_OTH', 'conn_state_REJ', 'conn_state_RSTO', 'conn_state_RSTOS0',
    'conn_state_RSTR', 'conn_state_RSTRH', 'conn_state_S0', 'conn_state_S1',
    'conn_state_S2', 'conn_state_S3', 'conn_state_SF', 'conn_state_SH',
    'conn_state_SHR',
]

# Replace each flag column with integer codes. One encoder object is refitted
# per column, so afterwards it only remembers the classes of the last column.
label_encoder = LabelEncoder()
for flag_column in binary_feature_columns:
    data[flag_column] = label_encoder.fit_transform(data[flag_column])

In [6]:
# Show the preprocessed frame (the notebook repr truncates it to the first and last rows).
data

Unnamed: 0.1,Unnamed: 0,duration,orig_bytes,resp_bytes,missed_bytes,orig_pkts,orig_ip_bytes,resp_pkts,resp_ip_bytes,label,...,conn_state_RSTOS0,conn_state_RSTR,conn_state_RSTRH,conn_state_S0,conn_state_S1,conn_state_S2,conn_state_S3,conn_state_SF,conn_state_SH,conn_state_SHR
0,0,0.152556,-0.00038,-0.000362,-0.001151,-0.000505,-0.000487,-0.003381,-0.00084,0,...,0,0,0,1,0,0,0,0,0,0
1,1,0.006778,-0.00038,-0.000362,-0.001151,-0.000621,-0.000663,-0.003381,-0.00084,0,...,0,0,0,1,0,0,0,0,0,0
2,2,0.006778,-0.00038,-0.000362,-0.001151,-0.000621,-0.000663,-0.003381,-0.00084,0,...,0,0,0,1,0,0,0,0,0,0
3,3,0.152556,-0.00038,-0.000362,-0.001151,-0.000505,-0.000487,-0.003381,-0.00084,1,...,0,0,0,1,0,0,0,0,0,0
4,4,0.006778,-0.00038,-0.000362,-0.001151,-0.000621,-0.000663,-0.003381,-0.00084,1,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25144797,999994,0.006778,-0.00038,-0.000362,-0.001151,-0.000621,-0.000692,-0.003381,-0.00084,0,...,0,0,0,1,0,0,0,0,0,0
25144798,999995,0.006778,-0.00038,-0.000362,-0.001151,-0.000621,-0.000692,-0.003381,-0.00084,0,...,0,0,0,1,0,0,0,0,0,0
25144799,999996,0.006778,-0.00038,-0.000362,-0.001151,-0.000621,-0.000692,-0.003381,-0.00084,0,...,0,0,0,1,0,0,0,0,0,0
25144800,999997,0.006778,-0.00038,-0.000362,-0.001151,-0.000621,-0.000692,-0.003381,-0.00084,0,...,0,0,0,1,0,0,0,0,0,0


In [7]:
# Model inputs: the scaled numeric columns followed by the encoded flag columns.
# Reusing the two preprocessing lists (instead of repeating all 24 names) keeps
# the feature set in sync with what was actually preprocessed; the resulting
# column order is the same as the previously hand-written list.
feature_columns = non_binary_feature_columns + binary_feature_columns
X = data[feature_columns]

# Target: the integer-encoded class label.
y = data['label']

In [8]:
# Sanity check: list the distinct encoded label values present in the target.
y.unique()

array([ 0,  1,  4,  2,  3,  6,  8,  5,  7, 11,  9, 10, 12, 13],
      dtype=int64)

In [9]:
# Turn the feature frame into a NumPy array and append a trailing channel axis,
# giving the (samples, features, 1) layout the Conv1D input below expects.
X = np.expand_dims(X.to_numpy(), axis=-1)

# One-hot encode the integer class ids.
num_classes = 14
y = to_categorical(y.to_numpy(), num_classes=num_classes)

In [10]:
# 70 % of the rows go to training; the held-out 30 % is then halved into
# validation and test sets. Both splits use the same fixed seed.
split_seed = 42
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.30, random_state=split_seed
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.50, random_state=split_seed
)


In [12]:
# 1D CNN: four Conv1D + BatchNormalization stages (max-pooling after the second
# and third), global average pooling, then a softmax layer over the classes.
model = Sequential([
    Conv1D(filters=36, kernel_size=5, activation='relu', input_shape=(24, 1)),
    BatchNormalization(),

    Conv1D(filters=72, kernel_size=5, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    Conv1D(filters=144, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    Conv1D(filters=288, kernel_size=3, activation='relu'),
    BatchNormalization(),

    GlobalAveragePooling1D(),
    Dense(num_classes, activation='softmax'),
])

# Adam with its default settings; categorical cross-entropy matches the
# one-hot encoded targets.
optimizer = Adam()
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model, evaluating on the validation split after every epoch.
# NOTE(review): batch_size=32 for 30 epochs on the full training split may be
# very slow (the recorded run was interrupted) — consider revisiting.
model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=32, epochs=30)

KeyboardInterrupt: 