In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, GRU, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score

colnames = ['duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 
            'land', 'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 
            'num_compromised', 'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 
            'num_shells', 'num_access_files', 'num_outbound_cmds', 'is_host_login', 
            'is_guest_login', 'count', 'srv_count', 'serror_rate', 'srv_serror_rate', 
            'rerror_rate', 'srv_rerror_rate', 'same_srv_rate', 'diff_srv_rate', 
            'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count', 
            'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate', 
            'dst_host_srv_diff_host_rate', 'dst_host_serror_rate', 'dst_host_srv_serror_rate', 
            'dst_host_rerror_rate', 'dst_host_srv_rerror_rate', 'attack_type', 'difficulty']


train_df = pd.read_csv('KDDTrain+.txt', names=colnames, header=None)
test_df = pd.read_csv('KDDTest+.txt', names=colnames, header=None)


train_df.drop(['difficulty', 'num_outbound_cmds'], axis=1, inplace=True)
test_df.drop(['difficulty', 'num_outbound_cmds'], axis=1, inplace=True)
train_df['attack_class'] = np.where(train_df['attack_type'] == 'normal', 0, 1)
test_df['attack_class'] = np.where(test_df['attack_type'] == 'normal', 0, 1)


Y_train = train_df['attack_class']
Y_test = test_df['attack_class']
X_train = train_df.drop(['attack_type', 'attack_class'], axis=1)
X_test = test_df.drop(['attack_type', 'attack_class'], axis=1)


categorical_cols = X_train.select_dtypes(include=['object']).columns
numerical_cols = X_train.select_dtypes(include=['int64', 'float64']).columns


for col in categorical_cols:
    le = LabelEncoder()
    full_data = pd.concat([X_train[col], X_test[col]]).astype(str).unique()
    le.fit(full_data)
    X_train[col] = le.transform(X_train[col].astype(str))
    X_test[col] = le.transform(X_test[col].astype(str))


scaler = MinMaxScaler()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])


X_train = pd.get_dummies(X_train, columns=categorical_cols)
X_test = pd.get_dummies(X_test, columns=categorical_cols)
train_cols = X_train.columns
test_cols = X_test.columns


missing_in_test = set(train_cols) - set(test_cols)
for c in missing_in_test:
    X_test[c] = 0
X_test = X_test[train_cols] 


X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)


X_train_reshaped = np.expand_dims(X_train.values, axis=2)
X_test_reshaped = np.expand_dims(X_test.values, axis=2)


Y_train_cat = to_categorical(Y_train, num_classes=2)
Y_test_cat = to_categorical(Y_test, num_classes=2)


input_shape = X_train_reshaped.shape[1:] 

model = Sequential()


model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.2))


model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))


model.add(GRU(64, return_sequences=False))
model.add(Dropout(0.3))


model.add(Dense(2, activation='softmax'))


model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

print(model.summary())


print("\n--- CNN-GRU Hibrit Modeli Eğitiliyor ---")
history = model.fit(
    X_train_reshaped, Y_train_cat, 
    epochs=12, 
    batch_size=128, 
    validation_split=0.1, 
    verbose=1
)


print("\n--- Test Kümesinde Değerlendirme ---")
loss, accuracy = model.evaluate(X_test_reshaped, Y_test_cat, verbose=0)
print(f"Test Kümesi Kayıp (Loss): {loss:.4f}")
print(f"Test Kümesi Doğruluğu (Accuracy): {accuracy * 100:.2f}%")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


None

--- CNN-GRU Hibrit Modeli Eğitiliyor ---
Epoch 1/12
[1m886/886[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 42ms/step - accuracy: 0.9486 - loss: 0.1482 - val_accuracy: 0.9703 - val_loss: 0.0965
Epoch 2/12
[1m886/886[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 42ms/step - accuracy: 0.9754 - loss: 0.0731 - val_accuracy: 0.9824 - val_loss: 0.0543
Epoch 3/12
[1m886/886[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 41ms/step - accuracy: 0.9814 - loss: 0.0529 - val_accuracy: 0.9849 - val_loss: 0.0434
Epoch 4/12
[1m886/886[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 41ms/step - accuracy: 0.9831 - loss: 0.0466 - val_accuracy: 0.9879 - val_loss: 0.0351
Epoch 5/12
[1m886/886[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 40ms/step - accuracy: 0.9860 - loss: 0.0384 - val_accuracy: 0.9894 - val_loss: 0.0292
Epoch 6/12
[1m886/886[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 40ms/step - accuracy: 0.9880 - loss: 0.0334 - val_accur