In [6]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.layers import Dense, Dropout, Input, LSTM
from tensorflow.keras.models import Sequential

# Locations of the train and test splits, resolved relative to the working directory.
train_file = 'KDDTrain+.txt'
test_file = 'KDDTest+.txt'

# Column labels applied to both files, in file order. The final two entries
# ('attack_type', 'difficulty_level') are not model inputs: later cells derive
# the binary label from the former and drop both from the feature matrix.
feature_names = [
    'duration', 'protocol_type', 'service', 'flag',
    'src_bytes', 'dst_bytes', 'land', 'wrong_fragment', 'urgent',
    'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
    'root_shell', 'su_attempted', 'num_root', 'num_file_creations',
    'num_shells', 'num_access_files', 'num_outbound_cmds',
    'is_host_login', 'is_guest_login',
    'count', 'srv_count',
    'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate',
    'dst_host_count', 'dst_host_srv_count',
    'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate',
    'dst_host_serror_rate', 'dst_host_srv_serror_rate',
    'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
    'attack_type', 'difficulty_level',
]

# Passing `names=` supplies the column labels, so the first line of each file
# is read as data rather than as a header row.
df_train, df_test = (
    pd.read_csv(csv_path, names=feature_names)
    for csv_path in (train_file, test_file)
)

In [7]:
# Binary target: 0 when attack_type is exactly 'normal', 1 for every other value.
df_train['label'] = df_train['attack_type'].ne('normal').astype('int64')
df_test['label'] = df_test['attack_type'].ne('normal').astype('int64')

# Columns that must not reach the model: the raw attack name, the dataset's
# difficulty score, and the target itself.
non_feature_cols = ['attack_type', 'difficulty_level', 'label']

X_train = df_train.drop(columns=non_feature_cols)
y_train = df_train['label']
X_test = df_test.drop(columns=non_feature_cols)
y_test = df_test['label']

# The three string-valued columns are one-hot encoded; every remaining column
# is treated as numeric and standardised.
categorical_features = ['protocol_type', 'service', 'flag']
numerical_features = [
    col for col in X_train.columns if col not in categorical_features
]

# handle_unknown='ignore': a category that appears only in the test split is
# encoded as an all-zero one-hot vector instead of raising an error.
one_hot_encoder = OneHotEncoder(handle_unknown='ignore')
standard_scaler = StandardScaler()

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', one_hot_encoder, categorical_features),
        ('num', standard_scaler, numerical_features),
    ]
)

# Fit the encoders/scalers on the training split only, then reuse the fitted
# statistics for the test split.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

In [8]:
def _as_dense(features):
    """Return ``features`` as a dense NumPy array.

    ColumnTransformer returns a SciPy sparse matrix when the overall density of
    its output falls below ``sparse_threshold``; sparse matrices are 2-D only
    and cannot be reshaped into the 3-D tensor an LSTM expects. Anything that
    exposes ``toarray()`` is therefore densified first; arrays that are
    already dense are passed through unchanged.
    """
    if hasattr(features, "toarray"):
        features = features.toarray()
    return np.asarray(features)


X_train_dense = _as_dense(X_train_processed)
X_test_dense = _as_dense(X_test_processed)

# Each row becomes a sequence of length TIME_STEPS with INPUT_DIM features per step.
# NOTE(review): with TIME_STEPS = 1 every sample is a single-step sequence, so
# the LSTM layers never see more than one time step per sample.
TIME_STEPS = 1
INPUT_DIM = X_train_dense.shape[1]

# (samples, features) -> (samples, time steps, features)
X_train_lstm = X_train_dense.reshape(X_train_dense.shape[0], TIME_STEPS, INPUT_DIM)
X_test_lstm = X_test_dense.reshape(X_test_dense.shape[0], TIME_STEPS, INPUT_DIM)

print(f"LSTM Giriş Boyutu: {X_train_lstm.shape}")

LSTM Giriş Boyutu: (125973, 1, 122)


In [9]:
# Seed Python, NumPy and TensorFlow in one call so weight initialisation,
# dropout masks and per-epoch shuffling repeat across Restart & Run All.
# This must run before the model is constructed.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Training hyperparameters, named so they can be tuned in one place.
EPOCHS = 10
BATCH_SIZE = 256
VALIDATION_SPLIT = 0.1  # Keras holds out the last fraction of rows, taken before shuffling

# Two stacked LSTM layers followed by a small dense head; the single sigmoid
# unit outputs the probability of the positive class (label 1).
model_lstm = Sequential([
    Input(shape=(TIME_STEPS, INPUT_DIM)),

    # return_sequences=True keeps the per-step outputs so the second LSTM
    # receives a sequence rather than a single vector.
    LSTM(128, return_sequences=True),
    Dropout(0.2),

    LSTM(64),
    Dropout(0.2),

    Dense(32, activation='relu'),

    Dense(1, activation='sigmoid'),
])

model_lstm.compile(optimizer='adam',
                   loss='binary_crossentropy',
                   metrics=['accuracy'])

print("LSTM Modeli Eğitiliyor...")
history_lstm = model_lstm.fit(X_train_lstm, y_train,
                              epochs=EPOCHS,
                              batch_size=BATCH_SIZE,
                              validation_split=VALIDATION_SPLIT,
                              verbose=1)

LSTM Modeli Eğitiliyor...
Epoch 1/10
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - accuracy: 0.9711 - loss: 0.0957 - val_accuracy: 0.9913 - val_loss: 0.0266
Epoch 2/10
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.9907 - loss: 0.0254 - val_accuracy: 0.9943 - val_loss: 0.0189
Epoch 3/10
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.9924 - loss: 0.0209 - val_accuracy: 0.9941 - val_loss: 0.0174
Epoch 4/10
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.9930 - loss: 0.0194 - val_accuracy: 0.9935 - val_loss: 0.0159
Epoch 5/10
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.9936 - loss: 0.0177 - val_accuracy: 0.9945 - val_loss: 0.0145
Epoch 6/10
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 15ms/step - accuracy: 0.9937 - loss: 0.0169 - val_accuracy: 0.9948 - val_loss: 0

In [10]:
# Evaluate the trained model on the test split.
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
print("\n## Model Değerlendirme (Test Verisi Üzerinde)")
test_metrics_lstm = model_lstm.evaluate(X_test_lstm, y_test, verbose=0)
loss_lstm, accuracy_lstm = test_metrics_lstm
print(f"Test Doğruluğu: {accuracy_lstm*100:.2f}%")
print(f"Test Kaybı: {loss_lstm:.4f}")

# Detailed performance report: turn the sigmoid probabilities into hard 0/1
# predictions using a 0.5 decision threshold.
y_pred_proba_lstm = model_lstm.predict(X_test_lstm)
y_pred_lstm = (y_pred_proba_lstm > 0.5).astype("int32")

# Display names for labels 0 and 1, in that order.
class_names = ['Normal (0)', 'Saldırı (1)']

print("\n## Sınıflandırma Raporu")
report_text = classification_report(y_test, y_pred_lstm, target_names=class_names)
print(report_text)


## Model Değerlendirme (Test Verisi Üzerinde)
Test Doğruluğu: 75.95%
Test Kaybı: 1.8075
[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step

## Sınıflandırma Raporu
              precision    recall  f1-score   support

  Normal (0)       0.65      0.96      0.77      9711
 Saldırı (1)       0.95      0.61      0.74     12833

    accuracy                           0.76     22544
   macro avg       0.80      0.78      0.76     22544
weighted avg       0.82      0.76      0.76     22544

