In [6]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import warnings
warnings.filterwarnings('ignore') 

print("### 1. Kütüphaneler ve Veri Yüklendi. ###\n")


feature_names = [
    'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 
    'land', 'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 
    'logged_in', 'num_compromised', 'root_shell', 'su_attempted', 'num_root', 
    'num_file_creations', 'num_shells', 'num_access_files', 'num_outbound_cmds', 
    'is_host_login', 'is_guest_login', 'count', 'srv_count', 'serror_rate', 
    'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate', 'same_srv_rate', 
    'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count', 
    'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate', 
    'dst_host_srv_diff_host_rate', 'dst_host_serror_rate', 'dst_host_srv_serror_rate', 
    'dst_host_rerror_rate', 'dst_host_srv_rerror_rate', 'class'
]


train_file = 'KDDTrain+.txt'
test_file = 'KDDTest+.txt'

df_train = pd.read_csv(train_file, header=None, names=feature_names)
df_test = pd.read_csv(test_file, header=None, names=feature_names)

def map_attack_to_binary(attack):
    return 'normal' if attack == 'normal' else 'attack'

df_train['class'] = df_train['class'].apply(map_attack_to_binary)
df_test['class'] = df_test['class'].apply(map_attack_to_binary)

X_train = df_train.drop('class', axis=1)
y_train = df_train['class']
X_test = df_test.drop('class', axis=1)
y_test = df_test['class']


print("### 2. Ön İşleme ve Hata Çözümü Başladı. ###")


categorical_cols = ['protocol_type', 'service', 'flag']

X_train = pd.get_dummies(X_train, columns=categorical_cols, drop_first=True)
X_test = pd.get_dummies(X_test, columns=categorical_cols, drop_first=True)


train_cols = X_train.columns

missing_in_test = set(train_cols) - set(X_test.columns) 
for c in missing_in_test:
    X_test[c] = 0
X_test = X_test[train_cols] 


object_cols_train = X_train.select_dtypes(include=['object']).columns

if len(object_cols_train) > 0:
    print(f"Kalan metin sütunları kaldırılıyor: {object_cols_train.tolist()}")
    X_train = X_train.drop(columns=object_cols_train)
    X_test = X_test.drop(columns=object_cols_train, errors='ignore')


le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train) 
X_test_scaled = scaler.transform(X_test)

X_train_np = X_train_scaled
X_test_np = X_test_scaled
y_train_np = y_train_encoded 
y_test_np = y_test_encoded

print("### Ön İşleme Başarıyla Tamamlandı. ###")


print("\n### 3. MLP Modeli Oluşturuluyor ve Eğitiliyor. ###")
input_dim = X_train_np.shape[1]

model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(input_dim,)))
model.add(Dropout(0.2)) 
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', 
              loss='binary_crossentropy', 
              metrics=['accuracy'])

history = model.fit(X_train_np, y_train_np, 
                    epochs=10,        
                    batch_size=256,   
                    validation_split=0.1, 
                    verbose=1)

print("\n### 4. Performans Değerlendirmesi Başladı. ###")
y_pred_probs = model.predict(X_test_np)
y_pred_classes = (y_pred_probs > 0.5).astype(int)

accuracy = accuracy_score(y_test_np, y_pred_classes)

print("\n" + "=" * 60)
print(f"**MLP Modelinin Test Doğruluğu (Accuracy): {accuracy * 100:.2f}%**")
print("=" * 60)

print("\n**Sınıflandırma Raporu (Classification Report):**")
print(classification_report(y_test_np, y_pred_classes, 
                            labels=[0, 1], 
                            target_names=['Normal (0)', 'Saldırı (1)']))

### 1. Kütüphaneler ve Veri Yüklendi. ###

### 2. Ön İşleme ve Hata Çözümü Başladı. ###
Kalan metin sütunları kaldırılıyor: ['duration', 'dst_host_srv_rerror_rate']
### Ön İşleme Başarıyla Tamamlandı. ###

### 3. MLP Modeli Oluşturuluyor ve Eğitiliyor. ###
Epoch 1/10
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - accuracy: 0.9974 - loss: 0.0109 - val_accuracy: 1.0000 - val_loss: 2.4333e-05
Epoch 2/10
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 1.0000 - loss: 3.9447e-05 - val_accuracy: 1.0000 - val_loss: 5.7730e-06
Epoch 3/10
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 1.0000 - loss: 1.4927e-05 - val_accuracy: 1.0000 - val_loss: 2.2444e-06
Epoch 4/10
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 1.0000 - loss: 7.3987e-06 - val_accuracy: 1.0000 - val_loss: 1.1195e-06
Epoch 5/10
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m