In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [2]:
column_names = [f'f_{i}' for i in range(1, 42)] + ['label', 'difficulty']

df = pd.read_csv('KDDTrain+.txt', header=None, names=column_names)
df = df.drop(columns=['difficulty']) 

nominal_cols = ['f_2', 'f_3', 'f_4']

df_encoded = pd.get_dummies(df, columns=nominal_cols, drop_first=True)

df_encoded['binary_label'] = df_encoded['label'].apply(lambda x: 0 if x == 'normal' else 1)
df_encoded = df_encoded.drop(columns=['label']) 

features = df_encoded.drop(columns=['binary_label']).columns
X = df_encoded[features]
y = df_encoded['binary_label'] 

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

X_train_val, X_test, y_train_val, y_test = train_test_split(
    X_scaled, y.values, test_size=0.2, random_state=42, stratify=y
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=(0.1/0.8), random_state=42, stratify=y_train_val
)

IN_FEATURES = X_train.shape[1] 
NUM_CLASSES = 2 

print(f"Başlangıç Özellik Sayısı (41) -> OHE Sonrası Giriş Boyutu: {IN_FEATURES}")

Başlangıç Özellik Sayısı (41) -> OHE Sonrası Giriş Boyutu: 119


In [3]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

BATCH_SIZE = 64
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"Eğitim Veri Yükleyici (Train DataLoader) hazır: {len(train_loader)} batch.")

Eğitim Veri Yükleyici (Train DataLoader) hazır: 1378 batch.


In [4]:
class OptimizedDNN(nn.Module):
    def __init__(self, input_size, num_classes, dropout_rate=0.4):
        super(OptimizedDNN, self).__init__()
        
        self.layer1 = nn.Linear(input_size, 256) 
        self.bn1 = nn.BatchNorm1d(256)
        self.dropout1 = nn.Dropout(dropout_rate)
        
        self.layer2 = nn.Linear(256, 128) 
        self.bn2 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(dropout_rate)
        
        self.layer3 = nn.Linear(128, 64) 
        self.bn3 = nn.BatchNorm1d(64)
        
        self.layer4 = nn.Linear(64, 32) 
        
        self.output_layer = nn.Linear(32, num_classes)
        self.relu = nn.ReLU() 
        
    def forward(self, x):
        
        x = self.layer1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout1(x)
        
        x = self.layer2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.dropout2(x)
        
        x = self.layer3(x)
        x = self.bn3(x)
        x = self.relu(x)
        
        x = self.layer4(x)
        x = self.relu(x)
        
        x = self.output_layer(x)
        return x

In [5]:
class OptimizedDNN(nn.Module):
    def __init__(self, input_size, num_classes, dropout_rate=0.4):
        super(OptimizedDNN, self).__init__()
        
        self.layer1 = nn.Linear(input_size, 256) 
        self.bn1 = nn.BatchNorm1d(256)
        self.dropout1 = nn.Dropout(dropout_rate)
        
        self.layer2 = nn.Linear(256, 128) 
        self.bn2 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(dropout_rate)
        
        self.layer3 = nn.Linear(128, 64) 
        self.bn3 = nn.BatchNorm1d(64)
        
        self.layer4 = nn.Linear(64, 32) 
        
        self.output_layer = nn.Linear(32, num_classes)
        self.relu = nn.ReLU() 
        
    def forward(self, x):
        
        x = self.layer1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout1(x)
        
        x = self.layer2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.dropout2(x)
        
        x = self.layer3(x)
        x = self.bn3(x)
        x = self.relu(x)
        
        x = self.layer4(x)
        x = self.relu(x)
        
        x = self.output_layer(x)
        return x

In [6]:
def evaluate_model(model, data_loader):
    model.eval() 
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for X_batch, y_batch in data_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            
            outputs = model(X_batch)
            _, predicted = torch.max(outputs.data, 1) 
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(y_batch.cpu().numpy())
    
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')
    
    return accuracy, f1

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=15):
    
    best_val_accuracy = 0.0
    
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item() * X_batch.size(0)
        
        avg_train_loss = total_loss / len(train_loader.dataset)
        
        val_accuracy, val_f1 = evaluate_model(model, val_loader)
        
        print(f"Epoch [{epoch+1}/{num_epochs}], "
              f"Eğitim Kaybı: {avg_train_loss:.4f}, "
              f"Doğrulama Doğruluğu: {val_accuracy:.4f}, "
              f"Doğrulama F1: {val_f1:.4f}")
              
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), 'best_optimized_dnn_model.pth')
            print(">>> Yeni En İyi Model Kaydedildi <<<")

def final_test_evaluation(model, test_loader):
    print("\n--- Nihai Test Değerlendirmesi ---")
    accuracy, f1 = evaluate_model(model, test_loader)
    
    all_preds = []
    all_labels = []
    model.eval()
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch.to(device))
            _, predicted = torch.max(outputs.data, 1) 
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(y_batch.cpu().numpy())

    cm = confusion_matrix(all_labels, all_preds)
    
    print(f"Test Doğruluğu (Test Accuracy): {accuracy:.4f}")
    print(f"Test F1 Skoru (Test F1 Score): {f1:.4f}")
    print("Karmaşıklık Matrisi (Confusion Matrix):")
    print(cm)


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = OptimizedDNN(IN_FEATURES, NUM_CLASSES, dropout_rate=0.4).to(device)

criterion = nn.CrossEntropyLoss()
LEARNING_RATE = 0.001
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
NUM_EPOCHS = 15

print(f"Model {device} üzerinde eğitiliyor. Başlangıç Öğrenme Hızı: {LEARNING_RATE}")

train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS)

model.load_state_dict(torch.load('best_optimized_dnn_model.pth'))

final_test_evaluation(model, test_loader)

Model cpu üzerinde eğitiliyor. Başlangıç Öğrenme Hızı: 0.001
Epoch [1/15], Eğitim Kaybı: 0.0663, Doğrulama Doğruluğu: 0.9844, Doğrulama F1: 0.9844
>>> Yeni En İyi Model Kaydedildi <<<
Epoch [2/15], Eğitim Kaybı: 0.0409, Doğrulama Doğruluğu: 0.9852, Doğrulama F1: 0.9852
>>> Yeni En İyi Model Kaydedildi <<<
Epoch [3/15], Eğitim Kaybı: 0.0336, Doğrulama Doğruluğu: 0.9894, Doğrulama F1: 0.9894
>>> Yeni En İyi Model Kaydedildi <<<
Epoch [4/15], Eğitim Kaybı: 0.0294, Doğrulama Doğruluğu: 0.9911, Doğrulama F1: 0.9911
>>> Yeni En İyi Model Kaydedildi <<<
Epoch [5/15], Eğitim Kaybı: 0.0266, Doğrulama Doğruluğu: 0.9928, Doğrulama F1: 0.9928
>>> Yeni En İyi Model Kaydedildi <<<
Epoch [6/15], Eğitim Kaybı: 0.0254, Doğrulama Doğruluğu: 0.9913, Doğrulama F1: 0.9914
Epoch [7/15], Eğitim Kaybı: 0.0246, Doğrulama Doğruluğu: 0.9926, Doğrulama F1: 0.9926
Epoch [8/15], Eğitim Kaybı: 0.0241, Doğrulama Doğruluğu: 0.9932, Doğrulama F1: 0.9932
>>> Yeni En İyi Model Kaydedildi <<<
Epoch [9/15], Eğitim Kaybı: 0