In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [5]:
# Column layout of the raw file: 41 features named f_1..f_41, followed by the
# class label and a 'difficulty' column (dropped below).
column_names = [f'f_{i}' for i in range(1, 42)] + ['label', 'difficulty']

df = pd.read_csv('KDDTrain+.txt', header=None, names=column_names)
df = df.drop(columns=['difficulty'])

# Columns that are one-hot encoded (presumably protocol / service / flag in
# the NSL-KDD layout -- TODO confirm against the dataset documentation).
nominal_cols = ['f_2', 'f_3', 'f_4']

df_encoded = pd.get_dummies(df, columns=nominal_cols, drop_first=True)

# Binary target: 0 for the 'normal' label, 1 for every other label.
df_encoded['binary_label'] = df_encoded['label'].apply(lambda x: 0 if x == 'normal' else 1)
df_encoded = df_encoded.drop(columns=['label'])

features = df_encoded.drop(columns=['binary_label']).columns
X = df_encoded[features]
y = df_encoded['binary_label']

# Split BEFORE scaling so that the scaler never sees validation/test rows.
# random_state and stratify are unchanged, so split membership is the same
# as it would be when splitting an already-scaled matrix.
X_train_val_raw, X_test_raw, y_train_val, y_test = train_test_split(
    X, y.values, test_size=0.2, random_state=42, stratify=y
)
# 0.1 / 0.8 = 0.125 of the remaining 80% -> 10% of all rows for validation,
# leaving 70% for training.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X_train_val_raw, y_train_val, test_size=(0.1/0.8), random_state=42, stratify=y_train_val
)

# Fit min/max on the training split only, then apply the same transform to
# the held-out splits. Held-out values may fall slightly outside [0, 1].
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any cell referring to these names still runs; both use the
# train-fitted scaler and are not used to build the splits above.
X_train_val = scaler.transform(X_train_val_raw)
X_scaled = scaler.transform(X)

IN_FEATURES = X_train.shape[1]
NUM_CLASSES = 2

In [6]:
# Feature arrays become float32 tensors with a singleton axis inserted at
# dim 1, which Conv1d consumes as the channel dimension.
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.tensor(split, dtype=torch.float32).unsqueeze(1)
    for split in (X_train, X_val, X_test)
)

# Targets are int64 class indices, the dtype CrossEntropyLoss expects.
y_train_tensor, y_val_tensor, y_test_tensor = (
    torch.tensor(split, dtype=torch.long)
    for split in (y_train, y_val, y_test)
)

BATCH_SIZE = 64

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training loader shuffles; evaluation loaders keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader, test_loader = (
    DataLoader(held_out, batch_size=BATCH_SIZE, shuffle=False)
    for held_out in (val_dataset, test_dataset)
)

In [None]:
class SimpleCNN(nn.Module):
    """Two-stage 1D convolutional classifier for single-channel, fixed-length inputs.

    Layout: (Conv1d -> ReLU -> MaxPool1d(2)) twice, then
    Linear -> ReLU -> Dropout(0.5) -> Linear. The forward pass returns raw
    logits; no softmax is applied.

    Args:
        input_length: Length of the last input dimension the model is built
            for. Both convolutions preserve length (kernel 5, padding 2) and
            each pooling layer halves it with floor division, so the
            flattened feature size is ``64 * (input_length // 4)``.
        num_classes: Number of output logits.

    Raises:
        ValueError: If ``input_length`` is smaller than 4, which would leave
            no positions after the two pooling layers.
    """

    def __init__(self, input_length, num_classes):
        super().__init__()

        if input_length < 4:
            raise ValueError(
                f"input_length must be at least 4 to survive two MaxPool1d(2) layers, "
                f"got {input_length}"
            )

        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=5, padding=2)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool1d(kernel_size=2)

        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=5, padding=2)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool1d(kernel_size=2)

        # 64 output channels times the length left after two halvings.
        self._to_linear = 64 * (input_length // 4)

        self.fc1 = nn.Linear(self._to_linear, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc_out = nn.Linear(128, num_classes)

    def forward(self, x):
        """Compute class logits.

        Args:
            x: Tensor of shape ``(batch, 1, length)``. A 2-D ``(1, length)``
                tensor is treated as a single unbatched sample.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding raw logits.

        Raises:
            ValueError: If the per-sample feature count after the
                convolutional stack does not match the size ``fc1`` was
                built for (i.e. the input length is incompatible with
                ``input_length``).
        """
        # Promote an unbatched sample to a batch of one so that flattening
        # below is always per-sample.
        if x.dim() == 2:
            x = x.unsqueeze(0)

        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))

        # Flatten per sample. A view(-1, n) here would silently fold surplus
        # positions into the batch dimension when the length is wrong.
        x = x.flatten(start_dim=1)
        if x.size(1) != self._to_linear:
            raise ValueError(
                f"Expected {self._to_linear} features per sample after the "
                f"convolutional stack, got {x.size(1)}; input length does not "
                f"match the input_length the model was built with"
            )

        # ReLU is stateless, so reusing relu1 for the dense layer is safe.
        x = self.relu1(self.fc1(x))
        x = self.dropout(x)
        return self.fc_out(x)

In [None]:
def evaluate_model(model, data_loader):
    """Run inference over a loader and return accuracy and weighted F1.

    The model is switched to eval mode and is left in eval mode on return.

    Args:
        model: Module that maps an input batch to per-class scores of shape
            ``(batch, num_classes)``.
        data_loader: Iterable yielding ``(inputs, labels)`` tensor pairs.

    Returns:
        Tuple ``(accuracy, f1)`` where ``f1`` uses ``average='weighted'``.
    """
    # Take the device from the model itself rather than from a notebook
    # global that may not be defined yet (or may disagree with the model).
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for X_batch, y_batch in data_loader:
            outputs = model(X_batch.to(model_device))
            # Index of the highest score along the class dimension.
            _, predicted = torch.max(outputs, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(y_batch.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return accuracy, f1

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=15,
                checkpoint_path='best_cnn_model.pth'):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    After every epoch the model is evaluated on ``val_loader`` via
    ``evaluate_model``; whenever validation accuracy strictly improves, the
    model's ``state_dict`` is written to ``checkpoint_path`` (overwriting any
    previous file). Progress is printed once per epoch.

    Args:
        model: Module to train; batches are moved to the device of its parameters.
        train_loader: DataLoader yielding ``(inputs, labels)`` training batches.
        val_loader: Loader passed to ``evaluate_model`` after each epoch.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.
        checkpoint_path: Destination file for the best ``state_dict``.

    Returns:
        None.
    """
    # Use the model's own device instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    # Start below any achievable accuracy so the first epoch always writes a
    # checkpoint; with 0.0 a run that never beats 0 would save nothing.
    best_val_accuracy = float('-inf')

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0

        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(model_device), y_batch.to(model_device)

            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

            # Weight each batch's loss by its size so the epoch figure is
            # divided by the dataset size below even if the last batch is short.
            total_loss += loss.item() * X_batch.size(0)

        avg_train_loss = total_loss / len(train_loader.dataset)

        val_accuracy, val_f1 = evaluate_model(model, val_loader)

        print(f"Epoch [{epoch+1}/{num_epochs}], "
              f"Eğitim Kaybı: {avg_train_loss:.4f}, "
              f"Doğrulama Doğruluğu: {val_accuracy:.4f}, "
              f"Doğrulama F1: {val_f1:.4f}")

        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), checkpoint_path)
            print(">>> Yeni En İyi Model Kaydedildi <<<")

def final_test_evaluation(model, test_loader):
    """Evaluate ``model`` on the test loader and print accuracy, F1 and confusion matrix.

    Predictions are collected in a single pass over ``test_loader`` and all
    three metrics are computed from that one set of predictions. The model is
    switched to eval mode and left in eval mode on return.

    Args:
        model: Module that maps an input batch to per-class scores of shape
            ``(batch, num_classes)``.
        test_loader: Iterable yielding ``(inputs, labels)`` tensor pairs.

    Returns:
        None; results are printed.
    """
    print("\n--- Nihai Test Değerlendirmesi ---")

    # Use the model's own device instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    all_preds = []
    all_labels = []
    model.eval()
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch.to(model_device))
            # Index of the highest score along the class dimension.
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(y_batch.cpu().numpy())

    # Same metric definitions as the per-epoch validation (weighted F1),
    # computed here from the single prediction pass above.
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')
    cm = confusion_matrix(all_labels, all_preds)

    print(f"Test Doğruluğu (Test Accuracy): {accuracy:.4f}")
    print(f"Test F1 Skoru (Test F1 Score): {f1:.4f}")
    print("Karmaşıklık Matrisi (Confusion Matrix):")
    print(cm)

In [9]:
# Seed the RNGs that drive weight initialisation and DataLoader shuffling so
# that a Restart & Run All reproduces the same training run.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_cnn = SimpleCNN(IN_FEATURES, NUM_CLASSES).to(device)

criterion = nn.CrossEntropyLoss()
LEARNING_RATE = 0.001
optimizer = optim.Adam(model_cnn.parameters(), lr=LEARNING_RATE)
NUM_EPOCHS = 15

print(f"CNN Modeli {device} üzerinde eğitiliyor. Başlangıç Öğrenme Hızı: {LEARNING_RATE}")

train_model(model_cnn, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS)

# Restore the best checkpoint written during training. map_location keeps
# the load working when the file was saved on a different device (e.g. a
# GPU checkpoint reloaded on a CPU-only machine).
model_cnn.load_state_dict(torch.load('best_cnn_model.pth', map_location=device))

final_test_evaluation(model_cnn, test_loader)

CNN Modeli cpu üzerinde eğitiliyor. Başlangıç Öğrenme Hızı: 0.001
Epoch [1/15], Eğitim Kaybı: 0.0803, Doğrulama Doğruluğu: 0.9781, Doğrulama F1: 0.9781
>>> Yeni En İyi Model Kaydedildi <<<
Epoch [2/15], Eğitim Kaybı: 0.0465, Doğrulama Doğruluğu: 0.9833, Doğrulama F1: 0.9832
>>> Yeni En İyi Model Kaydedildi <<<
Epoch [3/15], Eğitim Kaybı: 0.0371, Doğrulama Doğruluğu: 0.9890, Doğrulama F1: 0.9890
>>> Yeni En İyi Model Kaydedildi <<<
Epoch [4/15], Eğitim Kaybı: 0.0311, Doğrulama Doğruluğu: 0.9898, Doğrulama F1: 0.9898
>>> Yeni En İyi Model Kaydedildi <<<
Epoch [5/15], Eğitim Kaybı: 0.0265, Doğrulama Doğruluğu: 0.9927, Doğrulama F1: 0.9927
>>> Yeni En İyi Model Kaydedildi <<<
Epoch [6/15], Eğitim Kaybı: 0.0220, Doğrulama Doğruluğu: 0.9942, Doğrulama F1: 0.9942
>>> Yeni En İyi Model Kaydedildi <<<
Epoch [7/15], Eğitim Kaybı: 0.0197, Doğrulama Doğruluğu: 0.9944, Doğrulama F1: 0.9944
>>> Yeni En İyi Model Kaydedildi <<<
Epoch [8/15], Eğitim Kaybı: 0.0184, Doğrulama Doğruluğu: 0.9945, Doğrulam