In [None]:
# Install gdown, the command-line tool used on the next line to fetch files from Google Drive.
# NOTE(review): the version is not pinned, so a re-run may pick up a different gdown release.
!pip install gdown
# Download every file of the shared Drive folder; per the captured output below,
# the CSVs are written to /content/BGP_RIPE_datasets/.
# NOTE(review): --no-check-certificate presumably disables TLS certificate
# verification -- confirm it is really needed before keeping it.
!gdown --no-check-certificate --folder https://drive.google.com/drive/folders/17g55PHmMWFo6aBNhmzjOxVDfwtSDFUl3?usp=drive_link

Retrieving folder contents
Processing file 1J2eOhACVhee6fnrO0wA5ct255sVR5EQS BCNET_regular.csv
Processing file 1d4e4g7PNVkxO-H4ZknemlWZNnWOpJkLV Code_Red_I.csv
Processing file 1IleVfZkR-EQ0X6-TmM1UXZhDphFn-auG Moscow_blackout.csv
Processing file 1YFeW7-KnN474mkULMOAs3h3cxFS7tdb4 Nimda.csv
Processing file 1_PlE4ABTghvpYQ7M1MfT8w5xQmXopQok RIPE_regular.csv
Processing file 1WjlwJW4dHABkkx0ANAfAQBLIo9OZf5C1 Slammer.csv
Processing file 1RKRYfuaEmRCsgUaHIZICxL9FngrMYP7u WannaCrypt.csv
Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=1J2eOhACVhee6fnrO0wA5ct255sVR5EQS
To: /content/BGP_RIPE_datasets/BCNET_regular.csv
100% 151k/151k [00:00<00:00, 64.4MB/s]
Downloading...
From: https://drive.google.com/uc?id=1d4e4g7PNVkxO-H4ZknemlWZNnWOpJkLV
To: /content/BGP_RIPE_datasets/Code_Red_I.csv
100% 734k/734k [00:00<00:00, 133MB/s]
Downloading...
From: https://drive.google.com/uc?id=1IleVfZkR-EQ0X6

In [None]:
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight

# Constants
SEQUENCE_LENGTH = 10    # rows per sliding window fed to the LSTM
FEATURES_START = 4      # first feature column (0-based, inclusive)
# Last feature column (0-based, INCLUSIVE). load_data() slices
# FEATURES_START:FEATURES_END + 1, so this must stay strictly below
# LABEL_COLUMN; the previous value (41) put the label itself into the features.
FEATURES_END = 40
LABEL_COLUMN = 41       # column holding the class label (-1 is remapped to 0 on load)
BATCH_SIZE = 64
EPOCHS = 20
LEARNING_RATE = 0.001

# Custom dataset for LSTM
class BGPDataset(Dataset):
    """Sliding-window view over a row-indexed feature container.

    Item ``idx`` is the window ``data[idx:idx + sequence_length]`` paired with
    the label of the window's last row.

    Args:
        data: Feature rows; must support ``len()`` and slicing ``data[a:b]``.
        labels: Per-row labels, indexable by integer position.
        sequence_length: Number of consecutive rows in each window.
    """

    def __init__(self, data, labels, sequence_length=SEQUENCE_LENGTH):
        self.data = data
        self.labels = labels
        self.sequence_length = sequence_length

    def __len__(self):
        # Clamp at zero: with fewer rows than one window the raw expression goes
        # negative, and Python's len() raises ValueError on a negative __len__.
        return max(0, len(self.data) - self.sequence_length + 1)

    def __getitem__(self, idx):
        """Return ``(window, label)`` as float32 tensors.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            # Support Python-style negative indexing instead of silently
            # returning an empty or truncated slice.
            idx += size
        if not 0 <= idx < size:
            raise IndexError(f"window index {idx} out of range for {size} windows")
        x = self.data[idx:idx + self.sequence_length]
        # The label of a window is the label of its last row.
        y = self.labels[idx + self.sequence_length - 1]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Load and preprocess data
def load_data(file_paths):
    """Read each CSV and split it into a feature matrix and a label vector.

    Files are parsed without a header row. Features are the columns
    ``FEATURES_START`` through ``FEATURES_END`` (inclusive); labels come from
    ``LABEL_COLUMN`` with every ``-1`` replaced by ``0``.

    Args:
        file_paths: Iterable of CSV paths.

    Returns:
        A list with one ``(features, labels)`` tuple of NumPy arrays per path,
        in the same order as ``file_paths``.
    """
    def _split_columns(csv_path):
        # One file -> (feature array, label array).
        frame = pd.read_csv(csv_path, header=None)
        feature_block = frame.iloc[:, FEATURES_START:FEATURES_END + 1]
        label_series = frame.iloc[:, LABEL_COLUMN].replace(-1, 0)
        return feature_block.values, label_series.values

    return [_split_columns(csv_path) for csv_path in file_paths]

# Normalize features
def normalize_data(train_features, test_features):
    """Min-max scale both feature sets using statistics from the training set only.

    The scaler is fitted on ``train_features`` and then applied to both inputs,
    so test values outside the training range can fall outside ``[0, 1]``.

    Returns:
        Tuple ``(scaled_train, scaled_test)``.
    """
    fitted_scaler = MinMaxScaler().fit(train_features)
    scaled_train = fitted_scaler.transform(train_features)
    scaled_test = fitted_scaler.transform(test_features)
    return scaled_train, scaled_test

# Compute class weights for imbalanced data
def compute_weights(labels):
    """Map each distinct label to its scikit-learn ``'balanced'`` class weight.

    Args:
        labels: Array-like of class labels.

    Returns:
        Dict ``{class_value: weight}`` covering every value present in ``labels``.
    """
    present_classes = np.unique(labels)
    balanced = compute_class_weight(class_weight='balanced', classes=present_classes, y=labels)
    return dict(zip(present_classes, balanced))

# LSTM Model with Bidirectional LSTM and Dropout
class LSTMModel(nn.Module):
    """Stacked bidirectional LSTM with a linear head and sigmoid output.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Hidden units per direction.
        num_layers: Number of stacked LSTM layers.
        output_size: Width of the final linear layer.
        dropout: Dropout probability used between LSTM layers and before the
            linear head (defaults to the previously hard-coded 0.3).
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.3):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            bidirectional=True,
            # nn.LSTM applies dropout only between stacked layers and emits a
            # warning when it is non-zero for a single-layer network.
            dropout=dropout if num_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_size * 2, output_size)  # *2 for bidirectional
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return sigmoid scores of shape ``(batch, output_size)``.

        ``x`` is batch-first: ``(batch, sequence, input_size)``.
        """
        _, (h_n, _) = self.lstm(x)
        # Use the final hidden state of BOTH directions of the top layer.
        # Taking output[:, -1, :] instead would pair the forward direction's
        # final state with the reverse direction's state after only one step,
        # so the backward half would ignore almost the whole sequence.
        summary = torch.cat([h_n[-2], h_n[-1]], dim=-1)
        summary = self.dropout(summary)
        return torch.sigmoid(self.fc(summary))

# Training function
def train_model(model, train_loader, criterion, optimizer, scheduler, epochs=EPOCHS):
    """Run the optimisation loop and print the mean batch loss after every epoch.

    Args:
        model: Module called as ``model(x_batch)``.
        train_loader: Iterable of ``(x_batch, y_batch)`` pairs that supports ``len()``.
        criterion: Callable ``criterion(predictions, targets)`` returning a scalar loss.
        optimizer: Optimizer over the model's parameters.
        scheduler: Scheduler whose ``step`` accepts the epoch's mean loss.
        epochs: Number of passes over ``train_loader``.

    Raises:
        ValueError: If ``train_loader`` contains no batches.
    """
    num_batches = len(train_loader)
    if num_batches == 0:
        # Fail with a clear message instead of a ZeroDivisionError further down.
        raise ValueError("train_loader yields no batches; nothing to train on")

    for epoch in range(epochs):
        # Re-assert training mode each epoch in case the model was switched to
        # eval mode elsewhere between epochs.
        model.train()
        total_loss = 0.0
        for x_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(x_batch)
            # squeeze(-1) removes only the trailing output dimension. A bare
            # squeeze() would also drop the batch dimension when the final
            # batch holds a single sample, making the shapes mismatch.
            loss = criterion(outputs.squeeze(-1), y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        mean_loss = total_loss / num_batches
        scheduler.step(mean_loss)
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {mean_loss:.4f}")

# Evaluation function with zero_division parameter
def evaluate_model(model, test_loader):
    """Print a classification report for ``model`` over ``test_loader``.

    Model outputs are thresholded at 0.5 to obtain 0/1 predictions.

    Args:
        model: Module called as ``model(x_batch)``.
        test_loader: Iterable of ``(x_batch, y_batch)`` pairs.
    """
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for x_batch, y_batch in test_loader:
            outputs = model(x_batch)
            # squeeze(-1) keeps the batch dimension. With a bare squeeze(), a
            # final batch of one sample becomes a 0-d tensor whose tolist() is
            # a plain int, and list.extend() then raises TypeError.
            preds = (outputs.squeeze(-1) > 0.5).int()
            all_preds.extend(preds.tolist())
            all_labels.extend(y_batch.tolist())
    # NOTE(review): zero_division=1 reports undefined precision/recall as 1.0,
    # which can flatter a class that is never predicted -- confirm this is intended.
    print(classification_report(all_labels, all_preds, digits=4, zero_division=1))

# Main script
def main(folder_path):
    """Train the LSTM on the "regular" traffic files and evaluate on the others.

    Every ``.csv`` in ``folder_path`` whose file name contains ``BCNET_regular``
    or ``RIPE_regular`` is used for training; all remaining CSVs form the test
    set. Features are min-max scaled (fitted on training rows only), windowed
    separately per file, and fed to an ``LSTMModel``. A classification report
    for the test set is printed at the end.

    Args:
        folder_path: Directory containing the dataset CSV files.

    Raises:
        ValueError: If no training or no test file is found, or if no file in a
            split is long enough to produce a single window.
    """
    # Function-scope imports: only this function needs these names.
    import warnings
    from torch.utils.data import ConcatDataset

    def is_training_file(path):
        # Match on the file name only, so directory names cannot affect the split.
        name = os.path.basename(path)
        return 'BCNET_regular' in name or 'RIPE_regular' in name

    def build_windowed_dataset(scaled_features, per_file_data, split_name):
        """Window each file on its own so no sequence spans two different files."""
        datasets, start = [], 0
        for _, file_labels in per_file_data:
            stop = start + len(file_labels)
            # Files shorter than one window cannot contribute any sample.
            if stop - start >= SEQUENCE_LENGTH:
                datasets.append(BGPDataset(scaled_features[start:stop], file_labels))
            start = stop
        if not datasets:
            raise ValueError(
                f"No {split_name} file has at least {SEQUENCE_LENGTH} rows"
            )
        return ConcatDataset(datasets)

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    file_paths = sorted(
        os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".csv")
    )

    # Split files into training (BCNET and RIPE) and testing (others)
    train_files = [f for f in file_paths if is_training_file(f)]
    test_files = [f for f in file_paths if not is_training_file(f)]

    print("Training files:", train_files)
    print("Testing files:", test_files)

    if not train_files or not test_files:
        raise ValueError(
            f"Expected both training and testing CSV files in {folder_path!r}; "
            f"found {len(train_files)} training and {len(test_files)} testing files"
        )

    # Load and preprocess data
    train_data = load_data(train_files)
    test_data = load_data(test_files)

    # Stack rows of all files so the scaler sees the whole training set at once.
    train_features = np.vstack([features for features, _ in train_data])
    train_labels = np.concatenate([labels for _, labels in train_data])
    test_features = np.vstack([features for features, _ in test_data])

    # Normalize data
    train_features, test_features = normalize_data(train_features, test_features)

    # Compute class weights
    class_weights = compute_weights(train_labels)
    print("Class weights:", class_weights)
    if len(class_weights) < 2:
        warnings.warn(
            "Training labels contain a single class; a supervised classifier "
            "trained on them cannot learn to separate classes."
        )

    # Create Datasets and DataLoaders (rows are split back per file before windowing)
    train_dataset = build_windowed_dataset(train_features, train_data, "training")
    test_dataset = build_windowed_dataset(test_features, test_data, "testing")
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # Model Initialization
    # Take the width from the loaded data so it always matches what the model is fed.
    input_size = train_features.shape[1]
    hidden_size = 128
    num_layers = 3
    output_size = 1  # Binary classification
    model = LSTMModel(input_size, hidden_size, num_layers, output_size)

    # Apply the class weights computed above; a class missing from the training
    # labels falls back to weight 1.0, which reduces to plain BCE.
    negative_weight = float(class_weights.get(0, 1.0))
    positive_weight = float(class_weights.get(1, 1.0))

    def criterion(predictions, targets):
        sample_weights = targets * positive_weight + (1.0 - targets) * negative_weight
        return nn.functional.binary_cross_entropy(predictions, targets, weight=sample_weights)

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.1)

    # Train and Evaluate
    train_model(model, train_loader, criterion, optimizer, scheduler)
    evaluate_model(model, test_loader)

# Path to the folder containing the datasets. main() picks up every ".csv"
# file in this folder, so point it at wherever the files were downloaded.
folder_path = "/content/BGP_RIPE_datasets/"  # Update with your folder path
# Runs the whole pipeline defined above: load, normalize, train, evaluate.
main(folder_path)


Training files: ['/content/BGP_RIPE_datasets/BCNET_regular.csv', '/content/BGP_RIPE_datasets/RIPE_regular.csv']
Testing files: ['/content/BGP_RIPE_datasets/Code_Red_I.csv', '/content/BGP_RIPE_datasets/Moscow_blackout.csv', '/content/BGP_RIPE_datasets/Slammer.csv', '/content/BGP_RIPE_datasets/Nimda.csv', '/content/BGP_RIPE_datasets/WannaCrypt.csv']
Class weights: {0: 1.0}
Epoch 1/20, Loss: 0.0354
Epoch 2/20, Loss: 0.0000
Epoch 3/20, Loss: 0.0000
Epoch 4/20, Loss: 0.0000
Epoch 5/20, Loss: 0.0000
Epoch 6/20, Loss: 0.0000
Epoch 7/20, Loss: 0.0000
Epoch 8/20, Loss: 0.0000
Epoch 9/20, Loss: 0.0000
Epoch 10/20, Loss: 0.0000
Epoch 11/20, Loss: 0.0000
Epoch 12/20, Loss: 0.0000
Epoch 13/20, Loss: 0.0000
Epoch 14/20, Loss: 0.0000
Epoch 15/20, Loss: 0.0000
Epoch 16/20, Loss: 0.0000
Epoch 17/20, Loss: 0.0000
Epoch 18/20, Loss: 0.0000
Epoch 19/20, Loss: 0.0000
Epoch 20/20, Loss: 0.0000
              precision    recall  f1-score   support

         0.0     0.7898    1.0000    0.8825     32950
      