In [3]:
import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
sequence_length = 1
input_size = 41
hidden_size = 128
num_layers = 2
num_classes = 5
batch_size = 100
num_epochs = 4
learning_rate = 0.001

# NSL-KDD datasets
df = pd.read_csv('NSL-KDD/KDDTrain+.txt', sep=',')
df.columns = ['duration', 'protocol_type', 'service', 'flag', 'src_bytes',
       'dst_bytes', 'land', 'wrong_fragment', 'urgent', 'hot',
       'num_failed_logins', 'logged_in', 'num_compromised', 'root_shell',
       'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
       'num_access_files', 'num_outbound_cmds', 'is_host_login',
       'is_guest_login', 'count', 'srv_count', 'serror_rate',
       'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate', 'same_srv_rate',
       'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
       'dst_host_srv_count', 'dst_host_same_srv_rate',
       'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate',
       'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
       'dst_host_srv_serror_rate', 'dst_host_rerror_rate',
       'dst_host_srv_rerror_rate', 'labels', 'difficulty']

# Convert categorial feature into numeric
protocols = df.protocol_type.value_counts()
protocols_map = {}
for i, (prop, count) in enumerate(protocols.items()):
    protocols_map[prop] = i
df.protocol_type = df.protocol_type.map(protocols_map)

# Convert categorial feature into numeric
services = df.service.value_counts()
service_map = {}
for i, (ser, count) in enumerate(services.items()):
    service_map[ser] = i
df.service = df.service.map(service_map)

# Convert categorial feature into numeric
flag_map = {}
flags = df.flag.value_counts()
for i, (flag, count) in enumerate(flags.items()):
    flag_map[flag] = i
df.flag = df.flag.map(flag_map)

# Rename every attack label: 0=normal, 1=DoS, 2=Probe, 3=R2L and 4=U2R.
df['targets'] = df.labels.replace({ 'normal' : 0, 'neptune' : 1 ,'back': 1, 'land': 1, 'pod': 1, 'smurf': 1, 'teardrop': 1,'mailbomb': 1, 'apache2': 1, 'processtable': 1, 'udpstorm': 1, 'worm': 1,
                           'ipsweep' : 2,'nmap' : 2,'portsweep' : 2,'satan' : 2,'mscan' : 2,'saint' : 2
                           ,'ftp_write': 3,'guess_passwd': 3,'imap': 3,'multihop': 3,'phf': 3,'spy': 3,'warezclient': 3,'warezmaster': 3,'sendmail': 3,'named': 3,'snmpgetattack': 3,'snmpguess': 3,'xlock': 3,'xsnoop': 3,'httptunnel': 3,
                           'buffer_overflow': 4,'loadmodule': 4,'perl': 4,'rootkit': 4,'ps': 4,'sqlattack': 4,'xterm': 4})

# Convert into train and test datasets
train_data, val_data = df[:100000], df[100000:125900]

# Seperate features and target
train_data_x = train_data.drop(['labels', 'difficulty', 'targets'], axis=1)
train_data_y = train_data['targets']
val_data_x = val_data.drop(['labels', 'difficulty', 'targets'], axis=1)
val_data_y = val_data['targets']

# apply standardscaler into datasets
scaler = StandardScaler()
train_data_x = scaler.fit_transform(train_data_x)
val_data_x = scaler.fit_transform(val_data_x)

# train_data = TensorDataset(torch.tensor(train_data_x.values.astype(np.float32)).type(torch.LongTensor), torch.tensor(train_data_y.values.astype(np.float32)).type(torch.LongTensor))
# val_data = TensorDataset(torch.from_numpy(val_data_x.values.astype(np.float32)).type(torch.LongTensor), torch.from_numpy(val_data_y.values.astype(np.float32)).type(torch.LongTensor))

# convert into tensor datasets
train_data = TensorDataset(torch.tensor(train_data_x), torch.tensor(train_data_y.values.astype(np.float32)).type(torch.LongTensor))
val_data = TensorDataset(torch.tensor(val_data_x), torch.from_numpy(val_data_y.values.astype(np.float32)).type(torch.LongTensor))

# dataloader
train_data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_data_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)


# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        # Set initial hidden and cell states 
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 

        out, _ = self.rnn(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)
        
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out

model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_data_loader)
for epoch in range(num_epochs):
    for i, (input_data, labels) in enumerate(train_data_loader):
        input_data = input_data.reshape(-1, sequence_length, input_size).to(device)
        input_data = input_data.float().to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(input_data)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 300 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for input_data, labels in valid_data_loader:
        input_data = input_data.reshape(-1, sequence_length, input_size).to(device)
        input_data = input_data.float().to(device)
        labels = labels.to(device)
        outputs = model(input_data)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Validation Accuracy of the model : {} %'.format(100 * correct / total)) 

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')

Epoch [1/4], Step [300/1000], Loss: 0.0594
Epoch [1/4], Step [600/1000], Loss: 0.0375
Epoch [1/4], Step [900/1000], Loss: 0.0122
Epoch [2/4], Step [300/1000], Loss: 0.0190
Epoch [2/4], Step [600/1000], Loss: 0.0837
Epoch [2/4], Step [900/1000], Loss: 0.0076
Epoch [3/4], Step [300/1000], Loss: 0.0251
Epoch [3/4], Step [600/1000], Loss: 0.0793
Epoch [3/4], Step [900/1000], Loss: 0.0078
Epoch [4/4], Step [300/1000], Loss: 0.0285
Epoch [4/4], Step [600/1000], Loss: 0.0223
Epoch [4/4], Step [900/1000], Loss: 0.0063
Validation Accuracy of the model : 98.98069498069498 %
