In [1]:
max_vector_len = 968 

In [2]:
import csv
import numpy as np
import math
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

Tensor = torch.FloatTensor    

torch.manual_seed(125) 

class GRUModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, bias=True):
        super(GRUModel, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim         
        # Number of hidden layers
        self.layer_dim = layer_dim       
        self.gruLayer = nn.GRU(input_dim, hidden_dim, layer_dim)
        self.fcLayer = nn.Linear(hidden_dim, output_dim) 
    
    def forward(self, x):
        #print("x.shape ",x.shape) 
        out, _ = self.gruLayer(x)      
        out = self.fcLayer(out) 
        return out

def load_data(filename):
    x = []
    with open(filename,'r') as data:
        lines = data.readlines()
        for line in lines:
            line=line.strip('\n')
            x.append(list(line))
    x = np.array(x,dtype=np.float64)
    return x

'''
STEP 1: LOADING DATASET
'''
xmal = load_data('Mirai_vector_mal.txt')
ymal = np.ones((xmal.shape[0], 1), dtype=np.int)
xben = load_data('Mirai_vector_ben.txt')
yben = np.zeros((xben.shape[0], 1), dtype=np.int)
# Malicious packets are divided into training set and test set
train_size_mal = int(xmal.shape[0] * 0.8)
xtest_mal = xmal[train_size_mal:, :]
ytest_mal = ymal[train_size_mal:, :]
# Benign packets are divided into training set and test set
train_size_ben = int(xben.shape[0] * 0.8) # 69999*0.8=55999
xtrain_ben = xben[0:train_size_ben, :]
xtest_ben = xben[train_size_ben:, :]

xtest_mal = Tensor(xtest_mal)
xtrain_ben = Tensor(xtrain_ben)
xtest_ben = Tensor(xtest_ben)

'''
STEP 2: INSTANTIATE MODEL CLASS
'''
input_dim = max_vector_len
hidden_dim = int(math.sqrt(input_dim+1)+10)
layer_dim = 2  
output_dim = 1

num_epochs = 50000 # 50000<55999
print_interval = num_epochs / 10

gru = GRUModel(input_dim, hidden_dim, layer_dim, output_dim)
    
'''
STEP 3: INSTANTIATE LOSS CLASS
'''
criterion = nn.CrossEntropyLoss()
 
'''
STEP 4: INSTANTIATE OPTIMIZER CLASS
'''
learning_rate = 0.1
optimizer = optim.SGD(gru.parameters(), lr=learning_rate)

'''
STEP 5: TRAIN THE MODEL
'''
loss_list = []
for epoch in range(1, num_epochs+1):

    idx = epoch - 1
    xben_batch = xtrain_ben[idx]
    packets = Variable(xben_batch.view(-1, 1, input_dim)) # ? group*1 row*input_dim column
    labels = Variable(Tensor([[0]]).long())

    # Clear gradients w.r.t. parameters
    optimizer.zero_grad()    
    # Forward pass to get output/logits
    outputs = gru(packets)
    #print("outputs.size ",outputs.size())
    # Calculate Loss: softmax --> cross entropy loss
    loss = criterion(outputs, labels)
    # Getting gradients w.r.t. parameters
    loss.backward()
    # Updating parameters
    optimizer.step()        
    loss_list.append(loss.item())
         
    if epoch % print_interval == 0:
        # benign
        loss = 0
        for i in range(xtest_ben.shape[0]):
            packets = Variable(xtest_ben[i].view(-1, 1, input_dim))
            outputs = gru(packets)
            loss += outputs.data
        ben_loss = loss / xtest_ben.shape[0]
        print("ben_loss: %f" % (ben_loss))
        
        # Calculation Precision Recall F-score
        positive_sum = 6001
        positive_num = 0
        FP = 0
        label_list = []
        for i in range(xtest_mal.shape[0]):
            packets = Variable(xtest_mal[i].view(-1, 1, input_dim))
            outputs = gru(packets)
            if outputs.data > ben_loss:
                label_list.append('1')
                if ytest_mal[i][0] == 0:
                    FP += 1
                elif ytest_mal[i][0] == 1:
                    positive_num += 1
            else:
                label_list.append('0')
        
        # Record the label of this iteration
        label_str = ''.join(label_list)
        file = open('6001_mirai_labels_gru_detection.txt','a')
        file.write(label_str + '\n')
        file.close()
        
        if positive_num == 0:
            Precision = 0
            Recall = 0
            F_score = 0
        else:
            Precision = positive_num / (positive_num + FP)
            Recall = positive_num / positive_sum
            F_score = 2 * Recall * Precision / (Recall + Precision)
        print('Precision: {0:0.4f}'.format(Precision)) 
        print('Recall: {0:0.4f}'.format(Recall)) 
        print('F-score: {0:0.4f}'.format(F_score)) 
        print('Iteration: {}. Loss: {}.'.format(epoch, loss_list[-1]))

ben_loss: -0.105152
Precision: 1.0000
Recall: 0.9305
F-score: 0.9640
Iteration: 5000. Loss: 0.0.


KeyboardInterrupt: 

In [63]:
#
torch.manual_seed(125)
Precision: 1.0000
Recall: 0.9305
F-score: 0.9640
Iteration: 5000. Loss: 0.0.

5584
