In [1]:
# Number of features per time step fed to the GRU (assigned to `input_dim` below).
max_vector_len = 1350

In [5]:
import csv
import numpy as np
import math
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

# Seed PyTorch's global RNG up front so that weight initialisation is repeatable.
torch.manual_seed(0)

# Shorthand for the float tensor constructor used to convert the NumPy arrays.
Tensor = torch.FloatTensor

class GRUModel(nn.Module):
    """Stacked GRU followed by a linear read-out applied at every time step.

    Args:
        input_dim: Number of input features per time step.
        hidden_dim: Size of the GRU hidden state.
        layer_dim: Number of stacked GRU layers.
        output_dim: Number of values the linear layer emits per time step.
        bias: Whether the GRU and the linear layer use bias terms.
            Defaults to True, which matches the layers' own defaults.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, bias=True):
        super(GRUModel, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim
        # Number of hidden layers
        self.layer_dim = layer_dim
        # `bias` used to be accepted but ignored; forward it to both layers so
        # that bias=False actually produces bias-free layers.
        self.gruLayer = nn.GRU(input_dim, hidden_dim, layer_dim, bias=bias)
        self.fcLayer = nn.Linear(hidden_dim, output_dim, bias=bias)

    def forward(self, x):
        """Run the GRU over `x` and project every time step's hidden state.

        Args:
            x: Input of shape (seq_len, batch, input_dim); the GRU is built
                with the default batch_first=False.

        Returns:
            Tensor of shape (seq_len, batch, output_dim).
        """
        # The final hidden state is not needed; only per-step outputs are used.
        out, _ = self.gruLayer(x)
        return self.fcLayer(out)

def load_data(filename):
    """Read a text file of single-character values into a 2-D float64 array.

    Every line becomes one row and every character on the line becomes one
    element, converted to float (e.g. the line "101" -> [1.0, 0.0, 1.0]).

    Blank lines at the end of the file are ignored, so a file that ends with
    an extra newline no longer produces a ragged row. A blank line in the
    middle of the file is NOT skipped: it still makes the rows ragged and
    NumPy raises, because silently dropping it would shift the rows relative
    to any separately stored labels.

    Args:
        filename: Path of the text file to read.

    Returns:
        np.ndarray of dtype float64 with one row per line.

    Raises:
        ValueError: If a character cannot be converted to float or the rows
            have different lengths.
    """
    with open(filename, 'r') as data:
        # Iterate the file directly instead of materialising it via readlines().
        rows = [list(line.rstrip('\r\n')) for line in data]
    # Drop empty rows produced by trailing blank lines only.
    while rows and not rows[-1]:
        rows.pop()
    return np.array(rows, dtype=np.float64)

# ---------------------------------------------------------------------------
# STEP 1: LOADING DATASET
# ---------------------------------------------------------------------------
xmal = load_data('Mirai_vector_mix_60002.txt')

# Read the label file once; both the label vector and the train/test split
# point are derived from the same rows.
with open("130001_labels_mirai(insert_all_FP).tsv", 'r') as label_file:
    label_lines = label_file.readlines()
# Rows 69999..130000 of the label file are paired with the rows of `xmal`.
# Indexing (rather than slicing) keeps the IndexError if the file is too short.
mal_label_lines = [label_lines[i] for i in range(69999, 130001)]

# NOTE(review): a row containing neither '1' nor '2' contributes no label,
# which would shift `ymal` relative to `xmal`. The substring test also matches
# a '1'/'2' anywhere on the line. Confirm both against the label file format.
ymal = []
for label_line in mal_label_lines:
    if '1' in label_line:
        ymal.append(1)
    elif '2' in label_line:
        ymal.append(2)
ymal = np.array(ymal)[:, np.newaxis]

xben = load_data('Mirai_vector_ben_69999.txt')
# `np.int` was removed in NumPy 1.24; it was an alias of the builtin `int`.
yben = np.zeros((xben.shape[0], 1), dtype=int)

# Malicious packets are divided into training set and test set.
# The split point is the row at which the running count of label-1 rows
# reaches 80% of 30001; everything after that row is the test portion.
train_size_mal = int(30001 * 0.8)
cnt = 0
mix_train_size_mal = 0
for label_line in mal_label_lines:
    mix_train_size_mal += 1
    if '1' in label_line:
        cnt += 1
    if cnt == train_size_mal:
        break
xtest_mal = xmal[mix_train_size_mal:, :]
ytest_mal = ymal[mix_train_size_mal:, :]

# Benign packets are divided into training set and test set
train_size_ben = int(xben.shape[0] * 0.8)  # 69999*0.8=55999
xtrain_ben = xben[0:train_size_ben, :]
xtest_ben = xben[train_size_ben:, :]

xtest_mal = Tensor(xtest_mal)
xtrain_ben = Tensor(xtrain_ben)
xtest_ben = Tensor(xtest_ben)

# ---------------------------------------------------------------------------
# STEP 2: INSTANTIATE MODEL CLASS
# ---------------------------------------------------------------------------
input_dim = max_vector_len
hidden_dim = int(math.sqrt(input_dim + 1) + 10)
layer_dim = 2
output_dim = 1

# Each "epoch" below consumes exactly one benign training row, so the number
# of epochs must not exceed the number of training rows (50000 < 55999).
num_epochs = 50000
assert num_epochs <= xtrain_ben.shape[0], "not enough benign training rows"
print_interval = num_epochs // 10

gru = GRUModel(input_dim, hidden_dim, layer_dim, output_dim)

# ---------------------------------------------------------------------------
# STEP 3: INSTANTIATE LOSS CLASS
# ---------------------------------------------------------------------------
# NOTE(review): with output_dim == 1 the softmax inside CrossEntropyLoss runs
# over a single class, so the loss is identically 0 and every gradient is 0.
# The optimizer steps below therefore never change the weights (the recorded
# "Loss: 0.0" output is consistent with this). Picking a different objective
# is a modelling decision, so the criterion is left unchanged here.
criterion = nn.CrossEntropyLoss()

# ---------------------------------------------------------------------------
# STEP 4: INSTANTIATE OPTIMIZER CLASS
# ---------------------------------------------------------------------------
learning_rate = 0.1
optimizer = optim.SGD(gru.parameters(), lr=learning_rate)

# ---------------------------------------------------------------------------
# STEP 5: TRAIN THE MODEL
# ---------------------------------------------------------------------------
loss_list = []
# Every training row is benign, so the target is always class 0 (loop-invariant).
labels = Tensor([[0]]).long()
for epoch in range(1, num_epochs + 1):
    # One benign row per step, shaped (seq_len=1, batch=1, input_dim).
    packets = xtrain_ben[epoch - 1].view(-1, 1, input_dim)

    # Clear gradients w.r.t. parameters
    optimizer.zero_grad()
    # Forward pass to get output/logits
    outputs = gru(packets)
    # Calculate Loss: softmax --> cross entropy loss
    loss = criterion(outputs, labels)
    # Getting gradients w.r.t. parameters
    loss.backward()
    # Updating parameters
    optimizer.step()
    loss_list.append(loss.item())

    if epoch % print_interval == 0:
        # Evaluation only: no autograd graph is needed for these forward passes.
        with torch.no_grad():
            # Mean model output over the benign test rows. It is printed as
            # "ben_loss" and used as the decision threshold below.
            ben_output_sum = torch.zeros(1, 1, 1)
            for i in range(xtest_ben.shape[0]):
                ben_output_sum += gru(xtest_ben[i].view(-1, 1, input_dim))
            ben_loss = (ben_output_sum / xtest_ben.shape[0]).item()
            print("ben_loss: %f" % (ben_loss))

            # Calculation Precision Recall F-score MRR
            # NOTE(review): presumably the number of label-1 rows in the test
            # split (30001 - 24000 = 6001) -- confirm.
            positive_sum = 6001
            positive_num = 0
            FP = 0
            label_list = []
            for i in range(xtest_mal.shape[0]):
                score = gru(xtest_mal[i].view(-1, 1, input_dim)).item()
                if score > ben_loss:
                    label_list.append('1')
                    true_label = ytest_mal[i][0]
                    if true_label == 0 or true_label == 2:
                        FP += 1
                    elif true_label == 1:
                        positive_num += 1
                else:
                    label_list.append('0')

        # Record the label of this iteration
        label_str = ''.join(label_list)
        with open('6001_mirai_labels_gru.txt', 'a') as label_out:
            label_out.write(label_str + '\n')

        if positive_num == 0:
            Precision = 0
            Recall = 0
            F_score = 0
        else:
            Precision = positive_num / (positive_num + FP)
            Recall = positive_num / positive_sum
            F_score = 2 * Recall * Precision / (Recall + Precision)
        # NOTE(review): origin_mal_num is smaller than positive_sum, so MRR goes
        # negative whenever positive_num exceeds it (as in the recorded run
        # with 5693 detections) -- confirm the intended constant.
        origin_mal_num = 5584
        MRR = (origin_mal_num - positive_num) / origin_mal_num
        print('Precision: {0:0.4f}'.format(Precision))
        print('Recall: {0:0.4f}'.format(Recall))
        print('F-score: {0:0.4f}'.format(F_score))
        print('MRR: {0} - {1} / {2} = {3:0.4f}'.format(origin_mal_num,positive_num,origin_mal_num,MRR))
        print('Iteration: {}. Loss: {}.'.format(epoch, loss_list[-1]))

ben_loss: 0.073304
Precision: 0.5083
Recall: 0.9487
F-score: 0.6619
MRR: 5584 - 5693 / 5584 = -0.0195
Iteration: 5000. Loss: 0.0.


KeyboardInterrupt: 

In [None]:
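# Recorded outputs of the training cell above under different torch.manual_seed values (pasted notes, not runnable code).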
torch.manual_seed(200)
ben_loss: -0.015744
Precision: 0.2468
Recall: 0.1516
F-score: 0.1879
MRR: 5584 - 910 / 5584 = 0.8370
Iteration: 5000. Loss: 0.0.
#
torch.manual_seed(125)
ben_loss: -0.007552
Precision: 0.2700
Recall: 0.1650
F-score: 0.2048
MRR: 5584 - 990 / 5584 = 0.8227
Iteration: 5000. Loss: 0.0.
#
torch.manual_seed(100)
ben_loss: 0.095623
Precision: 0.2039
Recall: 0.1861
F-score: 0.1946
MRR: 5584 - 1117 / 5584 = 0.8000
#
torch.manual_seed(50)
ben_loss: 0.067540
Precision: 0.1341
Recall: 0.0378
F-score: 0.0590
MRR: 5584 - 227 / 5584 = 0.9593
Iteration: 5000. Loss: 0.0.
# *
torch.manual_seed(0)
ben_loss: 0.073304
Precision: 0.5083
Recall: 0.9487
F-score: 0.6619
MRR: 5584 - 5693 / 5584 = -0.0195
Iteration: 5000. Loss: 0.0.