In [1]:
import json
import math

import numpy as np
import pandas as pd
from scapy.all import rdpcap
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

from IAT_dataparser import IATDataParser
from PL_dataparser import PLDataParser

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Defining the model
class LSTMPredictor(nn.Module):
    def __init__(self, n_hidden=200):
        super(LSTMPredictor, self).__init__()

        self.n_hidden = n_hidden

        # Defining 2 LSTMS stacked on top of each other
        self.lstm1 = nn.LSTMCell(1, self.n_hidden)
        self.lstm2 = nn.LSTMCell(self.n_hidden, self.n_hidden)
        self.linear = nn.Linear(self.n_hidden, 1)

    def forward(self, input, prediction_window=0):
        outputs = []
        n_samples = input.size(0)

        # Initializing the hidden states and cell states
        h_t = torch.zeros(n_samples, self.n_hidden, dtype=torch.float32)
        c_t = torch.zeros(n_samples, self.n_hidden, dtype=torch.float32)
        h_t2 = torch.zeros(n_samples, self.n_hidden, dtype=torch.float32)
        c_t2 = torch.zeros(n_samples, self.n_hidden, dtype=torch.float32)

        # Split input tensor into chunks of size, 1, row-wise
        for input_t in input.split(1, dim=1):
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs.append(output)

        for i in range(prediction_window):
            h_t, c_t = self.lstm1(output, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs.append(output)

        # Concatentating all outputs, history + future, row-wise
        outputs = torch.cat(outputs, dim=1)
        return outputs

In [3]:
# Dataset generation
data_path = "../data/pcap/MIRAGE-COVID-CCMA-2022/Raw_JSON/Teams/1619005750_com.microsoft.teams_mirage2020dataset_labeled_biflows_all_packets_encryption_metadata.json"
# data_path = '../data/pcap/MIRAGE-COVID-CCMA-2022/Raw_JSON/Teams/1619782605_com.microsoft.teams_mirage2020dataset_labeled_biflows_all_packets_encryption_metadata.json'

# Instantiate DataParser class
data_parser = IATDataParser(data_path)

# Reading the biflow data
biflow_data = data_parser.biflow_data

biflows = list(biflow_data.keys())

print("biflows: ", biflows)
print('number of biflows: ', len(biflows))

test_IAT = data_parser.generate_debug_set()
test_IAT = data_parser.saturate_99(test_IAT)
test_IAT = data_parser.minmax_scaler(test_IAT)

# DEFINE WINDOWS AND BATCH SIZE HERE
memory_window = 30 # 500
prediction_window = 1
# batch_size = 32

# memory_window_set, gd_truth_set = data_parser.create_sequences(
#     all_IAT['Interarrival_scaled'], memory_window, prediction_window)  # Splitting data into memory_window and prediction_window (ground truth)

memory_window_set, gd_truth_set = data_parser.create_sequences(
    test_IAT['Interarrival_scaled'], memory_window, prediction_window)  # Splitting data into memory_window and prediction_window (ground truth)

print(f"Input shape: {memory_window_set.shape}, Output shape: {gd_truth_set.shape}")

# Defining training inputs and outputs. Defining testing inputs and outputs
split_index = int(len(memory_window_set) * 0.8)

# Perform the split
train_input = torch.from_numpy(memory_window_set[:split_index, :-1]).float()
train_target = torch.from_numpy(memory_window_set[:split_index, 1:]).float()
test_input = torch.from_numpy(memory_window_set[split_index:, :-1]).float()
test_target = torch.from_numpy(memory_window_set[split_index:, 1:]).float()

print("Train input shape", train_input.shape)
print("Train target shape", train_target.shape)
print("Test input shape", test_input.shape)
print("Test input shape", test_target.shape)

biflows:  ['192.168.20.111,68,192.168.20.254,67,17', '192.168.20.111,51560,8.8.4.4,853,6', '192.168.20.111,46279,8.8.8.8,853,6', '192.168.20.111,55009,142.250.180.68,443,6', '192.168.20.111,55586,142.250.184.35,80,6', '192.168.20.111,51564,8.8.4.4,853,6', '192.168.20.111,40978,108.177.127.188,5228,6', '192.168.20.111,38155,142.250.184.36,443,17', '192.168.20.111,38544,142.250.184.36,443,6', '192.168.20.111,39646,172.217.21.67,443,6', '192.168.20.111,33755,142.250.184.42,443,6', '192.168.20.111,32786,216.58.205.74,443,6', '192.168.20.111,41662,216.58.206.42,443,6', '192.168.20.111,41663,216.58.206.42,443,6', '192.168.20.111,41513,216.58.198.46,443,6', '192.168.20.111,41515,216.58.198.46,443,6', '192.168.20.111,37742,142.250.180.74,443,17', '192.168.20.111,57411,142.250.180.74,443,17', '192.168.20.111,36736,142.250.180.74,443,6', '192.168.20.111,36737,142.250.180.74,443,6', '192.168.20.111,51073,216.58.198.42,443,17', '192.168.20.111,58540,216.58.198.42,443,6', '192.168.20.111,50145,216.

In [4]:
# Initialising model hyper-parameters
# Initialising model, loss and optimizer
model = LSTMPredictor()
criterion = nn.MSELoss()
optimizer = optim.LBFGS(model.parameters(), lr=0.4)

In [5]:
# Initializing training steps
trg_steps = 50

for i in range(trg_steps):
    print("Step: ", i)

    def closure():
        optimizer.zero_grad()
        out = model(train_input)
        loss = criterion(out, train_target)
        print("loss: ", loss.item())
        loss.backward()
        return loss

    optimizer.step(closure)

    with torch.no_grad():
        future = 10
        # Pred includes future values
        pred = model(test_input, prediction_window=future)
        loss = criterion(pred[:, :-future], test_target)
        print("Test Loss: ", loss.item())
        y = pred.detach().numpy()
        n = train_input.shape[1]

        plt.figure(figsize=(12, 6))
        plt.title(f"Step {i+1}")
        plt.xlabel("x")
        plt.ylabel("y")
        plt.xticks(fontsize=20)
        plt.yticks(fontsize=20)

        n = train_input.shape[1]  # 999
        print("n: ", n)
        print("y shape: ", y.shape)
        print("y: ", y)
        print("y row len: ", len(y[0]))

        def draw(y_i, color):
            plt.plot(np.arange(n), y_i[:n], color, linewidth=2.0)
            plt.plot(np.arange(n, n+future), y_i[n:], color + ":", linewidth=2.0)
            print("y_i[:n]: ", y_i[:n])
            print("y_i[n:]: ", y_i[n:])

        draw(y[0], 'r')
        # draw(y[1], 'g')
        # draw(y[2], 'b')

        plt.savefig("predict%d.pdf" % i)
        plt.close()

Step:  0
loss:  0.03355243429541588
loss:  0.030769065022468567
loss:  0.020237887278199196
loss:  0.016444232314825058
loss:  0.015090428292751312
loss:  0.014607328921556473
loss:  0.014434285461902618
loss:  0.014371918514370918
loss:  0.014349165372550488
loss:  0.014340621419250965
loss:  0.014337169006466866
loss:  0.014335528947412968
loss:  0.014334478415548801
loss:  0.014333500526845455
loss:  0.014332255348563194
loss:  0.01433037593960762
loss:  0.01432733703404665
loss:  0.014322335831820965
loss:  0.01431416254490614


KeyboardInterrupt: 