In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2

In [2]:
# Target image geometry: each sample is zero-padded to `size` values and
# reshaped to (height, width) by the conversion functions below.
height = 64
width = 128
size = height * width
# Raw strings keep the Windows-style backslashes verbatim. Written as ordinary
# literals, "\.", "\d", "\C" and "\c" are invalid escape sequences, which
# recent Python versions warn about. The resulting values are unchanged.
path = r"..\..\datasets\CIC-IDS-2017"
# Index file that receives one "file_name,label" row per generated image.
csv_file = path + r"\cicids2017_img_serialized_5.csv"

In [3]:
# Load the converted packet table. The raw string avoids the invalid "\C"
# escape sequence while producing the same path as before.
df = pd.read_csv(path + r"\CICIDS_converted_data.csv")

In [4]:
# Remove rows that are exact duplicates across all columns (the first
# occurrence is kept). The original index values are not reset.
df = df.drop_duplicates()

In [None]:
# Print a summary of the frame: column names, dtypes, non-null counts, memory.
df.info()

In [None]:
# Display the column names; later cells address columns by position, so the
# column order shown here matters.
df.columns

In [None]:
# Distinct raw values of the `protocol` column, before it is encoded.
df.protocol.unique()

In [8]:
# Binary-encode the protocol: 1 where the value is exactly "tcp", 0 for
# anything else. Re-running this cell on the already encoded column maps
# every row to 0, because no integer equals "tcp".
is_tcp = df.protocol.eq("tcp")
df.protocol = is_tcp.astype("int64")

In [None]:
# Distinct values of `protocol` after encoding (the mapping only emits 0 and 1).
df.protocol.unique()

In [None]:
# Class names used to decode one-hot label vectors by position.
# pd.get_dummies emits its indicator columns in sorted category order and
# skips NaN, whereas Series.unique() returns values in order of first
# appearance. LABELS is therefore sorted (and NaN dropped) so that index i
# here corresponds to the i-th one-hot label column.
LABELS = np.array(sorted(df.label.dropna().unique()), dtype=object)
LABELS

In [None]:
# Number of distinct label values. Later cells slice the last 15 columns as
# the one-hot label block, so this is presumably expected to be 15 -- confirm.
len(df.label.unique())

In [None]:
# Share of all rows carrying each label (per-label count / total row count).
df.label.value_counts() / len(df)

In [13]:
# One-hot encode `label`: the column is replaced by `label_<class>` indicator
# columns appended at the end of the frame, in sorted class order.
# dtype is pinned to uint8 because pandas >= 2.0 defaults to bool indicators;
# mixing bool with numeric columns makes the later `to_numpy()` fall back to
# an object array, which is far slower and larger than a numeric one.
df = pd.get_dummies(df, columns=['label'], dtype=np.uint8)

In [None]:
# Convert the frame to a NumPy array. From here on `df` is an ndarray, not a
# DataFrame, and columns are addressed by position (negative offsets from the
# end select t_delta and the one-hot label block).
df = df.to_numpy()
# Display (n_rows, n_columns).
df.shape

In [None]:
# Element dtype and total size in bytes of the array before normalisation.
df.dtype, df.nbytes

In [16]:
# normalize payload, ttl, total_len
# Min-max scale every column except the last 17 to the range [0, 1].
# The last 15 columns are the one-hot label and column -16 is t_delta (scaled
# in its own cell); column -17 is skipped as well -- presumably the already
# binary protocol flag, TODO confirm against the column order.
for col_index in range(df.shape[1] - 17):
    column = df[:, col_index]
    col_min = np.min(column)
    col_range = np.max(column) - col_min
    if col_range == 0:
        # Constant column: the plain formula would be 0/0 and fill the column
        # with NaN, so map it to all zeros instead.
        column_normalized = np.zeros_like(column)
    else:
        column_normalized = (column - col_min) / col_range
    df[:, col_index] = column_normalized

In [None]:
# inspect t_delta
# Print the minimum and maximum of column -16 (the column treated as t_delta)
# before it is rescaled.
print(np.min(df[:, -16]), np.max(df[:, -16]))

In [18]:
# normalize t_delta
# Min-max scale column -16 to [0, 1], guarding against a constant column
# (zero range), which would otherwise turn the whole column into NaN.
column = df[:, -16]
col_min = np.min(column)
col_range = np.max(column) - col_min
if col_range == 0:
    column_normalized = np.zeros_like(column)
else:
    column_normalized = (column - col_min) / col_range
df[:, -16] = column_normalized

In [None]:
# Downcast the whole array to half precision to shrink its memory footprint.
# float16 keeps roughly three significant decimal digits.
df = df.astype(np.float16)
# Element dtype and total size in bytes after the cast.
df.dtype, df.nbytes

In [None]:
# Shuffle the rows in place with a fixed seed, so that the generated image
# files and their index numbers are reproducible across kernel restarts.
# NOTE(review): the conversion cells below build each image from 5 consecutive
# rows and take the label of the following row. Shuffling first means those
# windows are made of randomly ordered rows -- confirm this is intended.
rng = np.random.default_rng(42)
rng.shuffle(df)
# Display the shape and the one-hot label block of the first row.
df.shape, df[0, -15:]

In [None]:
def translate_encoded_label(encoded_label):
    """Return the entry of ``LABELS`` selected by a one-hot encoded vector.

    Args:
        encoded_label: Iterable of numbers; the position of the first element
            equal to 1 is used as the index into the module-level ``LABELS``.
            ``LABELS`` must therefore be in the same order as the one-hot
            columns.

    Returns:
        The element of ``LABELS`` at that position.

    Raises:
        ValueError: If no element of ``encoded_label`` equals 1.
    """
    one_hot = list(encoded_label)
    hot_position = one_hot.index(1)
    return LABELS[hot_position]
    
# Sanity check: decode the one-hot label (last 15 columns) of the first row.
translate_encoded_label(df[0, -15:])

In [None]:
def convert_dataset_to_grayscale_image(arr):
    """Preview the first 5-row window of ``arr`` as a grayscale image.

    The feature columns (everything except the last 15) of rows
    ``idx .. idx+4`` are flattened, zero-padded to ``size`` values, reshaped
    to ``(height, width)`` and shown with matplotlib. The label printed is
    that of row ``idx + 5``. The loop stops after the first window, so this
    is a visual check only and nothing is written to disk.

    Args:
        arr: 2-D array whose last 15 columns are the one-hot encoded label.

    Raises:
        ValueError: If one flattened window holds more than ``size`` values.
    """
    for idx in range(len(arr) - 5):
        batch = arr[idx: idx + 5, :-15]
        label = translate_encoded_label(arr[idx + 5, -15:])
        print(batch.shape, label)
        data = np.concatenate(batch)
        print(data.shape)
        print(label)

        missing = size - len(data)
        if missing < 0:
            raise ValueError(
                f"window of {len(data)} values does not fit into a "
                f"{height}x{width} image ({size} values)"
            )
        # Pad both ends; when `missing` is odd the extra zero goes at the end
        # so the result always has exactly `size` elements. Padding
        # int(missing / 2) on each side would be one short in that case.
        pad_before = missing // 2
        data = np.pad(data, pad_width=(pad_before, missing - pad_before), constant_values=0)
        print(data.shape)
        data = data.reshape(height, width).astype('float64')
        print(data.shape)

        plt.imshow(data, cmap='gray')
        plt.axis('off')  # Remove axes
        plt.show()

        # Preview only: stop after the first window.
        break
    print("DONE")


convert_dataset_to_grayscale_image(df)

In [None]:
def convert_dataset_to_rgb_image(arr):
    """Preview the first 5-row window of ``arr`` as a 3-channel image.

    The feature columns (everything except the last 15) of rows
    ``idx .. idx+4`` are flattened, zero-padded to ``size`` values and
    reshaped to ``(height, width)``. That plane and its 180-degree rotations
    are stacked into a ``(height, width, 3)`` image and shown with
    matplotlib. The loop stops after the first window, so this is a visual
    check only and nothing is written to disk.

    Args:
        arr: 2-D array whose last 15 columns are the one-hot encoded label.

    Raises:
        ValueError: If one flattened window holds more than ``size`` values.
    """
    for idx in range(len(arr) - 5):
        batch = arr[idx: idx + 5, :-15]
        label = translate_encoded_label(arr[idx + 5, -15:])
        print(batch.shape, label)
        data = np.concatenate(batch)
        print(data.shape)
        print(label)

        missing = size - len(data)
        if missing < 0:
            raise ValueError(
                f"window of {len(data)} values does not fit into a "
                f"{height}x{width} image ({size} values)"
            )
        # Pad both ends; when `missing` is odd the extra zero goes at the end
        # so the result always has exactly `size` elements.
        pad_before = missing // 2
        data = np.pad(data, pad_width=(pad_before, missing - pad_before), constant_values=0)
        data = data.reshape(height, width)

        channel_1 = data.astype('float64')
        print(channel_1.shape)
        # A 180-degree rotation keeps the (height, width) shape.
        channel_2 = np.rot90(channel_1, k=2).reshape(height, width)
        print(channel_2.shape)
        # NOTE(review): rotating by 180 degrees a second time restores the
        # original orientation, so channel_3 holds the same values as
        # channel_1 -- confirm that this is the intended third channel.
        channel_3 = np.rot90(channel_2, k=2).reshape(height, width)
        print(channel_3.shape)
        # Stack to (3, height, width), then move channels last.
        img = np.stack((channel_1, channel_2, channel_3)).transpose((1, 2, 0))
        print(img.shape)

        plt.imshow(img)
        plt.axis('off')  # Remove axes
        plt.show()

        # Preview only: stop after the first window.
        break
    print("DONE")


convert_dataset_to_rgb_image(df)

In [None]:
# Start a fresh index file containing only the header row; the conversion
# function below appends one row per image.
with open(csv_file, 'w') as f:
    np.savetxt(f, [np.array(["file_name", "label"])], delimiter=',', fmt='%s')


def convert_dataset_to_image(arr):
    """Write every 5-row window of ``arr`` as a PNG and log it in ``csv_file``.

    For each ``idx`` the feature columns (everything except the last 15) of
    rows ``idx .. idx+4`` are flattened, zero-padded to ``size`` values and
    reshaped to ``(height, width)``. That plane and its 180-degree rotations
    form a 3-channel image, scaled by 255 and saved as
    ``cic_ids_2017_<idx>.png`` in the ``image_serialized_5`` folder under
    ``path``. A ``file_name,label`` row is appended to ``csv_file``, where the
    label is decoded from row ``idx + 5``.

    Args:
        arr: 2-D array whose last 15 columns are the one-hot encoded label.

    Raises:
        ValueError: If one flattened window holds more than ``size`` values.
        OSError: If OpenCV reports that an image could not be written
            (for example because the output folder does not exist).
    """
    with open(csv_file, 'a') as f:
        for idx in range(len(arr) - 5):
            batch = arr[idx: idx + 5, :-15]
            label = translate_encoded_label(arr[idx + 5, -15:])
            data = np.concatenate(batch)
            # Lightweight progress output.
            if idx % 10_000 == 0:
                print(idx, label)

            missing = size - len(data)
            if missing < 0:
                raise ValueError(
                    f"window of {len(data)} values does not fit into a "
                    f"{height}x{width} image ({size} values)"
                )
            # Pad both ends; when `missing` is odd the extra zero goes at the
            # end so the result always has exactly `size` elements.
            pad_before = missing // 2
            data = np.pad(data, pad_width=(pad_before, missing - pad_before), constant_values=0)
            data = data.reshape(height, width)

            channel_1 = data.astype('float64')
            channel_2 = np.rot90(channel_1, k=2).reshape(height, width)
            # NOTE(review): a second 180-degree rotation restores the original
            # orientation, so channel_3 equals channel_1 -- confirm intent.
            channel_3 = np.rot90(channel_2, k=2).reshape(height, width)
            # Stack to (3, height, width), then move channels last.
            img = np.stack((channel_1, channel_2, channel_3)).transpose((1, 2, 0))

            file_name = f"cic_ids_2017_{idx}.png"
            # Backslashes are escaped explicitly; the resulting path is the
            # same as before, without the invalid "\i" escape sequence.
            image_path = path + "\\image_serialized_5\\" + file_name
            # cv2.imwrite signals failure through its return value rather than
            # an exception. Without this check the index file would list
            # images that were never written.
            if not cv2.imwrite(image_path, img * 255):
                raise OSError(
                    f"could not write {image_path!r}; "
                    "check that the output folder exists and is writable"
                )

            log = np.array([file_name, label])
            np.savetxt(f, [log], delimiter=',', fmt='%s')
    print("DONE")


convert_dataset_to_image(df)