# Covert Channel Machine Learning

## Loading data and preprocessing

In [None]:
# Mount Google Drive at /content/drive/ so the data files stored there can be read
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import os
# Location of .obj files in Google Drive
data_location = '/content/drive/MyDrive/CC Machine Learning/'
# Make that folder the working directory so later cells can open the
# .obj files by bare filename
os.chdir(data_location)

In [None]:
# List the entries of the current working directory whose names contain "obj"
!ls | grep obj

X_60_active.obj
X_60.obj
X_kernel24.obj
X.obj
X_simple.obj
y_60_active.obj
y_60.obj
y_kernel24.obj
y.obj
y_simple.obj


Now we are in the proper location to access the .obj files

In [None]:
import pickle
import numpy as np

# NOTE(review): the directory listing recorded earlier in this notebook does
# not show these two filenames — confirm they exist in `data_location`.
X_file = 'X_complex_tcp.obj'
y_file = 'y_complex_tcp.obj'

# Every capture is truncated or zero-padded to this many values
standard_length = 20


def _iter_pickled(path):
  """Yield each object stored back-to-back in the pickle file at `path`.

  The files were written with repeated dumps into one buffer, so objects are
  read one at a time until the end of the file is reached.
  """
  # SECURITY: pickle.load can execute arbitrary code embedded in the file.
  # Only load .obj files that come from a trusted source.
  with open(path, 'rb') as f:
    while True:
      try:
        yield pickle.load(f)
      except EOFError:
        return


def _fit_length(arr, length):
  """Return `arr` truncated or right-padded with zeros to exactly `length` values.

  Assumes `arr` is a 1-D numpy array — TODO confirm against the code that
  wrote the capture files.
  """
  if arr.size > length:
    return arr[:length]
  if arr.size < length:
    return np.pad(arr, (0, length - arr.size))
  return arr


# Collect the rows first and stack once; stacking inside the loop would copy
# the whole array again for every capture.
rows = [_fit_length(capture, standard_length) for capture in _iter_pickled(X_file)]
captures_loaded = len(rows)
# The empty integer seed keeps the (0, standard_length) shape when the file
# holds no captures and applies the same dtype promotion as stacking onto an
# integer array row by row.
X = np.vstack([np.empty((0, standard_length), dtype=int), *rows])

print(f'Captures loaded: {captures_loaded}')
print(f'X shape: {X.shape}')

# Load y the same way; every stored object is flattened and joined into one
# 1-D float array (the empty float seed fixes the dtype and covers an empty file).
labels = [np.ravel(label) for label in _iter_pickled(y_file)]
y = np.concatenate([np.empty(0), *labels])

print(f'y shape: {y.shape}')

# Fail early with a clear message if features and targets do not line up
if X.shape[0] != y.shape[0]:
  raise ValueError(
      f'X has {X.shape[0]} captures but y has {y.shape[0]} labels; '
      f'check that {X_file} and {y_file} belong together')

Captures loaded: 2000
X shape: (2000, 20)
y shape: (2000,)


* `X` - 2D NumPy array of shape (captures, packets per capture): one row per capture, one column per packet in that capture
* `y` - 1D NumPy array of target labels, one per capture

In [None]:
# Now need to convert y to categorical
from keras.utils import to_categorical
# Only encode while y is still the 1-D label vector. Without this guard,
# re-running the cell would one-hot-encode the already encoded matrix and
# silently produce a 3-D array.
if y.ndim == 1:
  y = to_categorical(y)

In [None]:
# Show the dimensions of the feature matrix and of the target array, one per line
print(f'{X.shape}\n{y.shape}')

(2000, 20)
(2000, 2)


In [None]:
# Generate train and test datasets
from sklearn.model_selection import train_test_split

# Fixed seed so the same 90/10 split is produced on every run; without it the
# reported accuracies cannot be reproduced or compared between runs.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.1, random_state=SPLIT_SEED)

In [None]:
# Scale data to fit between 0 and 1 range
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

# Learn the per-column min/max from the training split ONLY. Calling fit a
# second time on X_test would discard the training statistics, so both splits
# would be scaled with test-set ranges (leaking test information and pushing
# training values outside [0, 1]).
scaler.fit(X_train)
# Apply the same training-derived scaling to both splits. Test values that lie
# outside the training range will land slightly outside [0, 1]; that is expected.
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Building the network with Keras

In [None]:
# Import Keras elements needed for the model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.layers import BatchNormalization
from keras.layers import GlobalMaxPooling1D
from keras.layers import LSTM
from keras.layers import Bidirectional
from keras.layers import InputLayer
from keras.layers import Add
from keras.layers import LayerNormalization
from keras.layers import MultiHeadAttention
from keras.optimizers import Adam
import tensorflow as tf

In [None]:
# Dimensions and training settings used when building and fitting the model.
# TODO: maybe size the input to the largest capture and zero-fill captures with fewer packets.
# NOTE(review): the data-loading cell already truncates/zero-pads every capture
# to `standard_length`, so this TODO may be obsolete — confirm.
n_isns = X.shape[1]                   # no. of columns in X, i.e. values per capture
n_outputs = y_train.shape[1]          # no. of columns in y_train; NOTE: the model-building cell reassigns n_outputs to 2
verbose = 0                           # passed as `verbose` to model.fit / model.evaluate
epochs = 10                           # passed as `epochs` to model.fit
batch_size = 32                       # passed as `batch_size` to model.fit / model.evaluate

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, BatchNormalization, MaxPooling1D, Dropout, LSTM, Dense, Bidirectional, InputLayer

# Input length (values per capture) and number of output classes
sequence_length = n_isns  # Replace with your sequence length
n_outputs = 2  # Since you have two classes

# Assemble the network layer by layer
model = Sequential()

# Each capture is fed in as a single-channel sequence
model.add(InputLayer(input_shape=(sequence_length, 1)))

# Three convolutional stages, described as (filters, kernel size, dropout rate).
# Every stage is: two Conv1D+BatchNormalization pairs, a stride-2 max-pool,
# then dropout.
conv_stages = [
    (64, 5, 0.3),
    (128, 3, 0.4),
    (256, 3, 0.5),
]
for stage_filters, stage_kernel, stage_dropout in conv_stages:
    for _ in range(2):
        model.add(Conv1D(filters=stage_filters, kernel_size=stage_kernel,
                         activation='relu', padding='same'))
        model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(stage_dropout))

# Two stacked bidirectional LSTMs; the first keeps the full sequence so the
# second can consume it, the second returns only its final output
model.add(Bidirectional(LSTM(128, return_sequences=True)))
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.5))

# Dense head: 256 then 128 ReLU units, each followed by dropout
for dense_units in (256, 128):
    model.add(Dense(dense_units, activation='relu'))
    model.add(Dropout(0.5))

# Softmax over the output classes
model.add(Dense(n_outputs, activation='softmax'))

# Configure training: cross-entropy on one-hot targets, Adam, track accuracy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Print the layer-by-layer overview
model.summary()




In [None]:
# TESTING THE MODEL
# Train and evaluate the architecture `repeats` times and report the mean test
# accuracy and the mean time per repeat.
import time

repeats = 10
total = 0        # running sum of test accuracies, in percent
total_time = 0   # running sum of seconds spent in fit + evaluate
for i in range(repeats):
    print("Test", i+1, "... ", end="")
    # Start every repeat from freshly initialised weights and a fresh optimizer.
    # Re-fitting the same instance would just continue the previous repeat's
    # training, so the "repeats" would not be independent trials.
    # The compile arguments must match those used in the model-building cell.
    model = tf.keras.models.clone_model(model)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments
    start_time = time.perf_counter()
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate returns [loss, accuracy] because 'accuracy' is the only metric
    accuracy = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=verbose)[1]
    time_elapsed = time.perf_counter() - start_time
    print(f"{accuracy*100:.2f}% \t Time: {time_elapsed:.2f}s")
    total += accuracy*100
    total_time += time_elapsed
# After the loop `model` is the network trained in the final repeat
total_accuracy = total/repeats
print(f"Average accuracy: {total_accuracy:.2f}%")
print(f"Average time: {total_time/repeats:.2f}s")



Test 1 ... 48.00% 	 Time: 56.24s
Test 2 ... 46.00% 	 Time: 39.64s
Test 3 ... 53.00% 	 Time: 44.47s
Test 4 ... 52.50% 	 Time: 39.34s
Test 5 ... 49.00% 	 Time: 43.85s
Test 6 ... 49.50% 	 Time: 36.99s
Test 7 ... 49.00% 	 Time: 45.43s
Test 8 ... 49.00% 	 Time: 48.51s
Test 9 ... 50.50% 	 Time: 40.67s
Test 10 ... 49.00% 	 Time: 41.84s
Average accuracy: 49.55%
Average time: 43.70s
