This notebook tries to simulate stage 1 using an LSTM to verify the underlying structure of IP addresses.

Largely adapted from https://keras.io/examples/timeseries/timeseries_weather_forecasting/
with some reference to https://www.tensorflow.org/tutorials/structured_data/time_series.

In [1]:
import numpy as np
import pandas as pd
import pickle as pkl
from tensorflow import keras

In [2]:
# Load the stage-one dataset from a pickle file into a DataFrame.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# if it comes from a trusted source.
dataset = pd.read_pickle('../data/stage_one_dataset.pickle')

In [3]:
# Display the loaded DataFrame (the rendered output below is truncated to the
# first and last rows).
dataset

Unnamed: 0,0,1,2,3,4,5,6,7,label
0,96,171,10,219,230,138,193,153,False
1,227,220,46,221,146,156,6,102,False
2,204,166,96,1,60,197,47,108,False
3,36,226,252,43,2,82,69,117,False
4,236,167,96,1,3,101,88,202,False
...,...,...,...,...,...,...,...,...,...
19999995,54,224,252,43,202,247,161,113,False
19999996,40,193,96,1,37,121,8,119,False
19999997,144,196,76,1,74,187,57,99,False
19999998,209,241,214,199,197,178,232,74,False


In [4]:
# --- Hyperparameters ---------------------------------------------------------
split_fraction = 0.7    # fraction of rows assigned to the training split
past = 30               # number of rows of history covered by one input window
step = 1                # sampling rate inside a window
future = 1              # added to `past` to locate the target row for a window
learning_rate = 0.001
batch_size = 256
epochs = 10
sequence_length = int(past / step)

# --- Training dataset --------------------------------------------------------
# `timeseries_dataset_from_array` pairs targets[i] with the window that starts
# at data[i], so shifting the labels by `past + future` rows makes the window
# starting at row i predict the label found at row i + past + future.
train_split = int(split_fraction * dataset.shape[0])
start = past + future
end = start + train_split

# All slicing is positional (`.iloc`) so features and labels stay aligned even
# if the DataFrame index is not a default 0-based RangeIndex.
# The cast to float32 is done on the DataFrame (column by column) *before*
# extracting the NumPy array: calling `.values` first on a frame that mixes
# integer and boolean columns would build a large object-dtype array.
# NOTE(review): every column, including `label`, is used as an input feature
# (the sample batch has 9 features) -- confirm this is intended.
x_train = dataset.iloc[:train_split].astype('float32').values
# NOTE(review): this slice runs `start` rows past `train_split`, i.e. into the
# rows that the validation split also covers.
y_train = dataset.iloc[start:end][['label']].astype('float32')

dataset_train = keras.preprocessing.timeseries_dataset_from_array(
    x_train,
    y_train,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,
)

# --- Validation dataset ------------------------------------------------------
val_data = dataset.iloc[train_split:]
# Drop the trailing rows that have no label `past + future` rows ahead.
x_end = len(val_data) - past - future
label_start = train_split + past + future

x_val = val_data.iloc[:x_end].astype('float32').values
y_val = dataset.iloc[label_start:][['label']].astype('float32')

dataset_val = keras.preprocessing.timeseries_dataset_from_array(
    x_val,
    y_val,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,
)

In [6]:
# Pull a single batch from the training dataset and report the shapes of its
# input and target tensors.
for batch in dataset_train.take(1):
    (inputs, targets) = batch

for caption, tensor in (("Input shape:", inputs), ("Target shape:", targets)):
    print(caption, tensor.numpy().shape)

Input shape: (256, 30, 9)
Target shape: (256, 1)


In [7]:
# Build a small LSTM model whose input shape is taken from the sample batch
# (`inputs` currently holds that batch; axes 1 and 2 are used as the input shape).
# NOTE(review): the head is a single linear unit trained with MSE while the
# target is the 0/1 `label` column -- confirm a regression loss is intended.
timesteps = inputs.shape[1]
n_features = inputs.shape[2]

# `inputs` is rebound here from the sample batch to the model's input layer.
inputs = keras.layers.Input(shape=(timesteps, n_features))
lstm_layer = keras.layers.LSTM(32)
lstm_out = lstm_layer(inputs)
output_layer = keras.layers.Dense(1)
outputs = output_layer(lstm_out)

model = keras.Model(inputs=inputs, outputs=outputs)
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss="mse")
model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 30, 9)]           0         
_________________________________________________________________
lstm (LSTM)                  (None, 32)                5376      
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 5,409
Trainable params: 5,409
Non-trainable params: 0
_________________________________________________________________


In [None]:
# File that receives the checkpointed weights.
path_checkpoint = "model_checkpoint.h5"

# Stop training when validation loss has stopped improving (patience of 5 epochs).
es_callback = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    min_delta=0,
)

# Save weights only, and only when validation loss is the best seen so far.
modelckpt_callback = keras.callbacks.ModelCheckpoint(
    filepath=path_checkpoint,
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

training_callbacks = [es_callback, modelckpt_callback]

# Train on the windowed training set, evaluating on the validation set each epoch.
history = model.fit(
    dataset_train,
    validation_data=dataset_val,
    epochs=epochs,
    callbacks=training_callbacks,
)

Epoch 1/10

In [None]:
def visualize_loss(history, title):
    """Plot training and validation loss per epoch on one figure.

    Args:
        history: Object whose ``history`` attribute maps ``"loss"`` and
            ``"val_loss"`` to per-epoch sequences of values.
        title: Title shown above the plot.

    Note:
        Relies on ``plt`` being available in the notebook namespace. No
        ``matplotlib.pyplot`` import is visible in this notebook -- TODO
        confirm it is imported before this cell runs.
    """
    metrics = history.history
    train_curve = metrics["loss"]
    val_curve = metrics["val_loss"]
    # One x position per recorded epoch, starting at 0.
    epoch_axis = range(len(train_curve))

    plt.figure()
    for curve, fmt, legend in (
        (train_curve, "b", "Training loss"),
        (val_curve, "r", "Validation loss"),
    ):
        plt.plot(epoch_axis, curve, fmt, label=legend)
    plt.title(title)
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()


# Plot the loss curves recorded in `history` by the `model.fit` call.
visualize_loss(history, "Training and Validation Loss")