This notebook tries to simulates stage 1 using a LSTM to verify the underlying structure of IP address.

Largely adopted from https://keras.io/examples/timeseries/timeseries_weather_forecasting/
with some reference from https://www.tensorflow.org/tutorials/structured_data/time_series.

In [1]:
import numpy as np
import pandas as pd
import pickle as pkl
from tensorflow import keras
from matplotlib import pyplot as plt

In [2]:
dataset = pd.read_pickle('../data/stage_one_dataset.pickle')

# Shift the label column to match training setting
dataset['label'] = dataset.label.shift(1, fill_value=False) 

In [3]:
split_fraction = 0.7
past = 1000
step=1
future = 1
learning_rate = 0.001
batch_size = 32
epochs = 3
sequence_length = int(past / step)

# Training Dataset
train_split = int(split_fraction * dataset.shape[0])
start = past + future
end = start + train_split

x_train = dataset.loc[0 : train_split - 1].values.astype('float32')
y_train = dataset.iloc[start:end][['label']].astype('float32')

dataset_train = keras.preprocessing.timeseries_dataset_from_array(
    x_train,
    y_train,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,
)

# Validation Dataset
val_data = dataset.loc[train_split:]
x_end = len(val_data) - past - future
label_start = train_split + past + future

x_val = val_data.iloc[:x_end].values.astype('float32')
y_val = dataset.iloc[label_start:][['label']].astype('float32')

dataset_val = keras.preprocessing.timeseries_dataset_from_array(
    x_val,
    y_val,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,
)

In [4]:
for batch in dataset_train.take(1):
    inputs, targets = batch

print("Input shape:", inputs.numpy().shape)
print("Target shape:", targets.numpy().shape)

Input shape: (32, 1000, 9)
Target shape: (32, 1)


In [None]:
model = keras.Sequential()

model.add(keras.layers.Dense(64))
model.add(keras.layers.GRU(128))
model.add(keras.layers.Dense(1))

model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")
model.build(input_shape=inputs.shape)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (32, 1000, 64)            640       
_________________________________________________________________
gru (GRU)                    (32, 128)                 74496     
_________________________________________________________________
dense_1 (Dense)              (32, 1)                   129       
Total params: 75,265
Trainable params: 75,265
Non-trainable params: 0
_________________________________________________________________


In [None]:
path_checkpoint = "model_checkpoint.h5"
es_callback = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=5)

modelckpt_callback = keras.callbacks.ModelCheckpoint(
    monitor="val_loss",
    filepath=path_checkpoint,
    verbose=1,
    save_weights_only=True,
    save_best_only=True,
)

history = model.fit(
    dataset_train,
    epochs=epochs,
    validation_data=dataset_val,
    callbacks=[es_callback, modelckpt_callback],
)

Epoch 1/3
    32/437469 [..............................] - ETA: 46:03:21 - loss: 0.5365

In [None]:
def visualize_loss(history, title):
    loss = history.history["loss"]
    val_loss = history.history["val_loss"]
    epochs = range(len(loss))
    plt.figure()
    plt.plot(epochs, loss, "b", label="Training loss")
    plt.plot(epochs, val_loss, "r", label="Validation loss")
    plt.title(title)
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()


visualize_loss(history, "Training and Validation Loss")

In [None]:
model.predict(inputs)

In [None]:
targets