In [1]:
%%shell
# Copy the preprocess module from Google Drive into the current directory.
# NOTE: the %%shell cell magic has to be the very first line of the cell to be
# recognised as a cell magic, so explanatory comments go below it (as shell comments).

DIRECTORY="/content/drive/My Drive/Colab Notebooks/Research_project"
if [ -d "$DIRECTORY" ]; then
  # Directory is reachable: copy the module next to the notebook.
  cp "$DIRECTORY/preprocess.py" .
else
  # Best effort only: report the missing directory and carry on.
  echo "$DIRECTORY not found"
fi



In [2]:
import preprocess
import tensorflow as tf

In [3]:
# Load and preprocess the dataset through the project's preprocess module.
# The tunable inputs are named up front so they are easy to spot and change.

DATA_PATH = "/content/drive/My Drive/Colab Notebooks/Research_project/data"

# CSV files handed to load_data as the training and the test set respectively.
TRAIN_CSV = ['UDPLag.csv', 'Syn.csv', 'DrDoS_UDP.csv', 'DrDoS_NetBIOS.csv', 'DrDoS_MSSQL.csv', 'DrDoS_LDAP.csv']
TEST_CSV = ['Syn.csv', 'UDPLag.csv', 'UDP.csv', 'LDAP.csv', 'MSSQL.csv', 'NetBIOS.csv']

# NOTE(review): presumably the number of rows read per chunk -- confirm in preprocess.load_data.
CHUNK_SIZE = 9**6

feature_columns, labels, train_dfs, test_dfs = preprocess.load_data(
    data_path=DATA_PATH,
    train_csv=TRAIN_CSV,
    test_csv=TEST_CSV,
    chunk_size=CHUNK_SIZE,
)

In [4]:
# Build the DNN classifier used for training, evaluation and export below.

def _make_optimizer():
    """Create an Adam optimizer whose learning rate decays exponentially.

    The function itself (not its result) is handed to the estimator, so the
    optimizer is only constructed when the estimator invokes the callable.
    """
    decayed_learning_rate = tf.compat.v1.train.exponential_decay(
        learning_rate=0.1,
        global_step=tf.compat.v1.train.get_global_step(),
        decay_steps=10000,
        decay_rate=0.96,
    )
    return tf.keras.optimizers.Adam(learning_rate=decayed_learning_rate)


classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[60, 30, 20],
    # One output class per entry of the label vocabulary.
    n_classes=len(labels),
    label_vocabulary=labels,
    batch_norm=True,
    optimizer=_make_optimizer,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpm5xugs3i', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [5]:
def input_fn(df, training, batch_size=32, shuffle_buffer=1000):
    '''
    Build a batched tf.data.Dataset for training or evaluating.

    Args:
        df: Mapping with a "features" entry (anything `dict(...)` can turn
            into a name -> column mapping) and a "labels" entry.
        training: When truthy, the dataset is shuffled and repeated
            indefinitely; otherwise it is iterated once, in order.
        batch_size: Number of examples per batch.
        shuffle_buffer: Size of the shuffle buffer used in training mode.
            Defaults to 1000, the value that used to be hard-coded. A buffer
            smaller than the dataset only shuffles locally, so raise it when
            the rows are ordered.

    Returns:
        The (optionally shuffled and repeated) dataset, batched.
    '''
    # Convert the inputs to a Dataset of (features dict, label) pairs
    dataset = tf.data.Dataset.from_tensor_slices((dict(df["features"]), df["labels"]))
    # Shuffle and repeat if you are in training mode
    if training:
        dataset = dataset.shuffle(shuffle_buffer).repeat()
    return dataset.batch(batch_size)

In [6]:
# Fit the classifier on each training chunk in turn; every call to train()
# runs steps=10**4 on the chunk it is given.

for train_df in train_dfs:
    def train_input_fn():
        """Feed the current chunk to the estimator in training mode."""
        return input_fn(train_df, training=True)

    classifier.train(input_fn=train_input_fn, steps=10**4)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
INFO:tensorflow:loss = 0.36286306, step = 71100 (0.179 sec)
INFO:tensorflow:global_step/sec: 549.236
INFO:tensorflow:loss = 0.15845478, step = 71200 (0.183 sec)
INFO:tensorflow:global_step/sec: 562.679
INFO:tensorflow:loss = 0.21790843, step = 71300 (0.177 sec)
INFO:tensorflow:global_step/sec: 550.097
INFO:tensorflow:loss = 0.42392632, step = 71400 (0.181 sec)
INFO:tensorflow:global_step/sec: 559.951
INFO:tensorflow:loss = 0.77013093, step = 71500 (0.179 sec)
INFO:tensorflow:global_step/sec: 530.152
INFO:tensorflow:loss = 0.27881876, step = 71600 (0.188 sec)
INFO:tensorflow:global_step/sec: 561.765
INFO:tensorflow:loss = 0.7776079, step = 71700 (0.178 sec)
INFO:tensorflow:global_step/sec: 539.699
INFO:tensorflow:loss = 0.43421307, step = 71800 (0.185 sec)
INFO:tensorflow:global_step/sec: 558.568
INFO:tensorflow:loss = 0.33803508, step = 71900 (0.179 sec)
INFO:tensorflow:global_step/sec: 552.89
INFO:tensorflow:loss = 0.280

In [7]:
# Evaluate the trained classifier on every chunk of every test file and
# collect the results as one {"file", "measures"} entry per file.

metrics = []
for file_test_df in test_dfs:
    # One evaluate() result per chunk of the current file, in chunk order.
    file_measures = [
        classifier.evaluate(input_fn=lambda: input_fn(test_df, training=False))
        for test_df in file_test_df["dataframe"]
    ]
    metrics.append({"file": file_test_df["file"], "measures": file_measures})

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-06-27T06:29:00Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpm5xugs3i/model.ckpt-300000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 18.51006s
INFO:tensorflow:Finished evaluation at 2020-06-27-06:29:18
INFO:tensorflow:Saving dict for global step 300000: accuracy = 0.9173229, average_loss = 146774.6, global_step = 300000, loss = 146770.45
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 300000: /tmp/tmpm5xugs3i/model.ckpt-300000
INFO:tensorflow:Calli

In [8]:
# Display the collected evaluation results: one entry per test file, each
# holding the file name and the list of measures returned by evaluate().
metrics

[{'file': '03-11/UDPLag.csv',
  'measures': [{'accuracy': 0.9173229,
    'average_loss': 146774.6,
    'global_step': 300000,
    'loss': 146770.45},
   {'accuracy': 0.9984153,
    'average_loss': 22.906052,
    'global_step': 300000,
    'loss': 22.905579}]},
 {'file': '03-11/UDP.csv',
  'measures': [{'accuracy': 0.7322243,
    'average_loss': 563.4279,
    'global_step': 300000,
    'loss': 563.4121},
   {'accuracy': 0.7291308,
    'average_loss': 8.470336,
    'global_step': 300000,
    'loss': 8.47011},
   {'accuracy': 0.73571104,
    'average_loss': 0.4749504,
    'global_step': 300000,
    'loss': 0.4749469},
   {'accuracy': 0.7243024,
    'average_loss': 1848.5493,
    'global_step': 300000,
    'loss': 1848.4971},
   {'accuracy': 0.7192614,
    'average_loss': 6.5206227,
    'global_step': 300000,
    'loss': 6.5204544},
   {'accuracy': 0.72939044,
    'average_loss': 1957.9305,
    'global_step': 300000,
    'loss': 1957.8752},
   {'accuracy': 0.7255688,
    'average_loss': 10

In [9]:
# Set to False to skip exporting the trained model.
EXPORT = True

# Bound up front so that later cells can reference it even when the export is
# skipped (it used to be undefined in that case, raising a NameError).
estimator_path = None

if EXPORT:
    # Serving input receiver that parses serialized tf.Example protos using
    # the parsing spec derived from the model's feature columns.
    serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
        tf.feature_column.make_parse_example_spec(feature_columns))
    # Save the model
    estimator_path = classifier.export_saved_model("/content/drive/My Drive/Colab Notebooks/Research_project/", serving_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
INFO:tensorflow:Signatures INCLUDED in export for Classify: ['serving_default', 'classification']
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:Signatures INCLUDED in export for Eval: None
INFO:tensorflow:Restoring parameters from /tmp/tmpm5xugs3i/model.ckpt-300000
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: /content/drive/My Drive/Colab Notebooks/Research_project/temp-1593240092/saved_model.pb


In [10]:
# Display the value returned by export_saved_model for the exported model.
estimator_path

b'/content/drive/My Drive/Colab Notebooks/Research_project/1593240092'

# References
* https://www.tensorflow.org/tutorials/load_data/pandas_dataframe
* https://www.tensorflow.org/tutorials/structured_data/feature_columns
* https://www.tensorflow.org/tutorials/estimator/premade
* https://www.tensorflow.org/datasets/performances
* https://www.tensorflow.org/guide/data#batching_dataset_elements