# Generative Adversarial Network

In this notebook, we'll be building a generative adversarial network (GAN) trained on the network flow dataset.

## Import Libraries

In [1]:
from __future__ import absolute_import, division, print_function

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from importlib import reload
import os
import time
from sklearn.model_selection import train_test_split

import tensorflow as tf
import tensorflow.contrib.eager as tfe

tf.enable_eager_execution()

print("TensorFlow version: {}".format(tf.VERSION))
print("Eager execution: {}".format(tf.executing_eagerly()))

  from ._conv import register_converters as _register_converters


TensorFlow version: 1.8.0
Eager execution: True


In [2]:
import utils, models
reload(utils)
reload(models)
from models import Generator, Discriminator
from utils import max_norm, parse_feature_label, loss, grad, sample_n_number, train_one_epoch

## Define Constants

In [3]:
IDS_DATASET = os.path.join('data', 'ids2017_sampled.csv')
RELEVANT_FEATURES = [' Source Port', ' Destination Port', ' Flow Duration', 'Total Length of Fwd Packets', ' Total Length of Bwd Packets', 'Bwd Packet Length Max', ' Bwd Packet Length Min', 'Flow Bytes/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean', ' Bwd IAT Std', ' Bwd IAT Min', 'Fwd PSH Flags', ' Bwd PSH Flags', ' Fwd URG Flags', ' Fwd Header Length', ' Bwd Packets/s', ' Packet Length Mean', ' ACK Flag Count', ' Down/Up Ratio', ' Avg Fwd Segment Size', ' Fwd Header Length.1', 'Fwd Avg Bytes/Bulk', ' Fwd Avg Packets/Bulk', ' Bwd Avg Bytes/Bulk', 'Bwd Avg Bulk Rate', 'Subflow Fwd Packets', ' Subflow Fwd Bytes', 'Init_Win_bytes_forward', ' act_data_pkt_fwd', ' Active Std', ' Active Min', ' Idle Max']
LABEL_NAME = ' Label'
BENIGN_LABEL = 0
ATTACK_LABEL = 2
TRAIN_FRAC = 0.3
FEATURE_NUM_MODIFIED = 2
LEARNING_RATE = 0.01
OUTPUT_DIR = 'SUMMARY/'
CHECKPOINT_DIR = 'CHECKPOINT/'
EPOCHS = 200
LOG_INTERVAL = 100

## Load and normalize data

In [4]:
# quick view of dataset
!head -n5 {IDS_DATASET}

Flow ID, Source IP, Source Port, Destination IP, Destination Port, Protocol, Timestamp, Flow Duration, Total Fwd Packets, Total Backward Packets,Total Length of Fwd Packets, Total Length of Bwd Packets, Fwd Packet Length Max, Fwd Packet Length Min, Fwd Packet Length Mean, Fwd Packet Length Std,Bwd Packet Length Max, Bwd Packet Length Min, Bwd Packet Length Mean, Bwd Packet Length Std,Flow Bytes/s, Flow Packets/s, Flow IAT Mean, Flow IAT Std, Flow IAT Max, Flow IAT Min,Fwd IAT Total, Fwd IAT Mean, Fwd IAT Std, Fwd IAT Max, Fwd IAT Min,Bwd IAT Total, Bwd IAT Mean, Bwd IAT Std, Bwd IAT Max, Bwd IAT Min,Fwd PSH Flags, Bwd PSH Flags, Fwd URG Flags, Bwd URG Flags, Fwd Header Length, Bwd Header Length,Fwd Packets/s, Bwd Packets/s, Min Packet Length, Max Packet Length, Packet Length Mean, Packet Length Std, Packet Length Variance,FIN Flag Count, SYN Flag Count, RST Flag Count, PSH Flag Count, ACK Flag Count, URG Flag Count, CWE Flag Count, ECE Flag Count, Down/Up Ratio, Average Packet Size, Av

In [5]:
# read csv
df = pd.read_csv(IDS_DATASET)

In [6]:
# extract relevant features and label name
df = df[RELEVANT_FEATURES + [LABEL_NAME]]

In [7]:
# extract bengin and attack flows we want
benign_df, attack_df = df[(df[LABEL_NAME] == BENIGN_LABEL)], df[(df[LABEL_NAME] == ATTACK_LABEL)]

In [8]:
# rewrite label values
benign_df.loc[:, LABEL_NAME] = 0
attack_df.loc[:, LABEL_NAME] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [9]:
# convert to numpy
benign, attack = benign_df.values, attack_df.values

In [10]:
# max normalization
benign[:, :len(RELEVANT_FEATURES)] = max_norm(benign[:, :len(RELEVANT_FEATURES)])
attack[:, :len(RELEVANT_FEATURES)] = max_norm(attack[:, :len(RELEVANT_FEATURES)])

In [11]:
# do train, test split separately on benign and attack
benign_train, benign_test = train_test_split(benign, train_size=TRAIN_FRAC)
attack_train, attack_test = train_test_split(attack, train_size=TRAIN_FRAC)



In [12]:
# concat to get testing data
test_np = np.concatenate([benign_test, attack_test])

## Generate Dataset

In [13]:
# convert to benign train dataset and attack train dataset
benign_train_dataset, attack_train_dataset = tf.data.Dataset.from_tensor_slices(benign_train), tf.data.Dataset.from_tensor_slices(attack_train)

In [14]:
# deal with benign train dataset
benign_train_dataset = benign_train_dataset.map(parse_feature_label)
benign_train_dataset = benign_train_dataset.shuffle(buffer_size=benign_train.shape[0] * 5)  # randomize
benign_train_dataset = benign_train_dataset.batch(100) # make batch

# View a single example entry from a batch
benign_features, benign_label = iter(benign_train_dataset).next()
print("benign example features:", benign_features[0])
print("benign example label:", benign_label[0])

benign example features: tf.Tensor(
[7.88681664e-01 8.26536500e-04 1.48351553e-06 8.54481757e-04
 3.45811052e-06 8.30479452e-03 1.66953528e-01 1.37754068e-02
 9.36037427e-07 8.96711684e-07 1.06779660e-06 7.80031189e-08
 3.33333333e-08 3.38983051e-08 0.00000000e+00 3.38983051e-08
 4.00000000e-07 4.06779661e-07 0.00000000e+00 4.06779661e-07
 0.00000000e+00 0.00000000e+00 0.00000000e+00 1.70189229e-04
 1.12994350e-02 5.88502269e-02 0.00000000e+00 1.42857143e-01
 2.80293230e-02 1.70189229e-04 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 5.36250536e-05 8.54481757e-04
 0.00000000e+00 5.43714659e-05 0.00000000e+00 0.00000000e+00
 0.00000000e+00], shape=(41,), dtype=float64)
benign example label: tf.Tensor(0, shape=(), dtype=int32)


In [15]:
# deal with attack train dataset
attack_train_dataset = attack_train_dataset.map(parse_feature_label)
attack_train_dataset = attack_train_dataset.shuffle(buffer_size=attack_train.shape[0] * 5)  # randomize
attack_train_dataset = attack_train_dataset.batch(100) # make batch

# View a single example entry from a batch
attack_features, attack_label = iter(attack_train_dataset).next()
print("attack example features:", attack_features[0])
print("attack example label:", attack_label[0])

attack example features: tf.Tensor(
[6.27905329e-01 0.00000000e+00 7.11920228e-01 1.35361552e-01
 1.00000000e+00 4.99525658e-01 0.00000000e+00 1.33863937e-05
 1.54295379e-01 3.85665529e-01 7.16101693e-01 5.51146384e-05
 7.10084031e-01 3.07888032e-01 4.71893491e-01 0.00000000e+00
 1.39187395e-03 2.70642157e-04 1.06018873e-03 4.60784314e-07
 0.00000000e+00 0.00000000e+00 0.00000000e+00 5.38461538e-01
 8.26667730e-08 4.96786042e-01 1.00000000e+00 0.00000000e+00
 1.34649123e-01 5.38461538e-01 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 5.00000000e-01 1.35361552e-01
 8.59589041e-03 1.11111111e-01 0.00000000e+00 3.10285625e-02
 7.16101695e-01], shape=(41,), dtype=float64)
attack example label: tf.Tensor(1, shape=(), dtype=int32)


## Build Model

In [16]:
generator = Generator(input_shape=len(RELEVANT_FEATURES), output_shape=FEATURE_NUM_MODIFIED)
discriminator = Discriminator()

## Build Optimizer

In [17]:
generator_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
discriminator_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)

## Build Tensorflow Training Environment

In [18]:
step_counter = tf.train.get_or_create_global_step()
summary_writer = tf.contrib.summary.create_file_writer(
      OUTPUT_DIR, flush_millis=1000)
checkpoint_prefix = os.path.join(CHECKPOINT_DIR, 'ckpt')
latest_cpkt = tf.train.latest_checkpoint(CHECKPOINT_DIR)
if latest_cpkt:
    print('Using latest checkpoint at ' + latest_cpkt)
model_objects = {
    'generator': generator,
    'discriminator': discriminator,
    'generator_optimizer': generator_optimizer,
    'discriminator_optimizer': discriminator_optimizer,
    'step_counter': step_counter
}
checkpoint = tfe.Checkpoint(**model_objects)
# Restore variables on creation if a checkpoint exists.
checkpoint.restore(latest_cpkt)

<tensorflow.contrib.eager.python.checkpointable_utils.InitializationOnlyStatus at 0x2ae4163ae668>

## Train

In [19]:
# with tf.device('/gpu:0'):
import utils, models
reload(utils)
reload(models)
from models import Generator, Discriminator
from utils import max_norm, parse_feature_label, loss, grad, sample_n_number, train_one_epoch

for _ in range(EPOCHS):
    start = time.time()
    with summary_writer.as_default():
        train_one_epoch(benign_dataset=benign_train_dataset,
                    attack_dataset=attack_train_dataset,
                    log_interval=LOG_INTERVAL,
                    modified_feature_num=FEATURE_NUM_MODIFIED,
                    **model_objects)
    end = time.time()
    checkpoint.save(checkpoint_prefix)
    print('\nTrain time for epoch #%d (step %d): %f' %
        (checkpoint.save_counter.numpy(),
         checkpoint.step_counter.numpy(),
         end - start))


Train time for epoch #1 (step 15): 2.657003

Train time for epoch #2 (step 30): 1.152268

Train time for epoch #3 (step 45): 1.036232

Train time for epoch #4 (step 60): 1.151313

Train time for epoch #5 (step 75): 1.042687

Train time for epoch #6 (step 90): 1.147477

Train time for epoch #7 (step 105): 1.147046

Train time for epoch #8 (step 120): 1.052617

Train time for epoch #9 (step 135): 1.163255

Train time for epoch #10 (step 150): 1.147148

Train time for epoch #11 (step 165): 1.236153

Train time for epoch #12 (step 180): 1.005451

Train time for epoch #13 (step 195): 1.001777

Train time for epoch #14 (step 210): 1.193196

Train time for epoch #15 (step 225): 1.131258

Train time for epoch #16 (step 240): 1.127695

Train time for epoch #17 (step 255): 1.013791

Train time for epoch #18 (step 270): 1.093749

Train time for epoch #19 (step 285): 1.055559

Train time for epoch #20 (step 300): 1.085957

Train time for epoch #21 (step 315): 1.190973

Train time for epoch #22 (s


Train time for epoch #172 (step 2580): 1.185393

Train time for epoch #173 (step 2595): 1.047542

Train time for epoch #174 (step 2610): 1.074981

Train time for epoch #175 (step 2625): 1.050221

Train time for epoch #176 (step 2640): 1.135132

Train time for epoch #177 (step 2655): 1.004740

Train time for epoch #178 (step 2670): 1.006137

Train time for epoch #179 (step 2685): 1.050610

Train time for epoch #180 (step 2700): 1.135757

Train time for epoch #181 (step 2715): 1.092459

Train time for epoch #182 (step 2730): 0.990758

Train time for epoch #183 (step 2745): 1.024459

Train time for epoch #184 (step 2760): 1.122186

Train time for epoch #185 (step 2775): 1.127692

Train time for epoch #186 (step 2790): 1.017190

Train time for epoch #187 (step 2805): 1.039683

Train time for epoch #188 (step 2820): 1.062749

Train time for epoch #189 (step 2835): 1.104456

Train time for epoch #190 (step 2850): 1.048669

Train time for epoch #191 (step 2865): 1.078442

Train time for epoc