# Generative Adversarial Network

In this notebook, we'll be building a generative adversarial network (GAN) trained on the network flow dataset.

## Import Libraries

In [1]:
from __future__ import absolute_import, division, print_function

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from importlib import reload
import os
import time
from sklearn.model_selection import train_test_split

import tensorflow as tf
import tensorflow.contrib.eager as tfe

tf.enable_eager_execution()

print("TensorFlow version: {}".format(tf.VERSION))
print("Eager execution: {}".format(tf.executing_eagerly()))

  from ._conv import register_converters as _register_converters


TensorFlow version: 1.8.0
Eager execution: True


In [20]:
import utils, models
reload(utils)
reload(models)
from models import Generator, Discriminator
from utils import max_norm, parse_feature_label, train_one_epoch

## Define Constants

In [3]:
IDS_DATASET = os.path.join('data', 'ids2017_sampled.csv')
RELEVANT_FEATURES = [' Source Port', ' Destination Port', ' Flow Duration', 'Total Length of Fwd Packets', ' Total Length of Bwd Packets', 'Bwd Packet Length Max', ' Bwd Packet Length Min', 'Flow Bytes/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean', ' Bwd IAT Std', ' Bwd IAT Min', 'Fwd PSH Flags', ' Bwd PSH Flags', ' Fwd URG Flags', ' Fwd Header Length', ' Bwd Packets/s', ' Packet Length Mean', ' ACK Flag Count', ' Down/Up Ratio', ' Avg Fwd Segment Size', ' Fwd Header Length.1', 'Fwd Avg Bytes/Bulk', ' Fwd Avg Packets/Bulk', ' Bwd Avg Bytes/Bulk', 'Bwd Avg Bulk Rate', 'Subflow Fwd Packets', ' Subflow Fwd Bytes', 'Init_Win_bytes_forward', ' act_data_pkt_fwd', ' Active Std', ' Active Min', ' Idle Max']
LABEL_NAME = ' Label'
BENIGN_LABEL = 0
ATTACK_LABEL = 2
TRAIN_FRAC = 0.3
FEATURE_NUM_MODIFIED = 2
LEARNING_RATE = 0.01
OUTPUT_DIR = 'SUMMARY/'
CHECKPOINT_DIR = 'CHECKPOINT/'
EPOCHS = 200
LOG_INTERVAL = 100

## Load and normalize data

In [4]:
# quick view of dataset
!head -n5 {IDS_DATASET}

Flow ID, Source IP, Source Port, Destination IP, Destination Port, Protocol, Timestamp, Flow Duration, Total Fwd Packets, Total Backward Packets,Total Length of Fwd Packets, Total Length of Bwd Packets, Fwd Packet Length Max, Fwd Packet Length Min, Fwd Packet Length Mean, Fwd Packet Length Std,Bwd Packet Length Max, Bwd Packet Length Min, Bwd Packet Length Mean, Bwd Packet Length Std,Flow Bytes/s, Flow Packets/s, Flow IAT Mean, Flow IAT Std, Flow IAT Max, Flow IAT Min,Fwd IAT Total, Fwd IAT Mean, Fwd IAT Std, Fwd IAT Max, Fwd IAT Min,Bwd IAT Total, Bwd IAT Mean, Bwd IAT Std, Bwd IAT Max, Bwd IAT Min,Fwd PSH Flags, Bwd PSH Flags, Fwd URG Flags, Bwd URG Flags, Fwd Header Length, Bwd Header Length,Fwd Packets/s, Bwd Packets/s, Min Packet Length, Max Packet Length, Packet Length Mean, Packet Length Std, Packet Length Variance,FIN Flag Count, SYN Flag Count, RST Flag Count, PSH Flag Count, ACK Flag Count, URG Flag Count, CWE Flag Count, ECE Flag Count, Down/Up Ratio, Average Packet Size, Av

In [5]:
# read csv
df = pd.read_csv(IDS_DATASET)

In [6]:
# extract relevant features and label name
df = df[RELEVANT_FEATURES + [LABEL_NAME]]

In [7]:
# extract bengin and attack flows we want
benign_df, attack_df = df[(df[LABEL_NAME] == BENIGN_LABEL)], df[(df[LABEL_NAME] == ATTACK_LABEL)]

In [8]:
# rewrite label values
benign_df.loc[:, LABEL_NAME] = 0
attack_df.loc[:, LABEL_NAME] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [9]:
# convert to numpy
benign, attack = benign_df.values, attack_df.values

In [10]:
# max normalization
benign[:, :len(RELEVANT_FEATURES)] = max_norm(benign[:, :len(RELEVANT_FEATURES)])
attack[:, :len(RELEVANT_FEATURES)] = max_norm(attack[:, :len(RELEVANT_FEATURES)])

In [11]:
# do train, test split separately on benign and attack
benign_train, benign_test = train_test_split(benign, train_size=TRAIN_FRAC)
attack_train, attack_test = train_test_split(attack, train_size=TRAIN_FRAC)



In [12]:
# concat to get testing data
test_np = np.concatenate([benign_test, attack_test])

## Generate Dataset

In [13]:
# convert to benign train dataset and attack train dataset
benign_train_dataset, attack_train_dataset = tf.data.Dataset.from_tensor_slices(benign_train), tf.data.Dataset.from_tensor_slices(attack_train)

In [14]:
# deal with benign train dataset
benign_train_dataset = benign_train_dataset.map(parse_feature_label)
benign_train_dataset = benign_train_dataset.shuffle(buffer_size=benign_train.shape[0] * 5)  # randomize
benign_train_dataset = benign_train_dataset.batch(100) # make batch

# View a single example entry from a batch
benign_features, benign_label = iter(benign_train_dataset).next()
print("benign example features:", benign_features[0])
print("benign example label:", benign_label[0])

benign example features: tf.Tensor(
[6.67455206e-01 8.26536500e-04 1.65020267e-06 5.78418420e-04
 3.35115865e-06 8.04794521e-03 1.61790017e-01 1.33476247e-02
 1.04004159e-06 1.08950601e-06 1.25423728e-06 3.12012476e-08
 4.08333333e-07 4.15254237e-07 0.00000000e+00 4.15254237e-07
 8.33333333e-09 8.47457627e-09 0.00000000e+00 8.47457627e-09
 0.00000000e+00 0.00000000e+00 0.00000000e+00 1.70189229e-04
 1.01522843e-02 4.84114977e-02 0.00000000e+00 1.42857143e-01
 1.89736956e-02 1.70189229e-04 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 5.36250536e-05 5.78418420e-04
 0.00000000e+00 5.43714659e-05 0.00000000e+00 0.00000000e+00
 0.00000000e+00], shape=(41,), dtype=float64)
benign example label: tf.Tensor(0, shape=(), dtype=int32)


In [15]:
# deal with attack train dataset
attack_train_dataset = attack_train_dataset.map(parse_feature_label)
attack_train_dataset = attack_train_dataset.shuffle(buffer_size=attack_train.shape[0] * 5)  # randomize
attack_train_dataset = attack_train_dataset.batch(100) # make batch

# View a single example entry from a batch
attack_features, attack_label = iter(attack_train_dataset).next()
print("attack example features:", attack_features[0])
print("attack example label:", attack_label[0])

attack example features: tf.Tensor(
[3.50056689e-02 0.00000000e+00 8.40744984e-09 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 3.82653066e-08 0.00000000e+00 8.47457634e-09 1.65343915e-04
 8.40336142e-09 3.81679394e-08 0.00000000e+00 7.68344218e-05
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 5.76923077e-02
 0.00000000e+00 0.00000000e+00 1.00000000e+00 0.00000000e+00
 0.00000000e+00 5.76923077e-02 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 9.38356164e-03 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00], shape=(41,), dtype=float64)
attack example label: tf.Tensor(1, shape=(), dtype=int32)


## Build Model

In [16]:
generator = Generator(input_shape=len(RELEVANT_FEATURES), output_shape=FEATURE_NUM_MODIFIED)
discriminator = Discriminator()

## Build Optimizer

In [17]:
generator_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
discriminator_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)

## Build Tensorflow Training Environment

In [18]:
step_counter = tf.train.get_or_create_global_step()
summary_writer = tf.contrib.summary.create_file_writer(
      OUTPUT_DIR, flush_millis=1000)
checkpoint_prefix = os.path.join(CHECKPOINT_DIR, 'ckpt')
latest_cpkt = tf.train.latest_checkpoint(CHECKPOINT_DIR)
if latest_cpkt:
    print('Using latest checkpoint at ' + latest_cpkt)
model_objects = {
    'generator': generator,
    'discriminator': discriminator,
    'generator_optimizer': generator_optimizer,
    'discriminator_optimizer': discriminator_optimizer,
    'step_counter': step_counter
}
checkpoint = tfe.Checkpoint(**model_objects)
# Restore variables on creation if a checkpoint exists.
checkpoint.restore(latest_cpkt)

<tensorflow.contrib.eager.python.checkpointable_utils.InitializationOnlyStatus at 0x2ba81fd992b0>

## Train

In [21]:
with tf.device('/gpu:0'):
    for _ in range(EPOCHS):
        start = time.time()
        with summary_writer.as_default():
            train_one_epoch(benign_dataset=benign_train_dataset,
                        attack_dataset=attack_train_dataset,
                        log_interval=LOG_INTERVAL,
                        modified_feature_num=FEATURE_NUM_MODIFIED,
                        **model_objects)
        end = time.time()
        checkpoint.save(checkpoint_prefix)
        print('\nTrain time for epoch #%d (step %d): %f' %
            (checkpoint.save_counter.numpy(),
             checkpoint.step_counter.numpy(),
             end - start))


Train time for epoch #1 (step 16): 1.534575

Train time for epoch #2 (step 31): 1.222377

Train time for epoch #3 (step 46): 1.346656

Train time for epoch #4 (step 61): 1.335705

Train time for epoch #5 (step 76): 1.291707

Train time for epoch #6 (step 91): 1.339791

Train time for epoch #7 (step 106): 1.258762

Train time for epoch #8 (step 121): 1.307727

Train time for epoch #9 (step 136): 1.287761

Train time for epoch #10 (step 151): 1.371519

Train time for epoch #11 (step 166): 1.255022

Train time for epoch #12 (step 181): 1.281898

Train time for epoch #13 (step 196): 1.400596

Train time for epoch #14 (step 211): 1.245561

Train time for epoch #15 (step 226): 1.362233

Train time for epoch #16 (step 241): 1.326281

Train time for epoch #17 (step 256): 1.193607

Train time for epoch #18 (step 271): 1.251435

Train time for epoch #19 (step 286): 1.301348

Train time for epoch #20 (step 301): 1.282299

Train time for epoch #21 (step 316): 1.344952

Train time for epoch #22 (s


Train time for epoch #172 (step 2581): 1.251042

Train time for epoch #173 (step 2596): 1.283273

Train time for epoch #174 (step 2611): 1.336829

Train time for epoch #175 (step 2626): 1.223984

Train time for epoch #176 (step 2641): 1.281697

Train time for epoch #177 (step 2656): 1.393901

Train time for epoch #178 (step 2671): 1.335229

Train time for epoch #179 (step 2686): 1.195389

Train time for epoch #180 (step 2701): 1.281116

Train time for epoch #181 (step 2716): 1.284389

Train time for epoch #182 (step 2731): 1.369133

Train time for epoch #183 (step 2746): 1.242917

Train time for epoch #184 (step 2761): 1.291498

Train time for epoch #185 (step 2776): 1.259474

Train time for epoch #186 (step 2791): 1.268484

Train time for epoch #187 (step 2806): 1.340425

Train time for epoch #188 (step 2821): 1.348798

Train time for epoch #189 (step 2836): 1.359559

Train time for epoch #190 (step 2851): 1.262174

Train time for epoch #191 (step 2866): 1.350200

Train time for epoc