In [1]:
import numpy as np
import os
import random
import csv

class UNSWGenerator(object):
    """Iterator that yields shuffled, standardised episodes of UNSW rows.

    Every label listed for the selected split is read from
    ``<data_folder>/<label>_UNSW.csv`` and kept in memory.  Each iteration
    yields ``(step, (inputs, outputs))`` where ``inputs`` is a float32 array
    of shape ``(batch_size, classes * samples_per_class, 38)`` and
    ``outputs`` holds the matching 0/1 labels (0 = normal, 1 = attack).
    """

    # Per-feature mean of the 38 numeric columns, used for standardisation.
    _FEATURE_MEAN = (
        5.83990359e-01, 4.43920514e+03, 3.82112115e+04, 4.98032817e+01,
        3.16409877e+01, 5.55065064e+00, 1.73402538e+01, 2.01082635e+07,
        2.74214517e+06, 3.56218705e+01, 4.57140918e+01, 1.65776746e+02,
        1.65596682e+02, 1.39622413e+09, 1.39395230e+09, 1.25076663e+02,
        3.05978873e+02, 9.58984344e-02, 4.96244366e+03, 1.58372001e+03,
        7.78032492e+02, 2.25335137e+02, 9.18970630e+01, 4.39537934e-03,
        2.37087431e-03, 2.02450502e-03, 2.00713999e-03, 1.54769779e-01,
        1.05521278e-01, 1.82014026e-02, 1.83442595e-02, 6.72776896e+00,
        6.54925922e+00, 4.51331784e+00, 5.03591995e+00, 2.60927913e+00,
        1.92820439e+00, 4.06247991e+00,
    )
    # Per-feature standard deviation of the same 38 columns.
    _FEATURE_STD = (
        3.11718494e+00, 5.05267629e+04, 1.59565885e+05, 6.01614215e+01,
        3.68997951e+01, 2.04402226e+01, 5.60362463e+01, 9.02498798e+07,
        4.38022457e+06, 7.34661981e+01, 1.20085728e+02, 1.21618092e+02,
        1.21674944e+02, 1.43249037e+09, 1.43025723e+09, 1.53408714e+02,
        3.38400001e+02, 3.48792432e-01, 5.02959031e+04, 1.62749588e+04,
        2.81668871e+03, 3.03954671e+03, 1.58047419e+03, 3.18336467e-02,
        1.71418292e-02, 1.64202905e-02, 4.47561323e-02, 5.28876325e-01,
        3.88885807e-01, 1.35820667e-01, 1.37266282e-01, 7.48326410e+00,
        7.43839560e+00, 5.30909327e+00, 5.48918817e+00, 5.40727891e+00,
        3.86212078e+00, 7.39321762e+00,
    )

    def __init__(self, data_folder, letter_swap=1, batch_size=1, classes=5, samples_per_class=10, max_iter=None, train=True):
        """Load the CSV files of one split and pick the initial working labels.

        Args:
            data_folder: directory containing the ``<label>_UNSW.csv`` files.
            letter_swap: number of working-label replacements per episode.
            batch_size: number of episodes returned by each ``sample`` call.
            classes: number of working labels; the last two are always the
                normal label of the selected split.
            samples_per_class: rows drawn per working label per episode.
            max_iter: iteration limit, or ``None`` to iterate forever.
            train: choose the training label set (``True``) or the test one.
        """
        super(UNSWGenerator, self).__init__()
        self.data_folder = data_folder
        self.trainLabelDict = {"Normal":0, "Exploits":1, "Reconnaissance":1, "DoS":1,
                               "Generic": 1, "Shellcode":1, "Analysis":1}
        self.trainAttackDict = {"Exploits": 1, "Reconnaissance": 1, "DoS": 1,
                                "Generic": 1, "Shellcode": 1, "Analysis": 1}

        self.testLabelDict = {"Normal_test":0, "Backdoors_test":1, "Fuzzers_test": 1, "Worms_test":1,
                              "Generic_test": 1, "Shellcode_test":1, "Analysis_test":1}
        self.testAttackDict = {"Backdoors_test": 1, "Fuzzers_test": 1, "Worms_test": 1,
                              "Generic_test": 1, "Shellcode_test": 1, "Analysis_test": 1}

        # Attack names exactly as they are matched against raw CSV cells
        # (the padded spellings are compared verbatim, before any stripping).
        attack_tokens = ('Exploits', ' Reconnaissance ', 'DoS',
                         'Generic', ' Shellcode ', 'Analysis', 'Backdoors', ' Fuzzers ', 'Worms')

        def to_number(cell):
            # Blank cells -> 0.0, category names -> 0/1, anything else -> float.
            if cell in ('', ' '):
                return float(0)
            if cell == 'Normal':
                return 0
            if cell in attack_tokens:
                return 1
            return float(cell.strip())

        normal_label = "Normal" if train else "Normal_test"
        attack_labels = self.trainAttackDict if train else self.testAttackDict
        self.possible_labels = [normal_label] + list(attack_labels)
        self.active_dir = self.trainLabelDict if train else self.testLabelDict
        self.all_data_in_memory = {}

        for label in self.possible_labels:
            csv_path = os.path.join(self.data_folder, label + "_UNSW.csv")
            print("reading from {}".format(csv_path))
            with open(csv_path, 'rt') as csvfile:
                rows = list(csv.reader(csvfile, quotechar='|'))
                for row in rows:
                    # Only the first 38 columns are converted, in place.
                    for col in range(38):
                        row[col] = to_number(row[col])
                self.all_data_in_memory[label] = rows

        self.letter_swap = letter_swap
        self.batch_size = batch_size
        self.classes = classes
        self.samples_per_class = samples_per_class
        self.max_iter = max_iter
        self.num_iter = 0
        self.features = 38

        # The first ``classes - 2`` slots are random labels; the final two
        # slots always hold the normal label of the active split.
        self.working_labels = [
            self.possible_labels[random.randint(0, len(self.possible_labels) - 1)]
            for _ in range(self.classes - 2)
        ]
        self.working_labels.extend([normal_label] * 2)

        self.cacheDict = {}
        self.newest_swapped_letter = self.working_labels[0]
        self.sample_mean = np.array(self._FEATURE_MEAN)
        # Kept two-dimensional, shape (1, 38); it broadcasts over the rows.
        self.sample_std = np.array([self._FEATURE_STD])

    def __iter__(self):
        return self

    def __next__(self):
        return self.next()

    def next(self):
        """Return ``(step, episode)`` or raise ``StopIteration`` at the limit."""
        if self.max_iter is not None and self.num_iter >= self.max_iter:
            raise StopIteration
        step = self.num_iter
        self.num_iter += 1
        return step, self.sample(self.classes)

    def sample(self, nb_samples):
        """Swap working labels, then build ``batch_size`` shuffled episodes.

        Returns:
            ``(example_inputs, example_outputs)`` as float32 arrays of shape
            ``(batch_size, nb_samples * samples_per_class, features)`` and
            ``(batch_size, nb_samples * samples_per_class)``.
        """
        for _ in range(self.letter_swap):
            # Only the non-normal slots (all but the last two) are replaced.
            slot = random.randint(0, self.classes - 3)
            replacement_index = random.randint(0, len(self.possible_labels) - 1)
            self.newest_swapped_letter = self.working_labels[0]
            self.working_labels[slot] = self.possible_labels[replacement_index]

        episode_length = nb_samples * self.samples_per_class
        # float32 is hard-coded here; keep it in sync with the TF placeholders.
        example_inputs = np.zeros((self.batch_size, episode_length, self.features), dtype=np.float32)
        example_outputs = np.zeros((self.batch_size, episode_length), dtype=np.float32)

        for batch_index in range(self.batch_size):
            episode = []
            for active_label in self.working_labels:
                drawn_rows = random.sample(self.all_data_in_memory[active_label], self.samples_per_class)
                episode.extend((self.active_dir[active_label], row) for row in drawn_rows)
            random.shuffle(episode)

            labels, samples = zip(*episode)
            # Standardise each feature with the stored mean and deviation.
            standardised = (np.asarray(samples, dtype=np.float32) - self.sample_mean) / self.sample_std

            example_inputs[batch_index] = standardised
            example_outputs[batch_index] = np.asarray(labels, dtype=np.int32)

        return example_inputs, example_outputs

    def get_last_swapped_letter(self):
        """Return the first working label as recorded at the latest swap."""
        return self.newest_swapped_letter


In [2]:
class UNSWAnalystManagement:
    """Simulate a fixed pool of analysts that label samples after a delay.

    A job is a ``(sample, label)`` pair.  While an analyst is free the job is
    scheduled to finish ``delay_creator.get_rand_delay()`` time steps ahead;
    otherwise it waits in ``job_queue``.  When a job finishes, its sample and
    label are handed to ``image_classifier.add_image_sample`` (if a classifier
    was supplied) and the next queued job, if any, is started.
    """

    def __init__(self, number_of_analysts, delay_creator, image_classifier, maximal_size=10000000):
        """
        Args:
            number_of_analysts: how many jobs may be in progress at once.
            delay_creator: object exposing ``get_rand_delay()``.
            image_classifier: object exposing ``add_image_sample`` and
                ``distance_from_sample``, or ``None``.
            maximal_size: length of the circular timeline used for scheduling.
        """
        self.delay_creator = delay_creator
        self.current_delay_position = 0
        self.maximal_size = maximal_size
        # Maps a timeline position to the jobs that finish at that position.
        self.delayed_classifications = {}
        self.number_of_analysts = number_of_analysts
        self.busy_analysts = 0
        self.job_queue = []
        self.image_classifier = image_classifier

    def __add_item(self, item_to_add, delay=0):
        """Schedule ``item_to_add`` to finish ``delay`` steps from now."""
        position_after_delay = (self.current_delay_position + delay) % self.maximal_size
        self.delayed_classifications.setdefault(position_after_delay, []).append(item_to_add)

    def __start_processing_of_job(self, buffer):
        """Occupy one analyst with ``buffer`` for a randomly drawn delay."""
        wait_for = self.delay_creator.get_rand_delay()
        self.busy_analysts += 1
        self.__add_item(buffer, delay=wait_for)

    def analysts_load(self):
        """Return (jobs in progress + jobs queued) / number of analysts."""
        return float(self.busy_analysts + len(self.job_queue)) / self.number_of_analysts

    def add_classification_job(self, buffer):
        """Start ``buffer`` right away if an analyst is free, else queue it."""
        if self.busy_analysts < self.number_of_analysts:
            self.__start_processing_of_job(buffer)
        else:
            self.job_queue.append(buffer)

    def free_job(self):
        """Release one analyst and start the oldest queued job, if any."""
        self.busy_analysts -= 1
        if self.job_queue:
            self.__start_processing_of_job(self.job_queue.pop(0))

    def advance_time(self):
        """Move one step forward and complete every job due at that step."""
        self.current_delay_position = (self.current_delay_position + 1) % self.maximal_size
        finished_jobs = self.delayed_classifications.get(self.current_delay_position)
        if finished_jobs is None:
            return
        # Iterate the live list on purpose: free_job() may schedule a queued
        # job with zero delay onto this same position, and it must then be
        # completed within this step as well.
        for element in finished_jobs:
            self.free_job()
            if self.image_classifier is not None:
                self.image_classifier.add_image_sample(element[0], element[1])
        del self.delayed_classifications[self.current_delay_position]

    def distance_to_samples_in_work(self, sample):
        """Return the classifier's distance from ``sample`` to pending jobs.

        Every queued or in-progress sample is passed to
        ``image_classifier.distance_from_sample``.  ``10.0`` is returned when
        that call yields ``None`` or when no classifier is configured.
        """
        if self.image_classifier is None:
            # advance_time() accepts a missing classifier, so this must too.
            return 10.0

        samples_in_work = [sample_file for (sample_file, _) in self.job_queue]
        for in_progress in self.delayed_classifications.values():
            samples_in_work.extend(element[0] for element in in_progress)

        clean_distance = self.image_classifier.distance_from_sample(samples_in_work, sample)
        if clean_distance is not None:
            return clean_distance
        return 10.0

In [3]:
import numpy as np
from sklearn.naive_bayes import GaussianNB

from sklearn.metrics.pairwise import paired_cosine_distances
from sklearn.metrics.pairwise import paired_manhattan_distances
from sklearn.metrics.pairwise import paired_euclidean_distances



class UNSWClassifier(object):
    """Nearest-neighbour scorer over two bounded sample memories.

    Samples labelled ``0`` are kept in the "normal" memory and every other
    label in the "mal" memory; each memory drops its oldest entry once it
    exceeds its size limit.  ``classify`` scores a vector per class from the
    distance to the closest stored sample of that class.
    """

    def __init__(self, number_of_classes, normal_memory_size=1000, mal_memory_size=50, max_distance=2.0, similarity='l2'):
        """
        Args:
            number_of_classes: number of class scores returned by ``classify``.
            normal_memory_size: maximum number of stored label-0 samples.
            mal_memory_size: maximum number of stored non-zero-label samples.
            max_distance: distance assigned to a class with no close sample.
            similarity: ``'cos'``, ``'l1'``, or anything else for euclidean.
        """
        super(UNSWClassifier, self).__init__()
        self.number_of_classes = number_of_classes
        self.normal_memory_size = normal_memory_size
        self.mal_memory_size = mal_memory_size
        self.normal_labels = []
        self.mal_labels = []
        self.normal_data_storage = None
        self.mal_data_storage = None
        self.gnb = GaussianNB()
        self.max_distance = max_distance
        self.similarity = similarity

    def _stored_data(self):
        """Return the normal samples followed by the mal samples."""
        if self.normal_data_storage is None:
            return self.mal_data_storage
        if self.mal_data_storage is None:
            return self.normal_data_storage
        return self.normal_data_storage + self.mal_data_storage

    def _distances_to(self, stored, query):
        """Return the distance from ``query`` to each row of ``stored``."""
        # The paired_* helpers compare row i with row i, so the query is
        # repeated once per stored row.
        tiled_query = np.tile(query, (len(stored), 1))
        if self.similarity == 'cos':
            return paired_cosine_distances(stored, tiled_query)
        if self.similarity == 'l1':
            return paired_manhattan_distances(stored, tiled_query)
        return paired_euclidean_distances(stored, tiled_query)

    def classify(self, data_v):
        """Return one score per class: ``max_distance - nearest distance``.

        A zero vector is returned while fewer than two samples are stored.
        Classes without any sample closer than ``max_distance`` score 0.
        """
        if (len(self.normal_labels) + len(self.mal_labels)) < 2:
            print("Calling classification without enough image samples - returning zero vector")
            return np.zeros(self.number_of_classes)

        all_data = self._stored_data()
        all_labels = self.normal_labels + self.mal_labels

        min_distance = np.ones(self.number_of_classes) * self.max_distance
        distances = self._distances_to(all_data, data_v)

        for label, distance in zip(all_labels, distances):
            magnitude = abs(distance)
            if magnitude < min_distance[label]:
                min_distance[label] = magnitude

        return self.max_distance - min_distance

    def add_image_sample(self, data_v, label):
        """Store a labelled sample, trim the memory, and refit ``self.gnb``.

        Labels ``>= number_of_classes`` are reported and ignored.
        """
        if label >= self.number_of_classes:
            print("image label must not be greater than number of classes - 1")
            return

        if label == 0:  # Normal
            if self.normal_data_storage is None:
                self.normal_data_storage = [data_v]
                self.normal_labels = [label]
            else:
                self.normal_data_storage.append(data_v)
                self.normal_labels.append(label)

            if len(self.normal_labels) > self.normal_memory_size:
                # Drop the oldest entry to respect the memory limit.
                self.normal_data_storage = self.normal_data_storage[1:]
                self.normal_labels = self.normal_labels[1:]
        else:
            if self.mal_data_storage is None:
                self.mal_data_storage = [data_v]
                self.mal_labels = [label]
            else:
                self.mal_data_storage.append(data_v)
                self.mal_labels.append(label)

            if len(self.mal_labels) > self.mal_memory_size:
                self.mal_data_storage = self.mal_data_storage[1:]
                self.mal_labels = self.mal_labels[1:]

        all_data = self._stored_data()
        all_labels = self.normal_labels + self.mal_labels

        self.gnb = GaussianNB()
        self.gnb = self.gnb.fit(np.array(all_data), np.array(all_labels))

    def distance_from_sample(self, all_data, single_samples):
        """Return the smallest distance from ``single_samples`` to ``all_data``.

        The result is capped at ``5.0``.  ``None`` is returned when
        ``all_data`` is ``None`` or empty.  ``all_data`` may be a list or an
        array: the identity check avoids the element-wise ``== None``
        comparison that arrays perform.
        """
        if all_data is None or len(all_data) == 0:
            return None

        distances = self._distances_to(all_data, single_samples)

        min_distance = 5.0
        for distance in distances:
            magnitude = abs(distance)
            if magnitude < min_distance:
                min_distance = magnitude

        return min_distance

    def get_database_size(self):
        """Return the total number of stored samples."""
        return len(self.normal_labels) + len(self.mal_labels)

In [4]:
class UniformDelay():
    """Source of random integer delays drawn from a uniform distribution."""

    def __init__(self, par=1.0):
        """
        Args:
            par: controls the largest delay; with a non-negative ``par`` the
                delays range from 1 up to ``int(par + 1.1)``.
        """
        self.par = par

    def get_rand_delay(self):
        """Return a delay drawn uniformly between 1.0 and ``par + 1.1``, truncated.

        The previous call passed ``par + 1.1`` as the only argument, which
        made it the *lower* bound with the default upper bound of 1.0.  NumPy
        documents ``high < low`` as undefined behaviour, so the two bounds are
        now passed explicitly and in order; the sampled interval is the same.
        """
        # 1.1 (rather than 1.0) keeps the top integer reachable after int().
        low, high = sorted((1.0, self.par + 1.1))
        return int(np.random.uniform(low, high))

In [5]:
import tensorflow as tf

from tensorflow.contrib import rnn
from tensorflow.python.ops import math_ops

class DeROLAgent:
    """LSTM policy network that outputs one Q-value per action (TF1 graph mode).

    The action set is {class 0, class 1, ..., request classification, holdout},
    i.e. ``actions - 2`` class predictions followed by two special actions.
    The whole graph is built in the constructor; callers drive it through the
    placeholders and tensors stored as attributes.
    """
    # Actions are {class 0, class 1, ..., request classification, holdout}
    def __init__(self, batch_size, samples_per_batch, total_input_size, actions, lstm_units=200, learning_rate=0.001):
        """Build the policy graph.

        Args:
            batch_size: fixed batch dimension of the input placeholder and of
                the LSTM zero state.
            samples_per_batch: number of time steps in a training sequence;
                also the default value of ``timeseries_length``.
            total_input_size: size of one input feature vector.
            actions: total number of actions (number of classes + 2).
            lstm_units: number of units in the LSTM cell.
            learning_rate: default learning rate of the Adam optimizer.
        """
        # Constants
        self.policy_lstm_units = lstm_units
        self.input_size = total_input_size
        # NOTE: this integer is replaced further down by an int32 placeholder
        # of the same attribute name; the local ``actions`` keeps the count.
        self.actions = actions
        self.number_of_classes = actions-2

        # Input sequences: [batch_size, time, input_size]; time is left open.
        self.X = tf.placeholder("float", [batch_size, None, self.input_size])
        # Defaults to 1.0; not referenced anywhere else in this constructor.
        self.keep_prob = tf.placeholder_with_default(tf.constant(1.0, tf.float32), None)

        self.policy_lstm_cell = rnn.BasicLSTMCell(self.policy_lstm_units, forget_bias=1.0, activation=math_ops.tanh)
        # Initial LSTM state; it is passed to dynamic_rnn below and can be fed
        # by callers to carry state between calls.
        self.policy_state_in = self.policy_lstm_cell.zero_state(batch_size, tf.float32)

        # Both placeholders fall back to the constructor arguments when not fed.
        self.timeseries_length = tf.placeholder_with_default(tf.constant(samples_per_batch, tf.float32), None)
        self.learning_rate = tf.placeholder_with_default(tf.constant(learning_rate, tf.float32), None)

        # Two dense layers on top of the LSTM output: lstm_units -> 128 -> actions.
        self.policy_weights = {
            'w1': tf.Variable(tf.random_normal([self.policy_lstm_units, 128], stddev=0.01)),
            'out': tf.Variable(tf.random_normal([128, actions], stddev=0.01))
        }
        self.policy_biases = {
            'b1': tf.Variable(tf.random_normal([128])),
            'out': tf.Variable(tf.random_normal([actions]))
        }

        # DQN: run the LSTM over the sequence, then flatten batch and time so
        # the dense layers are applied to every time step.
        self.policy_rnnex_t, self.policy_rnn_state = tf.nn.dynamic_rnn( \
            inputs=self.X, cell=self.policy_lstm_cell, dtype=tf.float32, initial_state=self.policy_state_in, sequence_length=self.timeseries_length)
        self.policy_rnnex = tf.reshape(self.policy_rnnex_t, [-1, self.policy_lstm_units])

        self.hidden_layer = tf.nn.tanh(tf.add(tf.matmul(self.policy_rnnex, self.policy_weights['w1']), self.policy_biases['b1']))
        self.policy_logits = tf.add(tf.matmul(self.hidden_layer, self.policy_weights['out']), self.policy_biases['out'])
        # One row of Q-values per (batch, time step) pair.
        self.policy_logits = tf.reshape(self.policy_logits, [-1, actions])

        # Policy: greedy action, i.e. the index of the largest Q-value per row.
        self.predicted_action = tf.argmax(self.policy_logits, 1)

        # Training: callers feed the actions that were taken and their target
        # Q-values, both shaped [batch_size, samples_per_batch].
        self.actions = tf.placeholder(shape=[batch_size, samples_per_batch], dtype=tf.int32)
        self.actions_onehot = tf.one_hot(self.actions, actions, dtype=tf.float32)
        self.Q_calculation = tf.placeholder(shape=[batch_size, samples_per_batch], dtype=tf.float32)

        # Select the Q-value of the taken action by masking with the one-hot
        # encoding and summing over the action axis.
        self.actions_onehot_reshaped = tf.reshape(self.actions_onehot, [-1, actions])
        self.Q = tf.reduce_sum(tf.multiply(self.policy_logits, self.actions_onehot_reshaped), axis=1)
        self.Q_reshaped = tf.reshape(self.Q, [batch_size, samples_per_batch])

        # Sum of squared errors between predicted and target Q-values.
        self.loss = tf.reduce_sum(tf.square(self.Q_reshaped - self.Q_calculation))

        self.trainer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.updateModel = self.trainer.minimize(self.loss)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [6]:
import numpy as np


class DelayClassification:
    """Circular, time-indexed buffer of samples whose handling was postponed.

    Items are stored under the position at which they become due.  Only the
    bucket of the current position can be read, and the position may advance
    only once that bucket is empty.  ``delayed_samples`` counts every item
    that was added and has not been handed back yet.
    """

    def __init__(self, max_load, maximal_size=100000):
        """
        Args:
            max_load: number of delayed samples that corresponds to a load of 1.0.
            maximal_size: length of the circular timeline.
        """
        self.maximal_size = maximal_size
        self.current_position = 0
        # Maps a timeline position to the list of items due at that position.
        self.delayed_inputs = {}
        self.experience_buffer = []
        self.current_batch_buffer = []
        self.agentManager = None
        self.batch_number = 0
        self.delayed_samples = 0
        self.rewards = []
        self.max_load = max_load

    def _pending(self):
        """Return the (possibly empty) list of items due at the current position."""
        return self.delayed_inputs.get(self.current_position, [])

    def _store_pending(self, remaining):
        """Replace the current bucket, dropping the key once nothing is left."""
        if remaining:
            self.delayed_inputs[self.current_position] = remaining
        else:
            # Emptied buckets are removed so the dict does not keep one stale
            # entry per visited position.
            self.delayed_inputs.pop(self.current_position, None)

    def advance_state(self):
        """Move to the next position.

        Raises:
            SystemExit: if items due at the current position were not taken.
        """
        # Verify no samples were left untouched
        unattended = len(self._pending())
        if unattended != 0:
            print("Fatal error in DelayClassification, state advancement is not allowed with unattended samples, " +
                  "there are still " + str(unattended) + " samples")
            # Raised directly instead of calling the ``exit`` helper, which is
            # only installed by the ``site`` module.
            raise SystemExit(1)

        self.current_position = (self.current_position + 1) % self.maximal_size

    def add_item(self, item_to_add, delay=0):
        """Store ``item_to_add`` so it becomes due ``delay`` positions from now."""
        position_after_delay = (self.current_position + delay) % self.maximal_size
        self.delayed_inputs.setdefault(position_after_delay, []).append(item_to_add)
        self.delayed_samples += 1

    def get_current_position(self):
        return self.current_position

    def get_number_of_samples_to_handle(self):
        """Return how many items are due at the current position."""
        return len(self._pending())

    def is_waiting_sample(self):
        """Return True when at least one item is due at the current position."""
        return len(self._pending()) > 0

    def get_buffer_size(self):
        return self.maximal_size

    def get_sample(self):
        """Remove and return the oldest due item, or ``None`` if none is due."""
        pending = self._pending()
        if not pending:
            print("in DelayClassification, asked for samples with empty buffer, returning None")
            return None
        to_return = pending[0]
        self._store_pending(pending[1:])
        self.delayed_samples -= 1
        return to_return

    def get_up_to_samples(self, number_of_samples):
        """Remove and return up to ``number_of_samples`` due items.

        ``number_of_samples`` is expected to be non-negative.  Returns ``None``
        when nothing is due.
        """
        pending = self._pending()
        if not pending:
            print("in DelayClassification, asked for samples with empty buffer, returning None")
            return None
        to_return = pending[0:number_of_samples]
        self._store_pending(pending[number_of_samples:])
        # Keep the counter consistent with get_sample(): every item handed
        # back stops counting as delayed.
        self.delayed_samples -= len(to_return)
        return to_return

    def get_all_samples(self):
        """Remove and return every due item (an empty list if none is due)."""
        pending = self._pending()
        if not pending:
            return []
        self._store_pending([])
        self.delayed_samples -= len(pending)
        return pending

    def get_number_of_delayed(self):
        """Return the number of stored items that were not handed back yet."""
        return self.delayed_samples

    def get_load(self):
        """Return the number of delayed samples relative to ``max_load``."""
        return self.delayed_samples / float(self.max_load)

In [7]:
from threading import Lock
import numpy as np


class ExperimentLogger:
    """Collect experience tuples into fixed-size batches, optionally delayed.

    Elements are appended to a working batch; once it holds
    ``samples_in_batch`` elements it is moved to ``experience_buffer``.
    Elements added with a delay are parked on a circular timeline and are
    appended when ``advance_state`` reaches their position.
    """

    def __init__(self, samples_in_batch, maximal_size=10000000, graph_creator=None):
        """
        Args:
            samples_in_batch: number of elements that make up one batch.
            maximal_size: length of the circular timeline; must be positive.
            graph_creator: optional object exposing ``add_score_sample``.  When
                given, ``element[2]`` of every element is recorded as a reward.

        Raises:
            ValueError: if ``maximal_size`` is not positive.
        """
        if maximal_size <= 0:
            raise ValueError('maximal value for iterations should be positive, obtained {}'.format(maximal_size))
        self.maximal_size = maximal_size
        self.graph_creator = graph_creator
        self.current_position = 0
        # Maps a timeline position to the elements that become due there.
        self.delayed_inputs = {}
        self.samples_in_batch = samples_in_batch
        self.experience_buffer = []
        self.current_batch_buffer = []
        self.agentManager = None
        self.lock = Lock()
        self.batch_number = 0
        self.rewards = []

    def set_agents(self, agentManager):
        """Register the object whose ``free_job`` is called per delayed element."""
        self.agentManager = agentManager

    def __add_to_internal_current_buffer(self, element_to_add):
        """Append one element and close the working batch when it is full."""
        self.current_batch_buffer.append(element_to_add)
        if self.graph_creator is not None:
            self.rewards.append(element_to_add[2])
        if len(self.current_batch_buffer) == self.samples_in_batch:
            self.experience_buffer.append(self.current_batch_buffer)
            self.current_batch_buffer = []

    def advance_state(self):
        """Move one position forward and flush the elements due there."""
        self.current_position = (self.current_position + 1) % self.maximal_size

        due_elements = self.delayed_inputs.pop(self.current_position, None)
        if due_elements is None:
            return
        for element in due_elements:
            self.__add_to_internal_current_buffer(element)
            if self.agentManager is not None:
                self.agentManager.free_job()

    def add_item(self, item_to_add, delay=0, update=True):
        """Record ``item_to_add`` now (``delay == 0``) or ``delay`` steps ahead.

        When ``update`` is true the timeline advances by one position afterwards.
        """
        if delay == 0:
            self.__add_to_internal_current_buffer(item_to_add)
        else:
            position_after_delay = (self.current_position + delay) % self.maximal_size
            self.delayed_inputs.setdefault(position_after_delay, []).append(item_to_add)

        if update:
            self.advance_state()

    def create_batch(self, batch_size):
        """Remove and return the ``batch_size`` oldest completed batches.

        If a ``graph_creator`` is set, the mean reward of the elements in the
        returned batches is reported to it.

        Raises:
            ValueError: if fewer than ``batch_size`` batches are available.
        """
        if len(self.experience_buffer) < batch_size:
            raise ValueError('Not enough stored batches in experience creator, requested {}, currently buffer of size {}'.format(batch_size, len(self.experience_buffer)))
        if self.graph_creator is not None:
            # One reward is recorded per element, so the returned batches
            # account for batch_size * samples_in_batch rewards.  Consuming
            # only batch_size of them would leave the list growing forever
            # and the reported score lagging behind the returned data.
            consumed = batch_size * self.samples_in_batch
            self.graph_creator.add_score_sample(np.average(self.rewards[:consumed]))
            self.rewards = self.rewards[consumed:]

        to_be_returned = self.experience_buffer[:batch_size]
        self.experience_buffer = self.experience_buffer[batch_size:]
        self.batch_number += 1
        return to_be_returned

    def number_of_batches(self):
        """Return the number of completed batches waiting to be consumed."""
        return len(self.experience_buffer)

    def get_current_position(self):
        return self.current_position

    def get_buffer_size(self):
        return self.maximal_size

In [10]:
import tensorflow as tf
import numpy as np
import sys

np.set_printoptions(threshold=sys.maxsize)
import time
import math
import os
## Configurations
# Used to build the checkpoint folder name in benchmark().
experiment_name = "one-shot-UNSW-NB15"
batch_size = 1
# Number of working labels per episode (passed to UNSWGenerator as `classes`).
loaded_characters = 3
# Number of output classes of the classifier.
total_classes = 2
# Two extra actions on top of the class predictions: index `total_classes`
# requests an analyst classification and `total_classes + 1` delays the sample.
actions = total_classes + 2
samples_per_class = 20
samples_per_batch = loaded_characters * samples_per_class
# Size of the policy input built in benchmark(): four scalar features
# followed by one classifier score per class (4 + total_classes).
total_input_size = 6
# Not passed to DeROLAgent below, which uses its own default of 200; it sizes
# the zero LSTM states created in benchmark(), so the two must stay equal.
lstm_units = 200
learning_rate = 0.001
# NOTE(review): presumably the discount factor of the Q-targets; it is not used
# in the visible part of this file - confirm.
gamma = 0.5
# Probability of overriding the greedy action with a random one.
eps = 0.05
number_of_analysts = 2
# Initial penalty of a sample that has not been delayed yet; the penalty
# carried by a delayed sample is multiplied by the factor below on each delay.
delay_classification_penalty_base = -1.0
delay_classification_penalty_multiplier = 2
# Number of time steps after which a delayed sample is presented again.
classification_delay_timesteps = 5
# Reward for a wrong prediction; also given when a delay or an analyst request
# is refused because the respective capacity is exhausted.
wrong_classification_penalty = -2
analyst_load_penalty_multiplier = -0.5
# Analyst load at or above which classification requests are refused.
full_analyst_load = 5.0
# Parameter handed to UniformDelay; it controls the longest analyst delay.
analyst_delay_param = 9.0
# NOTE(review): not used in the visible part of this file - confirm its role.
training_batches = 20
# Progress is printed (and, while training, the model saved) every this many steps.
statistics_siplay_step = 50
# Capacity of the delay buffer; further delay actions are penalised once reached.
max_number_of_delayed = 100
is_training_phase = True
## End Configurations


# Start from an empty default graph so that re-running this cell does not
# add a second copy of the agent's variables.
tf.reset_default_graph()

rlAgent = DeROLAgent(batch_size=batch_size, samples_per_batch=samples_per_batch, total_input_size=total_input_size,
                                    actions=actions, learning_rate=learning_rate)
image_classifier = UNSWClassifier(total_classes, normal_memory_size=10, mal_memory_size=10, max_distance=5.0, similarity='l2')
analyst_expr_manager = ExperimentLogger(samples_per_batch, graph_creator=None)
samples_delay_manager = DelayClassification(max_load=max_number_of_delayed)
delay_creator = UniformDelay(analyst_delay_param)
job_manager = UNSWAnalystManagement(number_of_analysts, delay_creator, image_classifier)


# Must be created after the agent so that it covers the agent's variables.
init = tf.global_variables_initializer()

def benchmark(folder):
    print("Starting Training from data: " + folder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(init)

        # try to load a saved model
        model_loader = tf.train.Saver()
        current_model_folder = "trained_models/backup/model-" + experiment_name
        if (os.path.exists(current_model_folder)):
            if os.path.exists(current_model_folder + "/model.ckpt"):
                print("Loading pre calculaated model")
                model_loader.restore(sess, current_model_folder + "/model.ckpt")
                print("v1 : %s" % rlAgent.policy_biases['out'].eval()[0])
        else:
            print("Creating the folder for the model to be stored in")
            os.makedirs(current_model_folder)

        generator = UNSWGenerator(data_folder=folder, batch_size=batch_size, classes=loaded_characters,
                                                          samples_per_class=samples_per_class)

        t0 = time.time()

        global eps
        global is_training_phase

        policy_online_state = (np.zeros([batch_size, lstm_units]),
                 np.zeros([batch_size, lstm_units]))  # Reset the policy layer's hidden state for iterative actions

        policy_training_state = (np.zeros([batch_size, lstm_units]),
                                 np.zeros(
                                     [batch_size, lstm_units]))  # Reset the policy layer's hidden state for training

        accuracy_agg = []
        confusion_matrix = np.zeros(6)
        sample_per_batch_agg = []
        reward_agg = []
        loss_agg = []

        r_m1 = None
        a_m1 = None
        x_m1 = None

        for i, (image_files_t, labels_t) in generator:

            for batch in range(batch_size):
                image_files = image_files_t[batch]
                labels = labels_t[batch]
                sample_counter = 0
                samples_length = len(image_files)
                if ((i % statistics_siplay_step == 0) and (batch == 0)):
                    print("Starting policy training on step " + str(i) + ", over data with length: " + str(samples_length))
                    if(is_training_phase):
                        print("Saving most recent model")
                        save_path = model_loader.save(sess, current_model_folder + "/model.ckpt")
                        print("Model saved in path: %s" % save_path)
                        print("v1 : %s" % rlAgent.policy_biases['out'].eval()[0])


                timeseries = np.array(image_files)
                batch_y = np.array([int(x) for x in labels.flatten()])

                sample = 0
                reward_sampling = []

                while sample < samples_length:

                    x_sample_buffer = None
                    classification_logits_sample = None
                    sample_file = None
                    is_delayed_sample = False
                    delay_penalty = delay_classification_penalty_base
                    y = -1

                    if(samples_delay_manager.is_waiting_sample()):
                        x_sample_buffer_tuple = samples_delay_manager.get_sample()
                        delay_penalty = x_sample_buffer_tuple[0]
                        x_sample_buffer = x_sample_buffer_tuple[1]
                        sample_file = x_sample_buffer_tuple[3]
                        y = x_sample_buffer_tuple[2]
                        classification_logits_sample = image_classifier.classify(sample_file)
                        is_delayed_sample = True
                    else:
                        y = int(batch_y[sample])
                        classification_logits_sample = image_classifier.classify(timeseries[sample])
                        x_sample_buffer = timeseries[sample]
                        sample_file = timeseries[sample]

                    accuracy_agg.append(np.argmax(classification_logits_sample) == y)
                    sample_counter += 1

                    sample_for_policy = np.concatenate(
                        ([samples_delay_manager.get_load()], [delay_penalty], [job_manager.analysts_load()], [job_manager.distance_to_samples_in_work(x_sample_buffer)], np.reshape(classification_logits_sample, -1)))
                    qp1, a, new_online_state = sess.run([rlAgent.policy_logits, rlAgent.predicted_action, rlAgent.policy_rnn_state],
                                                        feed_dict={rlAgent.X: np.reshape(sample_for_policy, [batch_size, 1, -1]),
                                                                   rlAgent.policy_state_in: policy_online_state, rlAgent.timeseries_length: 1})

                    policy_online_state = new_online_state
                    a = a[0]
                    qp1 = qp1[0]

                    rand_action_sample = np.random.rand(1)
                    if rand_action_sample < eps:
                        rand_action_sample = np.random.rand(1)
                        if rand_action_sample < 0.25:
                            a = total_classes
                        elif rand_action_sample < 0.5:
                            a = total_classes + 1
                        elif rand_action_sample < 0.75:
                            a = y
                        else:
                            a = np.random.randint(0, total_classes)

                    reward = 0
                    if (a == total_classes + 1):  # Delay in classification
                        if(samples_delay_manager.get_number_of_delayed() >= max_number_of_delayed ):
                            reward = wrong_classification_penalty
                            confusion_matrix[4] += 1
                        else:
                            reward = delay_penalty
                            delay_penalty *= delay_classification_penalty_multiplier
                            samples_delay_manager.add_item((delay_penalty, x_sample_buffer, y, sample_file), delay=classification_delay_timesteps)
                            confusion_matrix[2] += 1

                    elif (a == total_classes):  # Asked for classification
                        if(job_manager.analysts_load() >= full_analyst_load):
                            reward = wrong_classification_penalty
                            confusion_matrix[5] += 1
                        else:
                            job_manager.add_classification_job((sample_file, y))
                            reward = math.floor(analyst_load_penalty_multiplier * job_manager.analysts_load() + 0.1)
                            confusion_matrix[3] += 1
                    else: # Automatic prediction
                        if (a != y):
                            if(y == 0):
                                reward = wrong_classification_penalty
                            else:
                                reward = wrong_classification_penalty * 15
                            confusion_matrix[1] += 1
                        else:
                            confusion_matrix[0] += 1
                            if(y != 0):
                                reward = 1

                    reward_sampling.append(reward)
                    max_q = max(qp1)
                    if(a_m1 != None and is_training_phase):
                        analyst_expr_manager.add_item(
                            np.array([x_m1, a_m1, r_m1, max_q]), delay=0)
                    x_m1 = sample_for_policy
                    a_m1 = a
                    r_m1 = reward

                    # Train the policy
                    if( (analyst_expr_manager.number_of_batches() >= batch_size) and is_training_phase):
                        expr_matrix = np.reshape(analyst_expr_manager.create_batch(batch_size), [-1, 4])
                        x_matrix = np.reshape(np.vstack(expr_matrix[:, 0]),
                                              [batch_size, samples_per_batch, total_input_size])
                        action_matrix = np.reshape(expr_matrix[:, 1], [-1, samples_per_batch])
                        q_calc_expr = expr_matrix[:, 2] + gamma * expr_matrix[:, 3]
                        q_calc_expr = np.reshape(q_calc_expr, [-1, samples_per_batch])

                        state_t, _, loss = sess.run([rlAgent.policy_rnn_state, rlAgent.updateModel, rlAgent.loss],
                                              feed_dict={rlAgent.X: x_matrix, rlAgent.actions: action_matrix,
                                                         rlAgent.Q_calculation: q_calc_expr, rlAgent.policy_state_in: policy_training_state})
                        policy_training_state = policy_online_state
                        loss_agg.append(loss)


                    if(a != total_classes + 1): #advance timestamp count if sample is not delayed
                        job_manager.advance_time()
                        if(not samples_delay_manager.is_waiting_sample()):
                            samples_delay_manager.advance_state()
                    if(not is_delayed_sample):
                        sample += 1

                sample_per_batch_agg.append(sample_counter)
                reward_agg.append(np.sum(reward_sampling))
                reward_sampling = []


            #Statistics
            if(i % statistics_siplay_step == 0 and i!=0):
                print ("Batch " + str(i) + " finished after " + str(time.time() - t0) + " seconds")
                print ("Average cycle reward is: " + str(np.sum(reward_agg)/statistics_siplay_step))
                print("averaged accuracy since last print is " + str(np.average(accuracy_agg)))
                if(is_training_phase):
                    print("averaged loss since last print " + str(np.average(loss_agg)))
                print("confusion matrix: " + str(confusion_matrix / (batch_size * statistics_siplay_step)))
                print("Average number of samples: new: {}, total: {}".format(samples_per_batch, np.average(sample_per_batch_agg)))

                #Clear the counters
                confusion_matrix = np.zeros(6)
                accuracy_agg = []
                sample_per_batch_agg = []
                loss_agg = []
                reward_agg = []

            if i == training_batches:
                eps = 0.0
                is_training_phase = False

            if i == training_batches*2:
                print("Finished full system training, execution time " + str(time.time() - t0) + " seconds")
                return
# Launch the run with 'datasets' as the data folder (the captured output below
# shows the per-category CSV files being read from that folder).
benchmark('datasets')

Starting Training from data: datasets
reading from datasets\Normal_UNSW.csv
reading from datasets\Exploits_UNSW.csv
reading from datasets\Reconnaissance_UNSW.csv
reading from datasets\DoS_UNSW.csv
reading from datasets\Generic_UNSW.csv
reading from datasets\Shellcode_UNSW.csv
reading from datasets\Analysis_UNSW.csv
Starting policy training on step 0, over data with length: 60
Saving most recent model
Model saved in path: trained_models/backup/model-one-shot-UNSW-NB15/model.ckpt
v1 : 0.6648777
Calling classification without enough image samples - returning zero vector
Calling classification without enough image samples - returning zero vector
Calling classification without enough image samples - returning zero vector
Calling classification without enough image samples - returning zero vector
Calling classification without enough image samples - returning zero vector
Calling classification without enough image samples - returning zero vector
Calling classification without enough image sa



In [None]:
# One-off data preparation: only needs to be executed once, before the first run.
import numpy as np
import pandas as pd

# Read zero data rows, just to determine the number of columns in the file
size = pd.read_csv('UNSW-NB15_2.csv', nrows=0, header=None).shape[1]

# Folder Structure Preparation
# 1. Column 45 (for excel - AV) has the attack name or blank for normal
#    behavior - will be referred to as "Record Category".
#    NOTE(review): the code below reads the category from column label 47
#    (0-based), which is Excel column AV; the "45" above looks stale - confirm.
# 2. Partition the file by record category, with the following rules
#     a. file names start with a capital letter, normal records are called "Normal"
#     b. files for train should be called <category>_UNSW.csv - example Generic_UNSW.csv
#     c. files for test should be called <category>_test_UNSW.csv - example Shellcode_test_UNSW.csv
#     d. all categorical and time columns (1,2,3,4,5,6,14,21,22,45,46) should be deleted
#        (0-based: 0-5,13,20,21,44,45)
#     e. files should not contain any headers
# 3. Pass the parent directory with all the files when running the algorithm

# 0-based positions of the columns to drop; everything else is loaded.
ranges = np.r_[0:6, 13, 20:22, 44:46]
ar = np.delete(np.arange(size), ranges)

# With header=None and usecols, the remaining columns keep their original
# integer labels, so the record category is still addressed as column 47.
df = pd.read_csv('UNSW-NB15_2.csv', usecols=ar, low_memory=False, header=None)

# One frame per record category. The label strings are compared verbatim
# (including the surrounding blanks some of them carry).
# NOTE(review): verify these spellings against the raw file - a label that
# does not match exactly yields an empty frame.
df_Normal = df[df[47].isnull()].reset_index(drop=True)
df_Fuzzers = df[df[47] == ' Fuzzers '].reset_index(drop=True)
df_Exploits = df[df[47] == 'Exploits'].reset_index(drop=True)
df_DoS = df[df[47] == 'DoS'].reset_index(drop=True)
df_Generic = df[df[47] == 'Generic'].reset_index(drop=True)
df_Shellcode = df[df[47] == ' Shellcode '].reset_index(drop=True)
df_Analysis = df[df[47] == 'Analysis'].reset_index(drop=True)
df_Reconnaissance = df[df[47] == ' Reconnaissance '].reset_index(drop=True)
df_Worms = df[df[47] == 'Worms'].reset_index(drop=True)
df_Backdoor = df[df[47] == 'Backdoor'].reset_index(drop=True)
allDF = [df_Normal, df_Fuzzers, df_Exploits, df_DoS, df_Generic, df_Shellcode, df_Analysis, df_Reconnaissance, df_Worms,
         df_Backdoor]
for tempDF in allDF:
    if tempDF.empty:
        # No record carries this label, so there is no category name to
        # derive and nothing to write.
        continue

    # Draw the training rows first and keep their original index, so the
    # test set can be built from exactly the rows that were NOT drawn.
    # (Resetting the index before the exclusion would make the mask select
    # the first len(dfTrain) positions instead of the sampled rows, leaking
    # training rows into the test file.)
    dfTrain = tempDF.sample(frac=0.7)
    dfTest = tempDF.drop(index=dfTrain.index).reset_index(drop=True)
    dfTrain = dfTrain.reset_index(drop=True)

    # Take the label from the unsplit frame: the test split can be empty for
    # very small categories. A missing (NaN) label marks normal traffic.
    label = tempDF[47].iloc[0]
    category = 'Normal' if pd.isna(label) else label.replace(' ', '')

    dfTrain.to_csv(category+'_UNSW.csv', index=False,header=None)
    dfTest.to_csv(category+'_test_UNSW.csv', index=False,header=None)
print(1)