### - import libraries

In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from time import time
from tqdm import tqdm
import pickle
from sklearn.metrics import average_precision_score
from sklearn.metrics import recall_score

In [2]:
# Fix the random seed of both PyTorch and NumPy so that runs are reproducible.
manualSeed = 2019
torch.manual_seed(seed=manualSeed)
np.random.seed(seed=manualSeed)

In [3]:
# Number of CUDA devices PyTorch can see; zero means the CPU branch below is taken.
ngpu = torch.cuda.device_count()

# Run on the first GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available() and ngpu > 0:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

### - import data

In [4]:
def save_obj(obj, name ):
    """Pickle ``obj`` to ``obj/<name>.pkl`` relative to the working directory.

    The target directory is created when it does not exist yet, so the call
    no longer fails with ``FileNotFoundError`` on a fresh checkout.

    Args:
        obj: any picklable Python object.
        name: file stem (without the ``.pkl`` extension).
    """
    import os  # local import: keeps this cell self-contained

    path = 'obj/' + name + '.pkl'
    # dirname is 'obj' (or a sub-folder of it when `name` contains a slash).
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
        
def load_obj(name ):
    """Read back the object pickled at ``obj/<name>.pkl``.

    Args:
        name: file stem (without the ``.pkl`` extension).

    Returns:
        The unpickled Python object.
    """
    pkl_path = 'obj/' + name + '.pkl'
    # NOTE: unpickling can execute arbitrary code -- only load files you trust.
    with open(pkl_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded
    
# Load the pickled dataset dictionary stored as obj/kdd99.pkl and pull out the
# train / validation / test splits.
dataset = load_obj('kdd99')
x_train, y_train = dataset['x_train'], dataset['y_train']
x_valid, y_valid = dataset['x_valid'], dataset['y_valid']
x_test, y_test = dataset['x_test'], dataset['y_test']

# Only the feature arrays are converted to tensors on `device`. The label
# arrays are deliberately left exactly as loaded (y_test is later passed
# straight to the scikit-learn metric functions).
x_train, x_valid, x_test = (
    torch.tensor(features, device=device)
    for features in (x_train, x_valid, x_test)
)

### - define hyper-parameters

In [5]:
# Ensemble: number of generator/discriminator pairs trained by the training
# loop, and the dictionaries that collect the trained networks
# (keys 'G<i>' / 'D<i>').
G_D_num = 10
G_group, D_group = {}, {}
z_avg = 0  # initialised here; not used by the cells shown in this notebook

# Optimisation settings.
BATCH_SIZE, EPOCH = 64, 100
LR_G = LR_D = 0.01  # Adam learning rates for generator / discriminator
critic_num = 5      # discriminator updates per generator update

# Network dimensions.
random_neuron = 100  # size of the noise vector fed to the generator
components = 121     # generator output size == discriminator input size

# Loss histories (created empty; the visible cells never append to them).
D_loss_history, G_loss_history = [], []

# Decision threshold on the discriminator output: a sample whose output is
# above alpha is scored 0, otherwise 1.
alpha = 0.1

### - define network class

In [6]:
class Generator(nn.Module):
    """Fully connected generator that maps a noise vector to a synthetic sample.

    Architecture: Linear(noise_dim, 128) -> ReLU -> Linear(128, 128) -> ReLU
    -> Linear(128, data_dim); the output layer has no activation.

    Args:
        ngpu: number of GPUs. Stored on the instance; the network itself
            does not use it.
        noise_dim: size of the input noise vector. Defaults to the
            notebook-level ``random_neuron``.
        data_dim: size of a generated sample. Defaults to the notebook-level
            ``components``.
    """

    def __init__(self, ngpu, noise_dim=None, data_dim=None):
        super().__init__()
        # Fall back to the notebook hyper-parameters so that existing
        # ``Generator(ngpu)`` calls build exactly the same network as before.
        if noise_dim is None:
            noise_dim = random_neuron
        if data_dim is None:
            data_dim = components
        self.ngpu = ngpu
        self.main = nn.Sequential(
            nn.Linear(noise_dim, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, data_dim)
        )

    def forward(self, input):
        """Generate samples from ``input``, whose last dimension is ``noise_dim``."""
        return self.main(input)

In [7]:
class Discriminator(nn.Module):
    """Fully connected discriminator that scores a sample with a value in (0, 1).

    Architecture: Linear(data_dim, 128) -> ReLU -> Linear(128, 128) -> ReLU
    -> Linear(128, 1) -> Sigmoid.

    Args:
        ngpu: number of GPUs. Stored on the instance; the network itself
            does not use it.
        data_dim: size of an input sample. Defaults to the notebook-level
            ``components``.
    """

    def __init__(self, ngpu, data_dim=None):
        super().__init__()
        # Fall back to the notebook hyper-parameter so that existing
        # ``Discriminator(ngpu)`` calls build exactly the same network as before.
        if data_dim is None:
            data_dim = components
        self.ngpu = ngpu
        self.main = nn.Sequential(
            nn.Linear(data_dim, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, input):
        """Return one sigmoid score per sample; the last dimension of ``input`` is ``data_dim``."""
        return self.main(input)

### - training

In [None]:
# Train G_D_num independent generator/discriminator pairs. After each pair is
# trained, its discriminator is scored on the test set and both networks are
# stored in G_group / D_group for the ensemble evaluation further down.
# (The loop variable is not called `iter`, to avoid shadowing the builtin.)
for run_idx in range(G_D_num):

    begin = time()

    D = Discriminator(ngpu).to(device)
    G = Generator(ngpu).to(device)

    # optimization
    opt_D = torch.optim.Adam(D.parameters(), lr=LR_D)
    opt_G = torch.optim.Adam(G.parameters(), lr=LR_G)

    for step in tqdm(range(EPOCH)):

        # Sequential, non-shuffled mini-batches; a trailing partial batch is dropped.
        for batch_iter in range(x_train.shape[0] // BATCH_SIZE):

            batch_start = batch_iter * BATCH_SIZE
            selected_real = x_train[batch_start:batch_start + BATCH_SIZE].float()

            # ---- discriminator: critic_num updates on the same real batch ----
            for _ in range(critic_num):

                # Fresh noise, created directly on `device` so the cell also
                # runs on CPU-only machines (the old `.cuda()` call did not).
                G_noise = torch.randn(BATCH_SIZE, random_neuron, device=device)
                # detach(): the discriminator update must not back-propagate
                # into the generator.
                G_data = G(G_noise).detach()

                prob_real = D(selected_real)  # D try to increase this prob
                prob_fake = D(G_data)  # D try to decrease this prob

                D_loss = -torch.mean(
                    torch.log(prob_real + 1e-9) + torch.log(1. - prob_fake + 1e-9))

                opt_D.zero_grad()
                D_loss.backward()
                opt_D.step()

            # ---- generator: one update against the freshly updated D ----
            # The loss is recomputed with a new forward pass. Back-propagating
            # through a graph that was built before opt_D.step() would use
            # discriminator weights that have since been modified in place,
            # which yields stale gradients and is rejected with a RuntimeError
            # by current PyTorch versions.
            G_noise = torch.randn(BATCH_SIZE, random_neuron, device=device)
            prob_fake = D(G(G_noise))
            G_loss = torch.mean(torch.log(1. - prob_fake + 1e-9))

            opt_G.zero_grad()
            G_loss.backward()
            opt_G.step()

    end = time()
    print(
        str(run_idx + 1) + '/' + str(G_D_num) +
        ' is done! Time is {0:0.2f} seconds;'.format((end - begin)))

    # Inference only: no autograd graph is needed for the test-set forward pass.
    with torch.no_grad():
        prediction = D(x_test.float()).cpu().numpy()

    # Discriminator output above alpha -> 0, otherwise -> 1
    # (presumably 1 marks an anomaly -- confirm against the label encoding).
    tmp_score = np.where(prediction > alpha, 0, 1).astype(prediction.dtype)

    # NOTE(review): average_precision_score is applied to hard 0/1 labels and
    # reported as "Precision"; if plain precision is intended,
    # sklearn.metrics.precision_score would be the matching metric -- confirm.
    precision = average_precision_score(y_test.reshape([-1, 1]), tmp_score)
    recall = recall_score(y_test.reshape([-1, 1]), tmp_score)
    # Guard against 0/0 when both scores are zero.
    pr_sum = precision + recall
    F1 = 2 * (precision * recall) / pr_sum if pr_sum > 0 else 0.0
    print('Precision score: {0:0.2f}'.format(precision))
    print('Recall score: {0:0.2f}'.format(recall))
    print('F1 score: {0:0.2f}'.format(F1))
    print('*************************************************************')

    G_group['G' + str(run_idx)] = G
    D_group['D' + str(run_idx)] = D

  4%|▍         | 4/100 [02:17<54:59, 34.37s/it]

---

# evaluation

In [None]:
# Ensemble evaluation: average the outputs of all stored discriminators on the
# test set, threshold the average at alpha and report the metrics.
test_score = 0.0
with torch.no_grad():  # inference only -- no autograd graph needed
    for d_idx in range(G_D_num):  # not named `iter`, to avoid shadowing the builtin
        prediction = D_group['D' + str(d_idx)](x_test.float()).cpu().numpy()
        test_score = test_score + prediction / G_D_num

# Averaged output above alpha -> 0, otherwise -> 1
# (presumably 1 marks an anomaly -- confirm against the label encoding).
test_score = np.where(test_score > alpha, 0, 1).astype(test_score.dtype)

# NOTE(review): average_precision_score is applied to hard 0/1 labels and
# reported as "Precision"; if plain precision is intended,
# sklearn.metrics.precision_score would be the matching metric -- confirm.
precision = average_precision_score(y_test.reshape([-1,1]), test_score)
recall = recall_score(y_test.reshape([-1,1]), test_score)
# Guard against 0/0 when both scores are zero.
pr_sum = precision + recall
F1 = 2 * (precision * recall) / pr_sum if pr_sum > 0 else 0.0
print('Precision score: {0:0.2f}'.format(precision))
print('Recall score: {0:0.2f}'.format(recall))
print('F1 score: {0:0.2f}'.format(F1))