In [131]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import glob
import os
import time

from keras import backend as K

from keras.datasets import mnist
from keras.layers import (Activation, BatchNormalization, Concatenate, Dense,
                          Dropout, Flatten, Input, Lambda, Reshape)
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.models import Model, Sequential
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Load original data

In [119]:
# Load the original dataset.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
df_ori = pd.read_csv('/Users/xiafei/code/itu-ml-challenge/csv/dataset.csv')

# Keep only the columns that contain at least one non-zero value.
# This step is intentionally not wrapped in a try/except: if it fails, the
# frame is not in the state the rest of the notebook expects.
df_ori = df_ori.loc[:, (df_ori != 0).any(axis=0)]

# 'Unnamed: 0' is the name pandas assigns to a header-less CSV column
# (typically a saved row index). Drop it when present; otherwise report it
# the same way the notebook always has, without hiding unrelated errors.
if 'Unnamed: 0' in df_ori.columns:
    df_ori = df_ori.drop(columns=['Unnamed: 0'])
else:
    print('drop error')

In [120]:
# Print the (rows, columns) shape, then show the first five rows as the cell output.
print(df_ori.shape)
df_ori.head()

(9670, 662)


Unnamed: 0,p_/computes0/service/id,p_/computes0/vcpus_used,p_/computes0/vcpus,p_/computes0/memory_mb_used,p_/computes0/memory_mb,p_/computes0/cpu_info/topology/cores,p_/computes0/cpu_info/topology/cells,p_/computes0/cpu_info/topology/threads,p_/computes0/cpu_info/topology/sockets,p_/computes0/running_vms,...,v_/ports#link-tr-tr-a-1-y/metrics/network-incoming-bytes-rate,v_/ports#link-tr-tr-a-1-y/metrics/network-incoming-packets,v_/ports#link-tr-tr-a-1-y/metrics/network-incoming-packets-rate,v_/ports#link-tr-tr-a-1-y/metrics/network-outgoing-bytes,v_/ports#link-tr-tr-a-1-y/metrics/network-outgoing-bytes-rate,v_/ports#link-tr-tr-a-1-y/metrics/network-outgoing-packets,v_/ports#link-tr-tr-a-1-y/metrics/network-outgoing-packets-rate,v_/time,v_type,v_type_code
0,16,20,48,41472,257790,12,2,2,1,5,...,13.236069,1401532.0,0.100425,174853276.0,11.431817,1263588.0,0.099783,1593395580,ixnetwork-traffic-start,0
1,16,20,48,41472,257790,12,2,2,1,5,...,11.438221,1401538.0,0.099506,174854074.0,13.313681,1263595.0,0.116705,1593395640,ixnetwork-traffic-start,0
2,16,20,48,41472,257790,12,2,2,1,5,...,11.438221,1401545.0,0.099506,174854758.0,13.313681,1263601.0,0.100463,1593395700,ixnetwork-traffic-start,0
3,16,20,48,41472,257790,12,2,2,1,5,...,11.429972,1401551.0,0.099826,174855556.0,13.277844,1263608.0,0.116223,1593395760,ixnetwork-traffic-start,0
4,16,20,48,41472,257790,12,2,2,1,5,...,11.429972,1401558.0,0.116494,174856338.0,13.277844,1263615.0,0.116614,1593395820,ixnetwork-traffic-start,0


# Select the rows of one event type (`curr_type`) from the original data

In [151]:
# Type code to extract; rows are later matched on the `v_type_code` column.
curr_type = 3

In [152]:
# Build a boolean row mask for the selected type code, apply it, and report
# how many rows/columns survive.
is_curr_type = df_ori['v_type_code'] == curr_type
df_ori_typeN = df_ori.loc[is_curr_type]
print(df_ori_typeN.shape)

(559, 662)


In [153]:
# Number of leading rows of the selected type to keep. The truncated frame's
# row count is also used as the batch size when the generated data is tiled.
top_N = 550

In [154]:
# Truncate positionally to the first `top_N` rows, then report the new shape.
df_ori_typeN = df_ori_typeN.iloc[:top_N]
print(df_ori_typeN.shape)

(550, 662)


# Load generated data

In [164]:
# Read the generated feature table (presumably GAN output, going by the
# variable name — confirm). The path is relative to the working directory.
df_gan = pd.read_csv('./data/generated_data.csv')

In [165]:
# Print the (rows, columns) shape, then show the first five rows as the cell output.
print(df_gan.shape)
df_gan.head()

(11544, 100)


Unnamed: 0,n_/devices/modules/Cisco-IOS-XE-bgp-oper/bgp-state-data/address-families/address-family/activities/prefixes,n_/devices/modules/Cisco-IOS-XE-bgp-oper/bgp-state-data/neighbors/neighbor/prefix-activity/sent/current-prefixes,v_/ports#link-tr-ssm-b-1-y/metrics/network-outgoing-bytes,n_/devices/modules/Cisco-IOS-XE-bgp-oper/bgp-state-data/address-families/address-family/as-path/total-entries,v_/ports#link-intgw-exgw-a-3-x/metrics/network-outgoing-bytes-rate,n_/devices/modules/Cisco-IOS-XE-bgp-oper/bgp-state-data/address-families/address-family/prefixes/total-entries,v_/devices#RR-01/power_state,n_/devices/modules/Cisco-IOS-XE-interfaces-oper/interfaces/interface/diffserv-info/diffserv-target-classifier-stats/queuing-stats/drop-pkts,v_/ports#link-tr-intgw-a-3-x/metrics/network-incoming-bytes,n_/devices/modules/Cisco-IOS-XE-bgp-oper/bgp-state-data/neighbors/neighbor/prefix-activity/sent/explicit-withdraw,...,v_/devices#TR-01/metrics/cpu_util,v_/ports#link-tr-intgw-b-1-x/metrics/network-outgoing-bytes,v_/ports#link-tr-intgw-a-2-x/metrics/network-outgoing-bytes-rate,v_/ports#link-tr-intgw-a-2-y/metrics/network-outgoing-bytes-rate,v_/devices#RR-01/metrics/disk-device-write-bytes-rate,v_/ports#link-tr-intgw-a-3-x/metrics/network-incoming-packets-rate,v_/ports#link-intgw-exgw-b-2-x/metrics/network-outgoing-bytes,v_/devices#TR-01/metrics/disk-device-write-bytes,v_/ports#link-tr-intgw-a-3-x/metrics/network-outgoing-packets,v_/ports#link-tr-intgw-a-3-y/metrics/network-incoming-packets
0,1665107,7835,14035076096252,3114,727527,7835,1,47368546,173941103,4,...,56,19157104171752,2129572,2428825,55,0,2929842342055,272801120,1936011,1939168
1,1664076,7835,13875043181825,4670,1043368,7835,1,40769033,173360991,4,...,50,19073871138194,1522930,724306,37,0,2894148784336,618949532,1928074,1931248
2,1663838,7835,13806590908239,4670,1230870,7835,1,37710445,173092400,5,...,53,19038093617971,1138817,610914,6,0,2879215228734,12202971,1922416,1925587
3,1663899,7835,13820527697427,4671,1232926,7835,1,38378736,173145430,6,...,51,19045348677003,1242803,529285,62,0,2882263917973,1475188576,1923176,1926347
4,1663861,7836,13812874966038,4671,1252564,7836,1,38004575,173119768,5,...,57,19041365895864,1906637,2850067,14,0,2880569318085,57152,1922719,1925890


# Rebuild the original dataset layout, filling in the generated columns

In [166]:
# Rebuild a frame with the original dataset's full column layout.
#
# The generated data is cut into consecutive batches of
# `df_ori_typeN.shape[0]` rows. For each batch, a fresh copy of the original
# type-N rows is taken as a template and every column that exists in
# `df_gan` is overwritten with that batch's generated values. Generated rows
# beyond the last full batch are ignored.
columns = df_gan.columns
rows_per_batch = df_ori_typeN.shape[0]
if rows_per_batch == 0:
    raise ValueError('df_ori_typeN is empty; there are no template rows to fill')
total_batch = df_gan.shape[0] // rows_per_batch

batch_frames = []
for batch in range(total_batch):
    print('batch:', batch)
    start = batch * rows_per_batch
    end = start + rows_per_batch
    print(start, end)

    # Deep copy: pd.DataFrame(df) does not copy the underlying data, so
    # writing into it could leak into df_ori_typeN or the other batches.
    df_temp = df_ori_typeN.copy()

    # Positional slice, so the result does not depend on df_gan's index labels.
    gan_rows = df_gan.iloc[start:end]
    for column in columns:
        # to_numpy() strips the index so values are assigned by position
        # rather than aligned on (mismatching) row labels.
        df_temp[column] = gan_rows[column].to_numpy()

    batch_frames.append(df_temp)

# Concatenate once instead of growing the frame inside the loop; this avoids
# quadratic copying and keeps the template's column dtypes.
if batch_frames:
    df_final = pd.concat(batch_frames, ignore_index=True)
else:
    # Not enough generated rows for even one batch: empty frame, same columns.
    df_final = pd.DataFrame(columns=df_ori_typeN.columns)

batch: 0
0 550
batch: 1
550 1100
batch: 2
1100 1650
batch: 3
1650 2200
batch: 4
2200 2750
batch: 5
2750 3300
batch: 6
3300 3850
batch: 7
3850 4400
batch: 8
4400 4950
batch: 9
4950 5500
batch: 10
5500 6050
batch: 11
6050 6600
batch: 12
6600 7150
batch: 13
7150 7700
batch: 14
7700 8250
batch: 15
8250 8800
batch: 16
8800 9350
batch: 17
9350 9900
batch: 18
9900 10450
batch: 19
10450 11000


In [167]:
# Print the (rows, columns) shape, then show the first five rows as the cell output.
print(df_final.shape)
df_final.head()

(11000, 662)


Unnamed: 0,p_/computes0/service/id,p_/computes0/vcpus_used,p_/computes0/vcpus,p_/computes0/memory_mb_used,p_/computes0/memory_mb,p_/computes0/cpu_info/topology/cores,p_/computes0/cpu_info/topology/cells,p_/computes0/cpu_info/topology/threads,p_/computes0/cpu_info/topology/sockets,p_/computes0/running_vms,...,v_/ports#link-tr-tr-a-1-y/metrics/network-incoming-bytes-rate,v_/ports#link-tr-tr-a-1-y/metrics/network-incoming-packets,v_/ports#link-tr-tr-a-1-y/metrics/network-incoming-packets-rate,v_/ports#link-tr-tr-a-1-y/metrics/network-outgoing-bytes,v_/ports#link-tr-tr-a-1-y/metrics/network-outgoing-bytes-rate,v_/ports#link-tr-tr-a-1-y/metrics/network-outgoing-packets,v_/ports#link-tr-tr-a-1-y/metrics/network-outgoing-packets-rate,v_/time,v_type,v_type_code
0,16,20,48,41472,257790,12,2,2,1,5,...,0,1417127.0,0.099865,210831384.0,11.409236,1294122.0,0.116733,1593399060,interface-down,3
1,16,20,48,41472,257790,12,2,2,1,5,...,0,1417135.0,0.099946,210832166.0,13.281936,1294129.0,0.116733,1593399120,interface-down,3
2,16,20,48,41472,257790,12,2,2,1,5,...,0,1417141.0,0.133348,210832964.0,13.071012,1294136.0,0.116903,1593399180,interface-down,3
3,16,20,48,41472,257790,12,2,2,1,5,...,0,1417147.0,0.100012,210833648.0,13.311876,1294142.0,0.116903,1593399240,interface-down,3
4,16,20,48,41472,257790,12,2,2,1,5,...,0,1417154.0,0.099954,210834332.0,11.41332,1294148.0,0.099981,1593399300,interface-down,3


In [168]:
# First ten rows of the assembled frame, restricted to the columns that came
# from the generated data.
df_final.loc[:, columns].head(10)

Unnamed: 0,n_/devices/modules/Cisco-IOS-XE-bgp-oper/bgp-state-data/address-families/address-family/activities/prefixes,n_/devices/modules/Cisco-IOS-XE-bgp-oper/bgp-state-data/neighbors/neighbor/prefix-activity/sent/current-prefixes,v_/ports#link-tr-ssm-b-1-y/metrics/network-outgoing-bytes,n_/devices/modules/Cisco-IOS-XE-bgp-oper/bgp-state-data/address-families/address-family/as-path/total-entries,v_/ports#link-intgw-exgw-a-3-x/metrics/network-outgoing-bytes-rate,n_/devices/modules/Cisco-IOS-XE-bgp-oper/bgp-state-data/address-families/address-family/prefixes/total-entries,v_/devices#RR-01/power_state,n_/devices/modules/Cisco-IOS-XE-interfaces-oper/interfaces/interface/diffserv-info/diffserv-target-classifier-stats/queuing-stats/drop-pkts,v_/ports#link-tr-intgw-a-3-x/metrics/network-incoming-bytes,n_/devices/modules/Cisco-IOS-XE-bgp-oper/bgp-state-data/neighbors/neighbor/prefix-activity/sent/explicit-withdraw,...,v_/devices#TR-01/metrics/cpu_util,v_/ports#link-tr-intgw-b-1-x/metrics/network-outgoing-bytes,v_/ports#link-tr-intgw-a-2-x/metrics/network-outgoing-bytes-rate,v_/ports#link-tr-intgw-a-2-y/metrics/network-outgoing-bytes-rate,v_/devices#RR-01/metrics/disk-device-write-bytes-rate,v_/ports#link-tr-intgw-a-3-x/metrics/network-incoming-packets-rate,v_/ports#link-intgw-exgw-b-2-x/metrics/network-outgoing-bytes,v_/devices#TR-01/metrics/disk-device-write-bytes,v_/ports#link-tr-intgw-a-3-x/metrics/network-outgoing-packets,v_/ports#link-tr-intgw-a-3-y/metrics/network-incoming-packets
0,1665107,7835,14035076096252,3114,727527,7835,1,47368546,173941103,4,...,56,19157104171752,2129572,2428825,55,0,2929842342055,272801120,1936011,1939168
1,1664076,7835,13875043181825,4670,1043368,7835,1,40769033,173360991,4,...,50,19073871138194,1522930,724306,37,0,2894148784336,618949532,1928074,1931248
2,1663838,7835,13806590908239,4670,1230870,7835,1,37710445,173092400,5,...,53,19038093617971,1138817,610914,6,0,2879215228734,12202971,1922416,1925587
3,1663899,7835,13820527697427,4671,1232926,7835,1,38378736,173145430,6,...,51,19045348677003,1242803,529285,62,0,2882263917973,1475188576,1923176,1926347
4,1663861,7836,13812874966038,4671,1252564,7836,1,38004575,173119768,5,...,57,19041365895864,1906637,2850067,14,0,2880569318085,57152,1922719,1925890
5,1663855,7835,13813059716891,4670,1252944,7835,1,38014260,173119904,6,...,59,19041466908851,2712039,4858149,27,0,2880599216572,2201723589,1922685,1925856
6,1663851,7835,13813640596183,4671,1254925,7835,1,38052938,173124072,6,...,60,19041811632632,2779408,5333951,43,0,2880694734014,241536345,1922779,1925951
7,1665865,7836,14095277950101,4594,1246161,7836,1,50362918,174150656,4,...,58,19188913568384,2207665,3526519,6,0,2943486185686,445445079,1938209,1941359
8,1663930,7836,13834314915775,4670,1252716,7836,1,38935747,173207020,4,...,58,19052519784743,3693487,3424900,59,0,2885253196135,734265889,1923867,1927039
9,1663827,7835,13808460580922,4671,1228052,7835,1,37764120,173106029,6,...,62,19039046048288,3464990,3899949,107,0,2879593355660,20216917,1922305,1925476


# Save to CSV

In [170]:
# Write the assembled dataset without the row index. The target directory is
# created first so the cell does not fail when the folder is missing.
output_csv = './csv/xgb/dataset_top100.csv'
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
df_final.to_csv(output_csv, index=False)