In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import time
from sklearn import preprocessing
import glob
from tqdm import tqdm
from keras.models import Sequential
from keras.layers import Dense,Reshape,Conv2D,Flatten,MaxPooling1D,Conv1D,LSTM,Bidirectional,GRU,Dropout
from attention import Attention
from tcn import TCN, tcn_full_summary
from tensorflow import compat,config
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from math import floor
import prettytable as pt
from contextlib import redirect_stdout

In [None]:
# Seed the global random number generators so repeated runs start from the same state.
# `np` and `compat` are the names imported in the first cell.
SEED = 1
np.random.seed(SEED)             # NumPy global RNG (what the original numpy.random.seed call seeded)
compat.v1.set_random_seed(SEED)  # TensorFlow global seed, so TensorFlow-side randomness
                                 # (e.g. weight initialisation, dropout) is seeded as well
# NOTE(review): some GPU kernels may still be non-deterministic; the seeds only fix the RNG state.

def convert_2d(df_dup, window=60):
    """Convert a labelled series into a table of fixed-length sliding windows.

    Every output row holds ``window`` consecutive entries of ``df_dup['value']``
    (taken positionally, step 1) followed by one label column that is 1 when any
    ``df_dup['is_anomaly']`` entry inside that window equals 1, and 0 otherwise.

    Parameters
    ----------
    df_dup : pandas.DataFrame
        Frame with the columns ``value`` and ``is_anomaly``.
    window : int, default 60
        Number of consecutive samples per row.

    Returns
    -------
    pandas.DataFrame
        ``len(df_dup) - window + 1`` rows and ``window + 1`` columns labelled
        ``0 .. window`` (the last one is the label). An empty DataFrame is
        returned when the input has fewer than ``window`` rows.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    values = df_dup['value'].to_numpy()
    if values.shape[0] < window:
        # Not enough samples for a single window (the original loop produced no rows here).
        return pd.DataFrame()

    # Anything that is not exactly 1 (including NaN) counts as "not anomalous".
    anomaly_flags = df_dup['is_anomaly'].to_numpy() == 1

    # Build all windows at once instead of appending row by row: DataFrame.append
    # no longer exists in pandas 2.x and made the original loop quadratic.
    value_windows = np.lib.stride_tricks.sliding_window_view(values, window)
    window_labels = np.lib.stride_tricks.sliding_window_view(anomaly_flags, window).any(axis=1)

    # column_stack promotes the boolean labels to the dtype of the values (0/1).
    return pd.DataFrame(np.column_stack((value_windows, window_labels)))

In [None]:
# Read one series from the dataset folder and preview its first rows.
df = pd.read_csv("Dataset/real_1.csv")
df.head()

In [None]:
# Line plot of the sample series: value against timestamp.
# NOTE(review): the labels call the data normalised / preprocessed, but `df` is plotted
# exactly as it was read from the CSV in the previous cell - confirm the wording.
plt.plot(df["timestamp"], df["value"])
plt.title(" Example plot of web traffic after preprocessing ")
plt.ylabel("NormalizedValue")
plt.xlabel("Timestamp")
plt.show()

In [None]:
# Preprocessing the data
# dataset build
# Option 1 reads the already preprocessed table from dataframe.xlsx (fast);
# option 2 preprocesses the raw CSV files again (slow).
args_data = 1

if args_data == 1:
    frame = pd.read_excel('dataframe.xlsx', engine='openpyxl')
elif args_data == 2:
    path = r'Dataset'  # set the path accordingly
    # glob returns files in arbitrary order; sort them so the row order of `frame`
    # (and with it the positional train/test split) is the same on every machine.
    all_files = sorted(glob.glob(path + "/*.csv"))
    dataset_conc = []
    for filename in tqdm(all_files):
        # Use a loop-local name so the sample frame `df` shown earlier is not overwritten.
        series_df = pd.read_csv(filename, index_col=None, header=0)
        # Drop rows whose value is missing or exactly 0. Only the `value` column is
        # inspected; zeros in the other columns (e.g. is_anomaly == 0) are left alone.
        usable = series_df['value'].notna() & (series_df['value'] != 0)
        series_df = series_df.loc[usable].copy()
        if series_df.empty:
            # Nothing left to normalise or window in this file.
            continue
        # L2-normalise the whole series as one vector, then write it back as a 1-D column.
        series_df['value'] = preprocessing.normalize([series_df['value']])[0]
        dataset_conc.append(convert_2d(series_df))
    if not dataset_conc:
        raise FileNotFoundError("no usable CSV data found under " + path)
    frame = pd.concat(dataset_conc, axis=0, ignore_index=True)
    # Write the frame to Excel for inspection / reuse by option 1.
    # An existing dataframe.xlsx is deliberately left untouched.
    if not os.path.exists('dataframe.xlsx'):
        with pd.ExcelWriter('dataframe.xlsx') as writer:
            frame.to_excel(writer, sheet_name='Sheet1', startcol=0, index=False)
else:
    raise ValueError("args_data must be 1 (load cached table) or 2 (rebuild from CSV files)")
frame.head()


In [None]:
# Split train & test dataset positionally (no shuffling): the first 70 % of the
# rows are used for training, the remaining rows for testing.
y = frame.iloc[:, 60]      # label column
X = frame.iloc[:, 0:60]    # the 60 window values
split_at = int(X.shape[0] * 0.7)

# Reshape to (samples, 60, 1): the first Conv1D layer is declared with input_shape=(60, 1).
X_train = X.iloc[:split_at].to_numpy()[:, :, np.newaxis]
X_test = X.iloc[split_at:].to_numpy()[:, :, np.newaxis]

# Test labels stay as plain class ids because the sklearn metrics compare them with argmax output.
y_test = y.iloc[split_at:].to_numpy()

# One-hot encode the training labels. num_classes is fixed to 2 so the target always has
# two columns (matching the 2-unit output layer) even if the training slice holds one class only.
y_train = to_categorical(y.iloc[:split_at].to_numpy(), num_classes=2)

In [None]:
# %%writefile model.txt  # Uncommenting this line writes this cell's contents to a txt file, but the cell body is then not executed

# define model structure: two Conv1D/MaxPooling1D stages -> TCN -> Attention -> dense classifier
model = Sequential()
# 'same' padding with stride 1 keeps the 60 time steps; 64 filters give 64 channels.
model.add(Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(60, 1), kernel_initializer='he_normal'))
model.add(MaxPooling1D(pool_size=2, strides=2))  # 60 -> 30 steps
model.add(Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu', kernel_initializer='he_normal'))
model.add(MaxPooling1D(pool_size=2, strides=2))  # 30 -> 15 steps
# Collapse the 15 steps x 64 channels into a single time step of 960 features.
model.add(Reshape((1, 15*64)))
# return_sequences is a boolean flag; the previous string 'True' only behaved as
# intended because any non-empty string is truthy.
model.add(TCN(64, activation='tanh', return_sequences=True))
model.add(Attention(units=32))
model.add(Flatten())
model.add(Dense(32, activation='tanh'))
model.add(Dropout(0.5))
# Two softmax outputs, one per class (targets are one-hot encoded).
model.add(Dense(2, activation='softmax'))
# NOTE(review): with a 2-unit softmax and one-hot targets 'categorical_crossentropy' is the
# conventional pairing; the original loss is kept here - confirm whether it is intentional.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [None]:
# NOTE(review): this forces eager execution of TensorFlow functions, which is usually
# much slower than graph mode - confirm it is still required.
config.run_functions_eagerly(True)

# Wall-clock start; also used to stamp the log line and the output folder name.
t_start = time.time()
Time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(t_start)))
sTime = time.strftime("%m%d//%H%M_", time.localtime(int(t_start)))

# train parameter set
model_name = "CNN+TCN+A+D"
batch_size = 512
epoch = 500
activation = 'relu'  # only recorded in the result table; the layers define their own activations


# Create the folder that stores the artefacts of this training run.
# exist_ok avoids the separate exists-check and a failure if the folder is already there.
path = 'train/' + sTime + model_name
os.makedirs(path, exist_ok=True)
print('folder made')

# Report the seed that was actually configured instead of a hard-coded literal.
print(model_name,'batch_size:',batch_size,'epochs:',epoch,'start time:',Time,'seed:',SEED)
# NOTE(review): this session object is not passed to model.fit below; presumably it is only
# created for its device-placement logging - confirm, and close/remove it if unused.
sess = compat.v1.Session(config=compat.v1.ConfigProto(log_device_placement=True))
history = model.fit(x=X_train, y=y_train, batch_size=batch_size, epochs=epoch, verbose=1)
print('traing finished')

t_end = time.time()

In [None]:
# Store the fitted network inside this run's output folder.
model_file = path + '/trained_model.h5'
model.save(model_file)

# Show which series were recorded during fitting.
print(history.history.keys())

# One figure per recorded series: draw it, label it, write it to disk, then display it.
for metric_name, image_name in (('accuracy', '/accuracy.jpg'), ('loss', '/loss.jpg')):
    plt.plot(history.history[metric_name])
    plt.xlabel('epoch')
    plt.ylabel(metric_name)
    plt.savefig(path + image_name)
    plt.show()

In [None]:
 #predicting on test data
# The network emits one score per class; the predicted label is the index of the larger one.
predict_x = model.predict(X_test)
y_pred = predict_x.argmax(axis=1)

# Compare the hard predictions with the integer test labels.
Accuracy = accuracy_score(y_test, y_pred)
Confusion_Matrix = confusion_matrix(y_test, y_pred)
precision, recall, F1_Score = (
    scorer(y_test, y_pred, average='binary')
    for scorer in (precision_score, recall_score, f1_score)
)


# Format the wall-clock duration between t_start and t_end.
# Work on whole seconds with divmod: the previous float arithmetic could lose a second
# to rounding (e.g. 61 s printed as 1m0s) and `minute > 60` showed exactly one hour as 60m.
elapsed = int(t_end - t_start)
minute, second = divmod(elapsed, 60)
if minute >= 60:
    hour, minute = divmod(minute, 60)
    train_t = '%dh%dm%ds' % (hour, minute, second)
else:
    train_t = '%dm%ds' % (minute, second)

# Print the duration followed by the evaluation metrics.
print("training time ", train_t)
print("Confusion_Matrix")
print(Confusion_Matrix)
print("Accuracy ", Accuracy)
print("Precision ", precision)
print("recall ", recall)
print("f1_score ", F1_Score)

In [None]:
# save result to txt
# First table: the measured results of this run.
tb = pt.PrettyTable()
tb.field_names = ["Training Time", "Accuracy", "Precision", "Recall", "F1 Score"]
tb.add_row([train_t, Accuracy, precision, recall, F1_Score])
# Second table: the configuration the run was started with.
tb1 = pt.PrettyTable()
tb1.field_names = ["Structure", "batch_size", "epochs", "start time", "seed", "activation"]
# Record the seed that was actually set (SEED) rather than a hard-coded 0.
tb1.add_row([model_name, batch_size, epoch, Time, SEED, activation])
print(tb)
# Write-only access is sufficient; an explicit encoding keeps the file independent of the
# platform default when the model summary contains non-ASCII characters.
with open(path + '/result.txt', 'w', encoding='utf-8') as f:
    f.write(str(tb))
    f.write('\n')
    f.write(str(tb1))
    f.write('\n')
    f.write('\n')
    # model.summary() prints to stdout; redirect it into the result file.
    with redirect_stdout(f):
        model.summary()
    f.write('\n')
    # with open("model.txt", "r") as file: # record the model-definition cell
    #     data = file.read()
    #     f.write(data)
print('tb finish')