In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.layers import *
import keras
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.models import load_model
from keras.models import Model, load_model
from tensorflow.keras.utils import to_categorical
from keras.optimizers import adam_v2
from keras.layers import Input
from tensorflow.python.keras import regularizers

In [17]:
## file path
data_path = r'..\all_file'

train_data_process = 'train_for_textcnn'# processed data file path
model_name= './textcnn_with_attention_root1.h5'# model name

In [18]:
## load and process train label
df_train_label = pd.read_csv(os.path.join(data_path,'train_label.csv'),index_col='sample_index')
df_train_label['Root1'] = df_train_label['root-cause(s)'].apply(lambda x : 1 if 'rootcause1' in x else 0 )
df_train_label['Root2'] = df_train_label['root-cause(s)'].apply(lambda x : 1 if 'rootcause2' in x else 0 )
df_train_label['Root3'] = df_train_label['root-cause(s)'].apply(lambda x : 1 if 'rootcause3' in x else 0 )

y = df_train_label['Root1'].values

In [24]:
## load and process feature
features = ['feature0', 'feature1', 'feature2', 'feature11', 'feature12','feature13', 'feature15',
        'feature16', 'feature17','feature18',
        'feature19',
        'feature28_0', 'feature28_1', 'feature28_2', 'feature28_3',
        'feature28_4', 'feature28_5', 'feature28_6', 'feature28_7',
        'feature36_0', 'feature36_1', 'feature36_2', 'feature36_3',
        'feature36_4', 'feature36_5', 'feature36_6', 'feature36_7', 'feature60',
        'feature61_0', 'feature61_1', 'feature61_2', 'feature61_3',
        'feature61_4', 'feature61_5', 'feature61_6', 'feature61_7','feature_edge','feature_distance','length']
files = os.listdir(os.path.join(data_path,train_data_process))
print('Number of files',len(files))
files.sort(key=lambda x:int(x[:-4]))
all_feature = []
for filename in files:
    df = pd.read_csv(os.path.join(data_path,train_data_process,filename),index_col = 0)
    list_tmp = []
    for nd in features:
        for i in df[nd].values:
            if type(i) == str:
                if len(i.split(';'))> 1:
                    i = np.array(i.split(';')).astype(float).mean()
            list_tmp.append(i)
    all_feature.append(list_tmp)
all_feature = np.array(all_feature)
all_feature[np.isnan(all_feature)] = 0

Number of files 1407


In [25]:
## data augmentation
from imblearn.over_sampling import BorderlineSMOTE, ADASYN
X_resampled, y_resampled = BorderlineSMOTE().fit_resample(all_feature, y)
from collections import Counter
print(sorted(Counter(y_resampled).items()))

[(0, 1295), (1, 1295)]


In [26]:
## train and test data split 
from sklearn.model_selection import train_test_split
def dataprocess(teature,labels):
    x_train, x_test, y_train, y_test = train_test_split(teature, labels, test_size=0.2) 
    return x_train,y_train, x_test, y_test
feature_final = X_resampled.reshape(-1,len(features),30)
X_train, y_train, X_test, y_test = dataprocess(feature_final,y_resampled)

## Model

In [27]:
## attention modules
def attention_3d_block(inputs, time_steps):
    input_dim = int(inputs.shape[2])
    a = Permute((2, 1))(inputs)
    a = Dense(time_steps, activation='softmax')(a)
    if False:
        a = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a)
        a = RepeatVector(input_dim)(a)
    a_probs = Permute((2, 1), name='attention_vec')(a)
    print(a_probs.shape, inputs.shape)
    output_attention_mul = Multiply()([inputs, a_probs])
    return output_attention_mul

In [29]:
## model
pool_output = []
kernel_sizes = [3, 4, 5]
main_input = Input(shape=(X_train.shape[1],30), dtype='float64')
O_seq = attention_3d_block(main_input, X_train.shape[1])
for kernel_size in kernel_sizes:
    c = Conv1D(filters=32, kernel_size=kernel_size, padding='same', strides=1)(O_seq)
    c = BatchNormalization()(c)
    c = Activation('relu')(c)
    p = MaxPooling1D(pool_size=2)(c)
    p = Flatten()(p)
    pool_output.append(p)
x_flatten = concatenate(pool_output)
x_flatten = Dropout(0.4)(x_flatten)
y = Dense(2,activation ='softmax',kernel_regularizer=regularizers.l1(0.01))(x_flatten)
model = Model(inputs=main_input, outputs=y)
# model.summary()

(None, 39, 30) (None, 39, 30)


In [None]:
## model train
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
Reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5,
                           mode='auto', cooldown=0, min_lr=0.000001, verbose = 1)
opt = adam_v2.Adam(learning_rate=0.001)
model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, verbose=0),
    ModelCheckpoint(model_name, monitor='val_loss', mode='min', save_best_only=True),
    Reduce
]
print('\nTrain...')
one_hot_labels = to_categorical(y_train, num_classes=2) 
one_hot_labels_test = to_categorical(y_test, num_classes=2) 
history = model.fit(x = X_train, y = one_hot_labels,
                    batch_size=32,
                    epochs=300,
                    shuffle=True,
                    validation_data=(X_test, one_hot_labels_test),
                    callbacks=callbacks)

print("\nTesting...")
model = load_model(model_name)
score, accuracy = model.evaluate(X_test, one_hot_labels_test,
                                 batch_size=64,
                                 verbose=1)
print("Test loss:  ", score)
print("Test accuracy:  ", accuracy)


Train...
Epoch 1/300
Epoch 2/300
 1/65 [..............................] - ETA: 0s - loss: 0.6597 - accuracy: 0.9688



Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300