In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
%matplotlib inline
import pandas_profiling
from tqdm import tqdm
from sklearn.metrics import roc_auc_score,confusion_matrix,accuracy_score,classification_report,roc_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing
import sklearn
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader

from utils.autoencoder import autoencoder
from utils.Data_gen import Data_gen
from utils.get_result import get_result

# Seed every random number generator the notebook relies on. torch has to be
# seeded too, otherwise the autoencoder's initial weights (and therefore every
# downstream score) change from run to run.
SEED = 123
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)

train_data_df = pd.read_csv("./data/unsw/UNSW_NB15_training-set.csv")
test_data_df = pd.read_csv("./data/unsw/UNSW_NB15_testing-set.csv")

# label: 0 = normal, 1 = anomalous
# The two files are pooled into one frame. ignore_index gives the pooled frame
# a unique index so the column-wise concat below never has to align duplicated
# row labels.
data_df = pd.concat([train_data_df, test_data_df], axis=0, ignore_index=True)
data_df = data_df.drop(["id", "attack_cat"], axis=1)

# Columns treated as categorical. get_dummies only expands object/category
# columns; any of these that is stored as a number is passed through unchanged
# (presumably the case for the two is_* flags -- TODO confirm dtypes).
cat_vars = ["proto", "service", "state", "is_ftp_login", "is_sm_ips_ports"]
cat_data = pd.get_dummies(data_df[cat_vars])

# Keep the file's own column order. A set difference would reorder the columns
# differently on every kernel start (string hashing is randomised), which makes
# the feature layout, and everything trained on it, non-reproducible.
numeric_vars = [col for col in data_df.columns
                if col not in cat_vars and col != 'label']
numeric_data = data_df[numeric_vars].copy()

label_data = data_df['label']

# Layout of the final frame: numeric features, encoded categoricals, label last.
final_data_df = pd.concat([numeric_data, cat_data, label_data], axis=1).reset_index(drop=True)

In [2]:
# Class balance of the pooled data set (label 0 = normal).
# `labels` and `num_normal` stay module-level because reduce_data reads them.
labels = final_data_df['label'].copy()
num_data = labels.shape[0]
num_normal = (labels == 0).sum()
normal_ratio = num_normal / num_data
print('normal_ratio(%)', normal_ratio*100, num_normal)

def reduce_data(df, ano_cnt):
    """Keep every normal row of ``df`` and a random sample of anomalous rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``label`` column; ``0`` marks a normal row, anything else
        an anomalous one.
    ano_cnt : int
        Number of anomalous rows to keep. They are drawn without replacement
        from the global NumPy random state.

    Returns
    -------
    pandas.DataFrame
        All normal rows followed by the sampled anomalous rows, with a fresh
        0..n-1 index.

    Raises
    ------
    ValueError
        Raised by ``np.random.choice`` when ``ano_cnt`` is larger than the
        number of anomalous rows in ``df``.
    """
    # Derive everything from the frame that was passed in rather than from
    # notebook globals, so the function works on any frame and in any cell order.
    normal_mask = df['label'].to_numpy() == 0
    num_anomalies = ano_cnt
    print('number of anomalies = ', num_anomalies, 'number of normal = ', int(normal_mask.sum()))

    # Sample row *positions* and select with iloc: this stays correct whatever
    # index df carries (the previous version fed index labels to iloc).
    anomal_positions = np.flatnonzero(~normal_mask)
    anomal_idx = np.random.choice(anomal_positions, size=num_anomalies, replace=False)
    anomal_data = df.iloc[anomal_idx].copy()
    normal_data = df[normal_mask].copy()

    print('anomal_data shape = ', anomal_data.shape)
    print('normal_data shape = ',normal_data.shape)
    return pd.concat([normal_data, anomal_data], axis = 0).reset_index(drop=True)

ano_cnt = 5000
final_data_reduce_df = reduce_data(final_data_df, ano_cnt)
print('reduced_data shape = ', final_data_reduce_df.shape)

# Every column except the label is a model input.
label_col = "label"
feature_cols = [col for col in final_data_reduce_df.columns if col != label_col]
input_size = len(feature_cols)

# Test set = the sampled anomalies + the last `ano_cnt` normal rows after
# shuffling; the remaining normal rows are used for training.
# (shuffle draws from the global NumPy RNG, which was seeded earlier in the notebook.)
test_an_df = final_data_reduce_df.loc[final_data_reduce_df[label_col] == 1].reset_index(drop=True)
n_df = final_data_reduce_df.loc[final_data_reduce_df[label_col] == 0].reset_index(drop=True)
n_df = sklearn.utils.shuffle(n_df).reset_index(drop=True)
test_n_df = n_df.iloc[-ano_cnt:].reset_index(drop=True)
test_df = pd.concat([test_an_df, test_n_df], axis=0).reset_index(drop=True)
train_df = n_df.iloc[:-ano_cnt].reset_index(drop=True)

# Take the labels out *before* scaling and address them by name. The previous
# version scaled the label together with the features and then recovered it
# through the hard-coded position 196, which breaks as soon as the number of
# encoded columns changes.
train_label = train_df[label_col]
y_test = test_df[label_col]

# The scaler is fitted on the (all-normal) training features only and then
# applied to the test features.
min_max_scaler = preprocessing.MinMaxScaler()
train_df = pd.DataFrame(min_max_scaler.fit_transform(train_df[feature_cols]))
x_test = pd.DataFrame(min_max_scaler.transform(test_df[feature_cols]))

x_train, x_val, y_train, y_val = train_test_split(train_df,
                                                    train_label,
                                                    test_size=0.2,
                                                    random_state=1029)

normal_ratio(%) 36.09225646458884 93000
number of anomalies =  5000 number of normal =  93000
anomal_data shape =  (5000, 197)
normal_data shape =  (93000, 197)
reduced_data shape =  (98000, 197)


In [3]:
# The batch size equals the number of training rows, so (assuming Data_gen
# yields one item per row -- TODO confirm) every loader produces a single batch.
batch_size = int(len(x_train))

x_train_dataset = Data_gen(x_train)
x_val_dataset = Data_gen(x_val)
x_test_dataset = Data_gen(x_test)

train_dataloader = DataLoader(x_train_dataset, batch_size=batch_size, shuffle=True, num_workers = 0)
val_dataloader = DataLoader(x_val_dataset, batch_size=batch_size, shuffle=True, num_workers = 0)
test_dataloader = DataLoader(x_test_dataset, batch_size=batch_size, shuffle=True, num_workers = 0)

# Use the GPU when one is visible and fall back to the CPU otherwise, instead
# of failing inside .cuda() on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = autoencoder(input_size)
model = model.to(device)
criterion = torch.nn.MSELoss()
learning_rate = .01
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

save_model_path = 'ae_model.pth'
FULL_TRAIN = True   # False: skip training and reload the saved weights
num_epochs = 100
if FULL_TRAIN:
    for epoch in range(num_epochs):
        running_training_loss = 0.0
        running_val_loss = 0.0

        # --- optimisation pass -------------------------------------------
        model.train()
        for sample_batched in train_dataloader:
            sample_batched = sample_batched.to(device)
            # The model returns (encoder outputs, decoder outputs, latent, reconstruction);
            # only the reconstruction enters the loss.
            en_out, de_out, latant_v, output = model(sample_batched)
            train_loss = criterion(output, sample_batched)
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
            running_training_loss += train_loss.item()

        # --- validation pass ---------------------------------------------
        # eval() switches off train-only layer behaviour (no effect if the model
        # has no dropout/batch-norm); no_grad() avoids building an autograd
        # graph that is never back-propagated.
        model.eval()
        with torch.no_grad():
            for sample_batched in val_dataloader:
                sample_batched = sample_batched.to(device)
                en_out, de_out, latant_v, val_output = model(sample_batched)
                val_loss = criterion(val_output, sample_batched)
                running_val_loss += val_loss.item()
        print(">>>> @ {}, traing_loss = {}, val_loss = {}".format(epoch+1,
                                                                   running_training_loss/len(train_dataloader),
                                                                   running_val_loss/len(val_dataloader)))

        # Checkpoint after every epoch (the file is overwritten each time).
        torch.save(model.state_dict(), save_model_path)
else:
    print("Load previous trained model")
    # map_location lets a checkpoint written on a GPU be restored on a CPU-only machine.
    model.load_state_dict(torch.load(save_model_path, map_location=device))
    
from sklearn.metrics import roc_curve, auc, confusion_matrix
from sklearn.metrics import roc_auc_score,confusion_matrix,accuracy_score,classification_report,roc_curve


# Run the trained autoencoder once over the test set and once over the training
# set, keeping the reconstruction, the latent vector and every intermediate
# encoder/decoder output as NumPy arrays for the scoring cells below.

# Work on whatever device the model already lives on instead of assuming CUDA.
device = next(model.parameters()).device

# Inference only: eval() disables train-only layer behaviour (no effect if the
# model has none) and no_grad() keeps PyTorch from recording an autograd graph
# for these two full-data-set forward passes.
model.eval()

x_test_array = x_test.values
x_train_array = x_train.values

with torch.no_grad():
    x_test_torch = torch.from_numpy(x_test_array).type(torch.FloatTensor)
    x_test_variable = x_test_torch.to(device)
    test_en_out, test_de_out, test_latant_v, x_test_recon = model(x_test_variable)

    x_train_torch = torch.from_numpy(x_train_array).type(torch.FloatTensor)
    x_train_variable = x_train_torch.to(device)
    train_en_out, train_de_out, train_latant_v, x_train_recon = model(x_train_variable)

# Move everything back to host memory as NumPy arrays.
x_train_recon = x_train_recon.detach().cpu().numpy()
train_latant_v = train_latant_v.detach().cpu().numpy()
x_test_recon = x_test_recon.detach().cpu().numpy()
test_latant_v = test_latant_v.detach().cpu().numpy()

# en_out / de_out are sequences of per-layer tensors; convert each element.
test_en_out = [layer.detach().cpu().numpy() for layer in test_en_out]
train_en_out = [layer.detach().cpu().numpy() for layer in train_en_out]
test_de_out = [layer.detach().cpu().numpy() for layer in test_de_out]
train_de_out = [layer.detach().cpu().numpy() for layer in train_de_out]

>>>> @ 1, traing_loss = 0.03419903293251991, val_loss = 0.02820383571088314
>>>> @ 2, traing_loss = 0.028260204941034317, val_loss = 0.023678477853536606
>>>> @ 3, traing_loss = 0.023726824671030045, val_loss = 0.020842963829636574
>>>> @ 4, traing_loss = 0.02088170312345028, val_loss = 0.017485348507761955
>>>> @ 5, traing_loss = 0.017520131543278694, val_loss = 0.016042882576584816
>>>> @ 6, traing_loss = 0.016072766855359077, val_loss = 0.015605912543833256
>>>> @ 7, traing_loss = 0.015626614913344383, val_loss = 0.01522442139685154
>>>> @ 8, traing_loss = 0.015242310240864754, val_loss = 0.014436888508498669
>>>> @ 9, traing_loss = 0.014461543411016464, val_loss = 0.013787755742669106
>>>> @ 10, traing_loss = 0.013821247965097427, val_loss = 0.01310693845152855
>>>> @ 11, traing_loss = 0.013148360885679722, val_loss = 0.012485438026487827
>>>> @ 12, traing_loss = 0.01253407821059227, val_loss = 0.011708720587193966
>>>> @ 13, traing_loss = 0.011765974573791027, val_loss = 0.0111878

In [4]:
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import pdist, cdist, squareform
from sklearn import covariance
from scipy.spatial import distance
import warnings
warnings.filterwarnings(action='ignore')

def mahalanobis_scores(reference, queries):
    """Mahalanobis distance of each row of ``queries`` from the ``reference`` sample.

    Parameters
    ----------
    reference : 2-D array
        Rows used to estimate the centre (column means) and the precision
        matrix (inverse of the empirical covariance).
    queries : 2-D array
        Rows to score; must have the same number of columns as ``reference``.

    Returns
    -------
    numpy.ndarray
        One distance per row of ``queries``.
    """
    estimator = sklearn.covariance.EmpiricalCovariance(assume_centered=False)
    estimator.fit(reference)
    precision = estimator.precision_
    center = np.mean(reference, axis=0)
    # Built in a single pass; the earlier `scores = scores + [...]` pattern
    # copied the whole list on every iteration.
    return np.array([distance.mahalanobis(center, row, precision) for row in queries])


# --- Baseline 1: Mahalanobis distance in the (scaled) input space ------------
ma_scores = mahalanobis_scores(x_train_array, x_test_array)

# --- Baseline 2: Isolation Forest --------------------------------------------
# NOTE: decision_function is *lower* for more abnormal rows, i.e. the opposite
# orientation to the distance-based scores in this cell.
iso_Forest = IsolationForest(n_estimators=100,random_state=123)
iso_Forest.fit(x_train_array)
if_scores = iso_Forest.decision_function(x_test_array)

# --- Baseline 3: autoencoder reconstruction error (per-row MSE) --------------
ae_scores = np.mean((x_test_array - x_test_recon)**2, axis=1)

# --- Layer-wise Mahalanobis scores --------------------------------------------
# Each entry is [train activations, test activations] for one layer: four
# encoder outputs, the latent vector, then five decoder outputs.
encoder_inputs = [[train_en_out[k], test_en_out[k]] for k in range(4)]
encoder_inputs.append([train_latant_v, test_latant_v])
decoder_inputs = [[train_de_out[k], test_de_out[k]] for k in range(5)]

efe_scores = [mahalanobis_scores(train_act, test_act) for train_act, test_act in encoder_inputs]
dfe_scores = [mahalanobis_scores(train_act, test_act) for train_act, test_act in decoder_inputs]

# Individual names kept for any other cell that still refers to them.
input_1, input_2, input_3, input_4, input_5 = encoder_inputs
input_6, input_7, input_8, input_9, input_10 = decoder_inputs
scores1, scores2, scores3, scores4, scores5 = efe_scores
scores6, scores7, scores8, scores9, scores10 = dfe_scores

# Order matters: later cells index into this list by position
# (0 = ma, 1 = if, 2 = ae, 3-7 = encoder layers, 8-12 = decoder layers).
scores = [ma_scores] + [if_scores] + [ae_scores] + efe_scores + dfe_scores


In [5]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_curve, auc, roc_auc_score, average_precision_score
from sklearn.metrics import precision_score, recall_score, f1_score

# One column per anomaly score, named in the same order as the arrays are
# stored in `scores`, plus the ground-truth test label.
score_columns = [
    "ma_scores",
    "if_scores",
    "ae_scores",
    "efe_scores_1",
    "efe_scores_2",
    "efe_scores_3",
    "efe_scores_4",
    "efe_scores_5",
    "dfe_scores_1",
    "dfe_scores_2",
    "dfe_scores_3",
    "dfe_scores_4",
    "dfe_scores_5",
]
score_table = {column: scores[position] for position, column in enumerate(score_columns)}
score_table["label"] = y_test
scores_df = pd.DataFrame(score_table)

def evaluate_score_set(scores_df, feature_cols):
    """Fit a logistic regression on the chosen score columns and compute its metrics.

    NOTE: the classifier is fitted and scored on the *same* rows of
    ``scores_df``, so every figure returned here is an in-sample number.

    Parameters
    ----------
    scores_df : pandas.DataFrame
        Frame holding the score columns and a binary ``label`` column.
    feature_cols : list of str
        Names of the score columns used as classifier inputs.

    Returns
    -------
    dict
        Keys ``clf``, ``test_acc``, ``precision``, ``recall``, ``AUROC``,
        ``AUPRC``, ``f1_score``, ``false_positive_rate``, ``fpr``, ``tpr``,
        ``thresholds`` (the last three are the ``roc_curve`` outputs).
    """
    x = scores_df[feature_cols]
    y = scores_df["label"].values
    clf = LogisticRegression(random_state=0).fit(x, y)

    # Predict once and reuse; the earlier code re-ran predict / predict_proba
    # for every single metric.
    y_pred = clf.predict(x)
    y_prob = clf.predict_proba(x)[:, 1]

    tn, fp, fn, tp = confusion_matrix(y, y_pred).ravel()
    fpr, tpr, thresholds = roc_curve(y, y_prob)
    return {
        "clf": clf,
        "test_acc": clf.score(x, y),
        "precision": precision_score(y, y_pred),
        "recall": recall_score(y, y_pred),
        "AUROC": roc_auc_score(y, y_prob),
        "AUPRC": average_precision_score(y, y_prob),
        "f1_score": f1_score(y_true=y, y_pred=y_pred),
        "false_positive_rate": fp / (fp + tn),
        "fpr": fpr,
        "tpr": tpr,
        "thresholds": thresholds,
    }


# Column groups for the encoder- and decoder-side Mahalanobis scores.
efe_cols = ["efe_scores_1", "efe_scores_2", "efe_scores_3", "efe_scores_4", "efe_scores_5"]
dfe_cols = ["dfe_scores_1", "dfe_scores_2", "dfe_scores_3", "dfe_scores_4", "dfe_scores_5"]

# (printed heading, classifier input columns), in the order they are reported.
score_sets = [
    ("ma_scores", ["ma_scores"]),                                   # only mahal of input
    ("if_scores", ["if_scores"]),                                   # isolation forest
    ("ae_scores", ["ae_scores"]),                                   # recon loss
    ("efe_scores", efe_cols),                                       # encoder mahal
    ("dfe_scores", dfe_cols),                                       # decoder mahal
    ("mahal+efe_scores", ["ma_scores"] + efe_cols),                 # input mahal + encoder mahal
    ("mahal+dfe_scores", ["ma_scores"] + dfe_cols),                 # input mahal + decoder mahal
    ("mahal+ae_scores", ["ma_scores", "ae_scores"]),                # input mahal + recon loss
    ("efe_scores+dfe_scores", efe_cols + dfe_cols),                 # encoder mahal + decoder mahal
    ("ae_scores+efe_scores", ["ae_scores"] + efe_cols),             # recon loss + encoder mahal
    ("ae_scores+dfe_scores", ["ae_scores"] + dfe_cols),             # recon loss + decoder mahal
    ("ma_scores+efe_scores+dfe_scores", ["ma_scores"] + efe_cols + dfe_cols),
    ("ma_scores+ae_scores+efe_scores", ["ma_scores", "ae_scores"] + efe_cols),
    ("ma_scores+ae_scores+dfe_scores", ["ma_scores", "ae_scores"] + dfe_cols),
    ("ae_scores+efe_scores+dfe_scores", ["ae_scores"] + efe_cols + dfe_cols),
    ("ma_scores+ae_scores+efe_scores+dfe_scores", ["ma_scores", "ae_scores"] + efe_cols + dfe_cols),
]

# Metrics are printed unlabeled, always in this order. (The last combination
# used to print AUROC/AUPRC before precision/recall, unlike all the others.)
printed_metrics = ["test_acc", "precision", "recall", "AUROC", "AUPRC",
                   "f1_score", "false_positive_rate"]

# Full metrics (including the fitted classifier and ROC arrays) per combination.
results = {}
for set_name, set_cols in score_sets:
    results[set_name] = evaluate_score_set(scores_df, set_cols)
    print(set_name)
    for metric_name in printed_metrics:
        print(results[set_name][metric_name])
    print("\n")

# The classifier of the last combination stays available as `clf`. It is
# deliberately NOT bound to `model` any more: that name holds the trained
# autoencoder, and overwriting it broke re-running the inference cell.
clf = results[score_sets[-1][0]]["clf"]

ma_scores
0.796
0.8931987247608927
0.6724
0.8401023800000001
0.869775326573813
0.7672295755362848
0.0804


if_scores
0.7301
0.7660115606936416
0.6626
0.79362918
0.8057846527418319
0.7105630026809652
0.2024


ae_scores
0.8295
0.88039713691988
0.7626
0.8415485400000001
0.8910613406954397
0.8172757475083057
0.1036


efe_scores
0.853
0.9486527707168276
0.7464
0.8885054600000001
0.9202248030108623
0.8354600402955003
0.0404


dfe_scores
0.83
0.9239465570400822
0.7192
0.8878753
0.916553344147357
0.808816914080072
0.0592


mahal+efe_scores
0.8568
0.9493702770780856
0.7538
0.90141666
0.9267794694792562
0.8403567447045708
0.0402


mahal+dfe_scores
0.8363
0.9273189326556543
0.7298
0.8897139000000001
0.9185980110121231
0.8167879127028539
0.0572


mahal+ae_scores
0.7981
0.8946253640455388
0.6758
0.84152106
0.871444050396915
0.7699669590976415
0.0796


efe_scores+dfe_scores
0.8592
0.9463220675944334
0.7616
0.9149979
0.9355900929714418
0.8439716312056738
0.0432


ae_scores+efe_scores
0.853
0.94865277