<a href="https://colab.research.google.com/github/yeseul106/UROP_Software-Fault-Prediction/blob/main/Measure_metricsipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Connect Google Drive: mount it at /content/drive so files stored there
# can be opened by path from this notebook.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Read the JM1 CSV file from the mounted Drive (decoded with the cp949 codec),
# then print its (rows, columns) shape and preview the first five rows.
dataset_path = '/content/drive/MyDrive/UROP/UROP dataset/JM1_Cfs_WEKA(PROMISE).csv'
JM1_Weka = pd.read_csv(dataset_path, encoding='cp949')
print(JM1_Weka.shape)
JM1_Weka.head(5)

In [None]:
# Display the column labels of the loaded DataFrame.
JM1_Weka.columns

# **Feature Selection을 한 경우**

- hidden layer : 5개
- hidden layer 노드 수 : 32개
- 각 hidden layer 뒤에 BatchNormalization 적용
- Dropout layer : 5개, 비율은 0.5
- 각 층의 활성화 함수: relu
- 출력층 활성화 함수: sigmoid
- optimizer 함수: adam
- batch size : 5

In [None]:
# 입력 데이터
input_data = JM1_Weka.iloc[:,:8].to_numpy()
print("input_data shape : ",input_data.shape)

In [None]:
# Inspect the first row of the feature matrix.
input_data[0]

In [None]:
# 정답 레이블
y_label = JM1_Weka.iloc[:,8].to_numpy()
print("y_label shape : ",y_label.shape)

In [None]:
# Inspect the first label value.
y_label[0]

In [None]:
# Count the class balance of the labels: truthy entries are tallied in
# cnt_true and falsy entries in cnt_false. The labels themselves are not
# modified here; the counts are only used for reporting.
# A vectorized count replaces the element-by-element Python loop.
cnt_true = int(np.count_nonzero(y_label))
cnt_false = len(y_label) - cnt_true

In [None]:
# Report the number of samples in each class.
for caption, count in (("fault-prone: ", cnt_true), ("non-fault-prone: ", cnt_false)):
  print(caption, count)

In [None]:
# Inspect the first label value.
y_label[0]

In [None]:
# Vectorize the labels: convert them to a float32 NumPy array.
label_array = np.asarray(y_label)
y_label = label_array.astype(np.float32)

In [None]:
# Check the shape of the label vector.
y_label.shape

In [None]:
from sklearn import preprocessing

# Min-max normalization: rescale every feature column into the 0-1 range.
# NOTE(review): the scaler is fitted on all rows, and the cross-validation
# split is taken from the already-scaled array, so each fold's test rows
# influence the scaling statistics. Confirm whether this is acceptable or
# whether the scaler should be fitted on the training rows of each fold.
data_normalizer = preprocessing.MinMaxScaler()
data_normalizer.fit(input_data)
data_normalized = data_normalizer.transform(input_data)
data_normalized.shape

In [None]:
# Inspect the first normalized feature row.
data_normalized[0]

# **5-fold cross-validation**

In [None]:
from sklearn.model_selection import StratifiedKFold
from keras import models
from keras import layers
from keras.callbacks import EarlyStopping
import tensorflow as tf

# Split the data into five stratified folds; shuffling uses a fixed seed.
n_fold, seed = 5, 0
skf = StratifiedKFold(shuffle=True, random_state=seed, n_splits=n_fold)

# Empty per-fold result lists; the training loop appends one entry per fold.
accuracy, loss, auc, FPR, FNR = [], [], [], [], []

In [None]:
from typing import Pattern
# Automatic training stop: this callback monitors the 'loss' metric
# with patience=100 and is passed to model.fit() in the training loop.
early_stopping_callback = EarlyStopping(monitor='loss', patience=100)

In [None]:
# Stratified k-fold training loop.
# For every fold a new network is built, trained on the fold's training rows
# and evaluated once on its held-out rows. The per-fold accuracy, loss, FPR,
# FNR and AUC are appended (formatted to 4 decimals, as strings) to the
# result lists created earlier.
for train, test in skf.split(data_normalized, y_label):
  # Build a fresh model per fold so no weights carry over between folds.
  model = models.Sequential()
  for block_idx in range(5):
    if block_idx == 0:
      # Only the first layer declares the input width (one value per feature column).
      model.add(layers.Dense(32, input_shape=(data_normalized.shape[1],)))
    else:
      model.add(layers.Dense(32))
    # Each hidden block is Dense -> BatchNormalization -> ReLU -> Dropout(0.5).
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))
    model.add(layers.Dropout(0.5))
  # Single sigmoid output for the binary label.
  model.add(layers.Dense(1))
  model.add(layers.Activation('sigmoid'))

  # The metric order here fixes the order of the values returned by evaluate().
  model.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=[tf.keras.metrics.TruePositives(),
                         tf.keras.metrics.FalseNegatives(),
                         tf.keras.metrics.FalsePositives(),
                         tf.keras.metrics.TrueNegatives(),
                         "AUC"])
  hist = model.fit(data_normalized[train], y_label[train], epochs=600, batch_size=5, verbose=1, callbacks=[early_stopping_callback])

  # Evaluate the held-out fold exactly once; the result is
  # [loss, TP, FN, FP, TN, AUC], following the compile() metric order.
  fold_loss, TP, FN, FP, TN, fold_auc = model.evaluate(data_normalized[test], y_label[test])

  # Derive the confusion-matrix based rates for this fold.
  k_accuracy = "%.4f" % ((TP + TN) / (TP + FP + FN + TN))
  k_loss = "%.4f" % fold_loss
  k_FPR = "%.4f" % (FP / (FP + TN))
  k_FNR = "%.4f" % (FN / (TP + FN))
  k_auc = "%.4f" % fold_auc

  accuracy.append(k_accuracy)
  loss.append(k_loss)
  FNR.append(k_FNR)
  FPR.append(k_FPR)
  auc.append(k_auc)
 

In [None]:
# Print, for every metric, the list of per-fold values followed by their mean.
# The per-fold values are stored as formatted strings, so they are converted
# back to float before averaging. The mean is taken over however many folds
# were recorded instead of assuming exactly five entries.
# The accuracy mean was previously labelled "auc"; it now carries the
# correct metric name.
for metric_name, fold_values in (("accuracy", accuracy),
                                 ("loss", loss),
                                 ("FNR", FNR),
                                 ("FPR", FPR),
                                 ("auc", auc)):
  print("\n %.f fold %s:" % (n_fold, metric_name), fold_values)
  print("모든 fold의 평균 %s: " % metric_name, sum(float(value) for value in fold_values) / len(fold_values))