# Mount Google Drive

In [None]:
# Mount Google Drive so the data stored there can be accessed from this notebook.

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Data loading & preprocessing

In [None]:
import os, numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from tensorflow.keras.layers import Input, BatchNormalization, Conv2D, Concatenate, ReLU, MaxPool2D, GlobalAveragePooling2D, AveragePooling2D, AvgPool2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras import optimizers

In [None]:
# (1) Load the dataset (the task text refers to the file "dfBH2.csv").

# Directory that holds the input files
data_path = '/content/drive/MyDrive/숨고/류재용님(densenet 구현)/data/'
data_list = sorted(os.listdir(data_path))

# The sorted listing is consumed in consecutive pairs: a tab-separated feature
# table followed by the .npy label array that belongs to it.
# NOTE(review): this relies on the sorted file names alternating table/labels — confirm.
file_pairs = list(zip(data_list[0::2], data_list[1::2]))
if not file_pairs:
    raise FileNotFoundError(f'No (feature table, label array) file pair found in {data_path}')

# Read every pair first and concatenate once at the end; concatenating inside
# the loop would re-copy all previously loaded rows on every iteration.
# The first column of each table is dropped (.iloc[:, 1:]).
feature_frames = [pd.read_csv(data_path + table_name, sep='\t').iloc[:, 1:]
                  for table_name, _ in file_pairs]
label_arrays = [np.load(data_path + label_name) for _, label_name in file_pairs]

data_x = pd.concat(feature_frames)
data_y = np.concatenate(label_arrays)

# Every feature row needs exactly one label row.
assert len(data_x) == len(data_y), 'feature rows and label rows are misaligned'



In [None]:
# Display the combined feature table (one row per sample).
data_x

Unnamed: 0,5720,466,6009,2309,387,3553,427,5898,23365,6657,...,9738,6793,7358,58472,50865,23200,51293,10962,10153,874
0,-420.0,-616.0,539.0,484.0,95.0,878.0,82.0,-517.0,91.0,443.0,...,909.0,749.0,126.0,-687.0,-858.0,-411.0,-957.0,590.0,-558.0,535.0
1,561.0,-99.0,551.0,782.0,214.0,-862.0,-736.0,569.0,-252.0,164.0,...,828.0,637.0,236.0,-426.0,54.0,886.0,468.0,332.0,-226.0,620.0
2,254.0,-531.0,39.0,-792.0,-336.0,-604.0,-231.0,359.0,307.0,467.0,...,920.0,958.0,-475.0,-826.0,965.0,-568.0,-964.0,-310.0,529.0,489.0
3,-292.0,-323.0,564.0,-629.0,815.0,-667.0,236.0,-20.0,784.0,-53.0,...,855.0,-127.0,222.0,836.0,-907.0,-716.0,-212.0,605.0,-172.0,523.0
4,-660.0,360.0,-14.0,-566.0,-482.0,98.0,-506.0,-844.0,-249.0,-691.0,...,851.0,935.0,617.0,837.0,491.0,722.0,139.0,676.0,-284.0,-543.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17498,962.0,-701.0,465.0,123.0,789.0,-641.0,-822.0,-643.0,393.0,323.0,...,411.0,941.0,-933.0,899.0,-654.0,-858.0,-813.0,881.0,-374.0,-533.0
17499,184.0,-343.0,-551.0,-650.0,-617.0,562.0,-475.0,143.0,-261.0,39.0,...,-116.0,733.0,182.0,955.0,-469.0,259.0,179.0,-878.0,808.0,-893.0
17500,-338.0,-545.0,17.0,-339.0,-754.0,395.0,-525.0,-350.0,-546.0,800.0,...,934.0,670.0,-696.0,645.0,-950.0,686.0,-898.0,-928.0,684.0,-155.0
17501,460.0,-484.0,-716.0,-854.0,-665.0,898.0,-319.0,-91.0,-715.0,425.0,...,-9.0,614.0,-413.0,-286.0,-401.0,149.0,-106.0,-905.0,-523.0,-100.0


In [None]:
# Display the combined label array loaded from the .npy files.
data_y

array([[1, 1, 0, ..., 0, 0, 0],
       [1, 1, 0, ..., 0, 0, 0],
       [1, 1, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

# densenet 생성

In [None]:
# Train/test split: hold out 20% of the samples for evaluation.
# A fixed random_state makes the split (and therefore the reported metrics)
# reproducible across kernel restarts.
train_x, test_x, train_y, test_y = train_test_split(
    data_x, data_y, test_size=0.2, random_state=42)

# Conv2D expects (height, width, channels) per sample, so each feature vector
# becomes a 1 x n_features "image" with a single channel.
n_features = data_x.shape[1]
train_x = train_x.values.reshape((-1, 1, n_features, 1))
test_x = test_x.values.reshape((-1, 1, n_features, 1))

In [None]:
def densenet(n_classes, filters=32, input_shape=None, compression=0.5):
    """Build a DenseNet-style multi-label classifier.

    The network uses four dense blocks of 6, 12, 24 and 16 layers, followed by
    global average pooling and a sigmoid output layer (one unit per label).

    Args:
        n_classes: Number of output units (labels).
        filters: Growth rate, i.e. the number of feature maps every dense
            layer adds to the running concatenation.
        input_shape: Shape of one sample as (height, width, channels). When
            omitted, falls back to ``(1, train_x.shape[2], 1)`` using the
            notebook-level ``train_x`` array.
        compression: Fraction of channels kept by each transition layer.

    Returns:
        An uncompiled ``tensorflow.keras.Model``.
    """

    def bn_rl_conv(x, n_filters, kernel=1, strides=1):
        """Batch norm -> ReLU -> convolution ('same' padding)."""
        x = BatchNormalization()(x)
        x = ReLU()(x)
        return Conv2D(n_filters, kernel, strides=strides, padding='same')(x)

    def dense_block(x, repetition):
        """Stack `repetition` bottleneck layers with dense connectivity."""
        for _ in range(repetition):
            # Each layer must read the concatenation of the block input and
            # ALL earlier layer outputs; feeding the unchanged block input to
            # every layer would turn the block into parallel branches.
            y = bn_rl_conv(x, 4 * filters)   # 1x1 bottleneck
            y = bn_rl_conv(y, filters, 3)    # 3x3 conv adding `filters` maps
            x = Concatenate()([x, y])
        return x

    def transition_layer(x):
        """Compress the channels with a 1x1 conv, then halve the resolution."""
        # Channel count scales with the incoming tensor instead of a fixed
        # constant; only the pooling downsamples (stride-1 convolution).
        n_channels = max(1, int(x.shape[-1] * compression))
        x = bn_rl_conv(x, n_channels)
        return AvgPool2D(2, strides=2, padding='same')(x)

    if input_shape is None:
        input_shape = (1, train_x.shape[2], 1)
    inputs = Input(shape=input_shape)

    # Stem: a kernel wider than the stride, so that no input position is
    # skipped (a 1x1 kernel with stride 2 never reads every other feature).
    x = Conv2D(64, 7, strides=2, padding='same')(inputs)
    x = MaxPool2D(3, strides=2, padding='same')(x)

    block_sizes = [6, 12, 24, 16]
    last_block = len(block_sizes) - 1
    for block_idx, repetition in enumerate(block_sizes):
        x = dense_block(x, repetition)
        # No transition after the final block: its output goes straight to
        # the classification head.
        if block_idx < last_block:
            x = transition_layer(x)

    x = GlobalAveragePooling2D()(x)

    # Independent sigmoid per label (multi-label output).
    output = Dense(n_classes, activation='sigmoid')(x)

    return Model(inputs, output)

# One output unit per label column of data_y; then print the layer summary.
n_labels = data_y.shape[1]
model = densenet(n_labels)
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 1, 978, 1)]  0           []                               
                                                                                                  
 conv2d_363 (Conv2D)            (None, 1, 489, 64)   128         ['input_4[0][0]']                
                                                                                                  
 max_pooling2d_3 (MaxPooling2D)  (None, 1, 245, 64)  0           ['conv2d_363[0][0]']             
                                                                                                  
 batch_normalization_348 (Batch  (None, 1, 245, 64)  256         ['max_pooling2d_3[0][0]']        
 Normalization)                                                                             

In [None]:
# Train the model
optimizer = optimizers.Adam(learning_rate=0.001)

# Pass the configured optimizer object; passing the string "adam" would
# silently ignore the instance created above.
model.compile(optimizer=optimizer,
              loss="binary_crossentropy",
              metrics=["accuracy"])

# Stop once validation accuracy has not improved for 15 epochs and roll the
# model back to its best epoch, so the weights saved afterwards are the best
# ones rather than those of the last epoch.
# NOTE(review): for sparse multi-label targets 'val_loss' may be a more
# informative quantity to monitor than accuracy — confirm.
es = EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=15,
                   restore_best_weights=True)

history = model.fit(train_x,
                    train_y,
                    callbacks=[es],
                    batch_size=64,
                    epochs=100,
                    validation_split=0.1,
                    shuffle=True)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100

KeyboardInterrupt: ignored

In [None]:
# Save the trained weights to Google Drive.
weights_path = "/content/drive/MyDrive/숨고/류재용님(densenet 구현)/classification_weights.best.hdf5"
model.save_weights(weights_path)

# Evaluation

In [None]:
# Threshold the sigmoid outputs once and reuse the binary predictions.
pred_y = model.predict(test_x) > 0.5

# For a 2-D label-indicator target, accuracy_score is exact-match (subset)
# accuracy: a sample only counts when EVERY label is right, so the value is
# expected to be very low. The two metrics below describe per-label quality.
print('Accuracy :', accuracy_score(test_y, pred_y))
print('Label-wise accuracy :', (pred_y == test_y).mean())
print('F1 (micro) :', f1_score(test_y, pred_y, average='micro', zero_division=0))

Accuracy : 0.00337778813115136
