<a href="https://colab.research.google.com/github/yunha-sookmyung/Deep-Song/blob/master/deepsong_6%2C1%2C3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **ZIP파일로 모델 불러오는 과정**


In [0]:
# Runtime -> Change runtime type -> set the hardware accelerator to TPU
%tensorflow_version 2.x
# Runtime -> Restart runtime

# Previous approach: !pip install tensorflow-gpu==2.0.0-rc1

In [0]:
from google.colab import files

# Open the Colab upload dialog; the returned mapping is keyed by file name.
uploaded = files.upload()

# Echo each uploaded file's name together with the length of its data.
for fn, payload in uploaded.items():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(payload)))

In [0]:
from google.colab import drive
# Mount Google Drive at /content/drive.
drive.mount('/content/drive')

In [0]:
! mkdir deepsong                       # create the folder that will hold the extracted files
! unzip deepsong.zip -d ./deepsong    # extract the archive into that folder

# **텐서플로우 및 케라스 버전 확인**

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [0]:
import tensorflow as tf
from tensorflow import keras

print(tf.__version__)     # Print the TensorFlow version (Colab's default was 1.15.0; the "%tensorflow_version 2.x" magic switches to 2.x)
print(keras.__version__)
# Fix TensorFlow's random seed.
tf.random.set_seed(99)

# **모델 전처리 과정 - IMAGES, LABELS**

In [0]:
from PIL import Image
import os, glob, numpy as np

# Root folder that holds one sub-folder of training images per category.
caltech_dir = './deepsong/train'
categories = ["volcano", "earth+mount", "fire", "heavysnow", "city", "nature"]
nb_classes = len(categories)

# Every image is resized to this width/height before it is stored.
image_w = 256
image_h = 256

pixels = image_h * image_w * 3

x = []
y = []

for idx, name in enumerate(categories):

    # Build the one-hot label for this category (1 at the category's index).
    label = [0] * nb_classes
    label[idx] = 1

    # Images live in a sub-folder of the train folder named after the category;
    # report how many files were found for it.
    image_dir = caltech_dir + "/" + name
    # Use a dedicated name so the `files` module imported from google.colab
    # is not overwritten by a list of paths.
    image_paths = glob.glob(image_dir+"/*.*")
    print(name, " 파일 길이 : ", len(image_paths))

    # Load and preprocess each image: x receives the pixel array, y receives
    # the one-hot label built above.
    for i, f in enumerate(image_paths):
        # The context manager closes the underlying file handle as soon as the
        # converted/resized copy exists, instead of leaving it open per image.
        with Image.open(f) as src:
            img = src.convert("RGB").resize((image_w, image_h))
        data = np.asarray(img)

        x.append(data)
        y.append(label)

        # Print a sample path every 700 files as a progress indicator.
        if i % 700 == 0:
            print(name, " : ", f)

x = np.array(x)
y = np.array(y)

# A label of 1 0 0 0 0 0 means volcano,
# 0 0 1 0 0 0 means fire, and so on.

print("ok", len(y))

# **파라미터 값 지정**

In [0]:
learning_rate = 0.01  # learning rate
training_epochs = 60   # number of epochs
batch_size = 32        # batch size
Buffer_size = 100      # buffer size used when shuffling the dataset instance

# **train과 val dataset**
# **image 정규화**

In [0]:
import sklearn
from sklearn.model_selection import train_test_split

# Split into train and validation sets (85:15, as set by train_size/test_size; shuffled with a fixed random_state)
train_images, val_images, train_labels, val_labels = train_test_split(x, y,train_size=0.85, test_size=0.15, random_state=1,shuffle=True)

import os, glob
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt
import keras.backend.tensorflow_backend as K

# Normalize the data: cast to float32 and divide by 255.
train_images = train_images.astype('float32') / 255
val_images = val_images.astype('float32') / 255

print(train_images.shape, train_labels.shape)
print(val_images.shape, val_labels.shape)

# Build the tf.data dataset instances: the training set is shuffled and then
# batched, the validation set is only batched.
train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_dataset = train_dataset.shuffle(Buffer_size).batch(batch_size)
val_dataset = tf.data.Dataset.from_tensor_slices((val_images, val_labels))
val_dataset = val_dataset.batch(batch_size)

# **CNN 모델 구조 생성**

In [0]:
#CNN 모델 구조
def create_model(input_shape=(256, 256, 3), num_classes=6):
    """Build the (uncompiled) CNN classifier as a Keras Sequential model.

    The network stacks six identical blocks of Conv2D (64 filters, 3x3 kernel,
    stride 1, ReLU, 'SAME' padding) followed by MaxPool2D ('SAME' padding),
    then Flatten, two Dense(256, ReLU) layers each followed by Dropout(0.5),
    and a softmax output layer.

    Args:
        input_shape: Shape of one input image; the default matches the
            256x256 RGB images this notebook produces.
        num_classes: Number of units in the softmax output layer; the default
            matches the six categories used in this notebook.

    Returns:
        The assembled keras.Sequential model.
    """
    model = keras.Sequential()

    # Six identical conv/pool blocks; only the first one declares the input shape.
    for block_index in range(6):
        conv_kwargs = dict(filters=64, kernel_size=3, strides=1,
                           activation=tf.nn.relu, padding='SAME')
        if block_index == 0:
            conv_kwargs['input_shape'] = input_shape
        model.add(keras.layers.Conv2D(**conv_kwargs))
        model.add(keras.layers.MaxPool2D(padding='SAME'))

    model.add(keras.layers.Flatten())

    # Two fully connected layers, each followed by dropout.
    for _ in range(2):
        model.add(keras.layers.Dense(256, activation=tf.nn.relu))
        model.add(keras.layers.Dropout(0.5))

    # Output layer: one softmax unit per class.
    model.add(keras.layers.Dense(num_classes, activation=tf.nn.softmax))
    return model



In [0]:
model = create_model() # build the network and bind it to `model`
model.summary() # print a summary of the model

In [0]:
# Render the model defined above as a diagram, written to model.png
keras.utils.plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True) 

# **Train과 Validation 진행**

In [0]:
import keras.backend as K


# Build the optimizer explicitly so that the `learning_rate` hyper-parameter is
# actually applied; the 'SGD' string alias always uses the optimizer's built-in
# default and silently ignores that setting.
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
              metrics=['accuracy'])

# Run training.
history = model.fit(train_images, train_labels,         # inputs and their one-hot labels
          batch_size=batch_size,                        # samples per gradient update
          epochs=training_epochs,                       # number of passes over the training data
          verbose=1,                                    # verbose controls what is printed during training
          validation_data=(val_images, val_labels))     # evaluated after each epoch


# Check the result on the validation split.
score = model.evaluate(val_images, val_labels, verbose=0) # verbose=0 prints no progress bar, verbose=1 shows one
print('VAl loss :', score[0])
print('VAl ACCURACY :', score[1])

# **train 및 validation 결과 그래프**

In [0]:
import matplotlib.pyplot as plt
import numpy as np
import os

# 모델 학습 후 정보가 담긴 history 내용을 토대로 선 그래프를 그리는 함수 설정


def plot_loss(history, title=None):
    """Draw the training and validation loss curves with matplotlib.

    Args:
        history: Either a dict with 'loss' and 'val_loss' entries, or an
            object that exposes such a dict as its ``history`` attribute.
        title: Optional plot title; no title is set when it is None.
    """
    records = history if isinstance(history, dict) else history.history

    # Training curve first, validation curve second (matches the legend order).
    for series in ('loss', 'val_loss'):
        plt.plot(records[series])
    if title is not None:
        plt.title(title)
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Training data', 'Validation data'], loc=0)
    # Showing the figure is left to the caller.

def plot_accuracy(history, title=None):
    """Draw the training and validation accuracy curves with matplotlib.

    Args:
        history: Either a dict with 'accuracy' and 'val_accuracy' entries, or
            an object that exposes such a dict as its ``history`` attribute.
        title: Optional plot title; no title is set when it is None.
    """
    records = history if isinstance(history, dict) else history.history

    # Training curve first, validation curve second (matches the legend order).
    for series in ('accuracy', 'val_accuracy'):
        plt.plot(records[series])
    if title is not None:
        plt.title(title)
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Training data', 'Validation data'], loc=0)
    # Showing the figure is left to the caller.

In [0]:
# Visualization
plot_loss(history, '(a) Loss')  # loss over the course of training
plt.show()
# The title previously carried a stray ')' ("(b)) Accuracy").
plot_accuracy(history, '(b) Accuracy')  # accuracy over the course of training
plt.show()

# **TEST 단계**

In [0]:
model.save('deepsong.h5') # save the model architecture and weights

In [0]:
from tensorflow.keras.models import load_model
# Reload the model from the file written by model.save above.
model = load_model('deepsong.h5')

In [0]:
# Root folder that holds one sub-folder of test images per category.
test_dir = './deepsong/test'
categories = ["volcano", "earth+mount", "fire", "heavysnow", "city", "nature"]
nb_classes = len(categories)

# Every image is resized to this width/height before it is stored.
image_w = 256
image_h = 256

pixels = image_h * image_w * 3

test_image = []
test_label = []

for idx, name in enumerate(categories):

    # Build the one-hot label for this category.
    label = [0] * nb_classes
    label[idx] = 1

    image_dir = test_dir + "/" + name
    # Use a dedicated name so the `files` module imported from google.colab
    # is not overwritten by a list of paths.
    image_paths = glob.glob(image_dir+"/*.*")
    print(name, " 파일 길이 : ", len(image_paths))
    for i, f in enumerate(image_paths):
        # The context manager closes the underlying file handle as soon as the
        # converted/resized copy exists, instead of leaving it open per image.
        with Image.open(f) as src:
            img = src.convert("RGB").resize((image_w, image_h))
        data = np.asarray(img)

        test_image.append(data)
        test_label.append(label)

        # Print a sample path every 700 files as a progress indicator.
        if i % 700 == 0:
            print(name, " : ", f)


test_label = np.array(test_label)

# Normalize the test images: a single conversion to float32, then divide by 255.
test_image = np.array(test_image, dtype=np.float32) / 255


import matplotlib.pyplot as plt
%matplotlib inline 

# Display only channel index 0 of the first test image (a 2-D slice, not the full RGB image).
plt.imshow(test_image[0,:,:,0])

# **테스트 결과 도출**

In [0]:
# Evaluate the model on the test data and print the resulting loss and accuracy.
test = model.evaluate(test_image, test_label, verbose=0)

print('loss :', test[0])
print('ACCURACY :', test[1])

In [0]:
# Print the true label next to the model's prediction for the first test sample.
pred = model.predict(test_image)
print(test_label[0], pred[0])

# **confusion matrix 값 계산**

In [0]:
def true_positive(y_true, y_pred):
    """Count, for each of the six classes, the samples that were predicted correctly.

    Args:
        y_true: Ground-truth labels, either a 1-D sequence of class indices
            (0-5) or a 2-D one-hot array with one row per sample.
        y_pred: Predictions, either a 1-D sequence of class indices or a 2-D
            array of per-class scores (for example softmax output).

    Returns:
        A 6-tuple of ints ordered by class index: volcano,
        earthquake+landslide, fire, heavy snow, city, nature.

    Raises:
        ValueError: If the two inputs describe a different number of samples.
    """
    true_idx = np.asarray(y_true)
    pred_idx = np.asarray(y_pred)
    # One-hot labels and score rows cannot be compared with a scalar class id
    # (the comparison yields an array whose truth value is ambiguous), so
    # multi-dimensional inputs are first collapsed to class indices.
    if true_idx.ndim > 1:
        true_idx = true_idx.argmax(axis=-1)
    if pred_idx.ndim > 1:
        pred_idx = pred_idx.argmax(axis=-1)
    if true_idx.shape != pred_idx.shape:
        raise ValueError("y_true and y_pred must describe the same number of samples")

    # Count over however many samples were passed rather than a fixed total.
    return tuple(
        int(np.count_nonzero((true_idx == cls) & (pred_idx == cls)))
        for cls in range(6)
    )
# Print the per-class true-positive counts for the test labels and predictions.
print('true positive_each label:', true_positive(test_label, pred))

In [0]:
def zero_negative(y_true, y_pred):
    """Count how samples whose true class is 0 (volcano) were misclassified.

    Args:
        y_true: Ground-truth labels, either a 1-D sequence of class indices
            (0-5) or a 2-D one-hot array with one row per sample.
        y_pred: Predictions, either a 1-D sequence of class indices or a 2-D
            array of per-class scores (for example softmax output).

    Returns:
        A 5-tuple of ints: the number of class-0 samples predicted as class
        1, 2, 3, 4 and 5, in that order.

    Raises:
        ValueError: If the two inputs describe a different number of samples.
    """
    true_idx = np.asarray(y_true)
    pred_idx = np.asarray(y_pred)
    # One-hot labels and score rows cannot be compared with a scalar class id,
    # so multi-dimensional inputs are first collapsed to class indices.
    if true_idx.ndim > 1:
        true_idx = true_idx.argmax(axis=-1)
    if pred_idx.ndim > 1:
        pred_idx = pred_idx.argmax(axis=-1)
    if true_idx.shape != pred_idx.shape:
        raise ValueError("y_true and y_pred must describe the same number of samples")

    # Count over however many samples were passed rather than a fixed total.
    is_target = true_idx == 0
    return tuple(
        int(np.count_nonzero(is_target & (pred_idx == wrong_class)))
        for wrong_class in (1, 2, 3, 4, 5)
    )
# Print how the class-0 test samples were misclassified.
print('zero_negative:', zero_negative(test_label, pred))

In [0]:
def one_negative(y_true, y_pred):
    """Count how samples whose true class is 1 (earthquake+landslide) were misclassified.

    Args:
        y_true: Ground-truth labels, either a 1-D sequence of class indices
            (0-5) or a 2-D one-hot array with one row per sample.
        y_pred: Predictions, either a 1-D sequence of class indices or a 2-D
            array of per-class scores (for example softmax output).

    Returns:
        A 5-tuple of ints: the number of class-1 samples predicted as class
        0, 2, 3, 4 and 5, in that order.

    Raises:
        ValueError: If the two inputs describe a different number of samples.
    """
    true_idx = np.asarray(y_true)
    pred_idx = np.asarray(y_pred)
    # One-hot labels and score rows cannot be compared with a scalar class id,
    # so multi-dimensional inputs are first collapsed to class indices.
    if true_idx.ndim > 1:
        true_idx = true_idx.argmax(axis=-1)
    if pred_idx.ndim > 1:
        pred_idx = pred_idx.argmax(axis=-1)
    if true_idx.shape != pred_idx.shape:
        raise ValueError("y_true and y_pred must describe the same number of samples")

    # Count over however many samples were passed rather than a fixed total.
    is_target = true_idx == 1
    return tuple(
        int(np.count_nonzero(is_target & (pred_idx == wrong_class)))
        for wrong_class in (0, 2, 3, 4, 5)
    )
# Print how the class-1 test samples were misclassified.
print('one_negative:', one_negative(test_label, pred))

In [0]:
def two_negative(y_true, y_pred):
    """Count how samples whose true class is 2 (fire) were misclassified.

    Args:
        y_true: Ground-truth labels, either a 1-D sequence of class indices
            (0-5) or a 2-D one-hot array with one row per sample.
        y_pred: Predictions, either a 1-D sequence of class indices or a 2-D
            array of per-class scores (for example softmax output).

    Returns:
        A 5-tuple of ints: the number of class-2 samples predicted as class
        0, 1, 3, 4 and 5, in that order.

    Raises:
        ValueError: If the two inputs describe a different number of samples.
    """
    true_idx = np.asarray(y_true)
    pred_idx = np.asarray(y_pred)
    # One-hot labels and score rows cannot be compared with a scalar class id,
    # so multi-dimensional inputs are first collapsed to class indices.
    if true_idx.ndim > 1:
        true_idx = true_idx.argmax(axis=-1)
    if pred_idx.ndim > 1:
        pred_idx = pred_idx.argmax(axis=-1)
    if true_idx.shape != pred_idx.shape:
        raise ValueError("y_true and y_pred must describe the same number of samples")

    # Count over however many samples were passed rather than a fixed total.
    is_target = true_idx == 2
    return tuple(
        int(np.count_nonzero(is_target & (pred_idx == wrong_class)))
        for wrong_class in (0, 1, 3, 4, 5)
    )
# Print how the class-2 test samples were misclassified.
print('two_negative:', two_negative(test_label, pred))

In [0]:
def three_negative(y_true, y_pred):
    """Count how samples whose true class is 3 (heavy snow) were misclassified.

    Args:
        y_true: Ground-truth labels, either a 1-D sequence of class indices
            (0-5) or a 2-D one-hot array with one row per sample.
        y_pred: Predictions, either a 1-D sequence of class indices or a 2-D
            array of per-class scores (for example softmax output).

    Returns:
        A 5-tuple of ints: the number of class-3 samples predicted as class
        0, 1, 2, 4 and 5, in that order.

    Raises:
        ValueError: If the two inputs describe a different number of samples.
    """
    true_idx = np.asarray(y_true)
    pred_idx = np.asarray(y_pred)
    # One-hot labels and score rows cannot be compared with a scalar class id,
    # so multi-dimensional inputs are first collapsed to class indices.
    if true_idx.ndim > 1:
        true_idx = true_idx.argmax(axis=-1)
    if pred_idx.ndim > 1:
        pred_idx = pred_idx.argmax(axis=-1)
    if true_idx.shape != pred_idx.shape:
        raise ValueError("y_true and y_pred must describe the same number of samples")

    # Count over however many samples were passed rather than a fixed total.
    is_target = true_idx == 3
    return tuple(
        int(np.count_nonzero(is_target & (pred_idx == wrong_class)))
        for wrong_class in (0, 1, 2, 4, 5)
    )
# Print how the class-3 test samples were misclassified.
print('three_negative:', three_negative(test_label, pred))

In [0]:
def four_negative(y_true, y_pred):
    """Count how samples whose true class is 4 (city) were misclassified.

    Args:
        y_true: Ground-truth labels, either a 1-D sequence of class indices
            (0-5) or a 2-D one-hot array with one row per sample.
        y_pred: Predictions, either a 1-D sequence of class indices or a 2-D
            array of per-class scores (for example softmax output).

    Returns:
        A 5-tuple of ints: the number of class-4 samples predicted as class
        0, 1, 2, 3 and 5, in that order.

    Raises:
        ValueError: If the two inputs describe a different number of samples.
    """
    true_idx = np.asarray(y_true)
    pred_idx = np.asarray(y_pred)
    # One-hot labels and score rows cannot be compared with a scalar class id,
    # so multi-dimensional inputs are first collapsed to class indices.
    if true_idx.ndim > 1:
        true_idx = true_idx.argmax(axis=-1)
    if pred_idx.ndim > 1:
        pred_idx = pred_idx.argmax(axis=-1)
    if true_idx.shape != pred_idx.shape:
        raise ValueError("y_true and y_pred must describe the same number of samples")

    # Count over however many samples were passed rather than a fixed total.
    is_target = true_idx == 4
    return tuple(
        int(np.count_nonzero(is_target & (pred_idx == wrong_class)))
        for wrong_class in (0, 1, 2, 3, 5)
    )
# Print how the class-4 test samples were misclassified.
print('four_negative:', four_negative(test_label, pred))

In [0]:
def five_negative(y_true, y_pred):
    """Count how samples whose true class is 5 (nature) were misclassified.

    Args:
        y_true: Ground-truth labels, either a 1-D sequence of class indices
            (0-5) or a 2-D one-hot array with one row per sample.
        y_pred: Predictions, either a 1-D sequence of class indices or a 2-D
            array of per-class scores (for example softmax output).

    Returns:
        A 5-tuple of ints: the number of class-5 samples predicted as class
        0, 1, 2, 3 and 4, in that order.

    Raises:
        ValueError: If the two inputs describe a different number of samples.
    """
    true_idx = np.asarray(y_true)
    pred_idx = np.asarray(y_pred)
    # One-hot labels and score rows cannot be compared with a scalar class id,
    # so multi-dimensional inputs are first collapsed to class indices.
    if true_idx.ndim > 1:
        true_idx = true_idx.argmax(axis=-1)
    if pred_idx.ndim > 1:
        pred_idx = pred_idx.argmax(axis=-1)
    if true_idx.shape != pred_idx.shape:
        raise ValueError("y_true and y_pred must describe the same number of samples")

    # Count over however many samples were passed rather than a fixed total.
    is_target = true_idx == 5
    return tuple(
        int(np.count_nonzero(is_target & (pred_idx == wrong_class)))
        for wrong_class in (0, 1, 2, 3, 4)
    )
# Print how the class-5 test samples were misclassified.
print('five_negative:', five_negative(test_label, pred))