# SBA 빅데이터 사이언스 실무연계 프로젝트 🏫
## Machine Learning service on DSVM ☁️
---

### 01. Cifar10 Image Classification with Keras on VM

* [Keras Document](http://keras.io/)  
* [Cifar10 Dataset](https://www.cs.toronto.edu/~kriz/cifar.html)


##### 전미정 | 2019.11.21. | ninevincentg@gmail.com

## Part1. Data Preparing
### 1. Cifar10 데이터 가져오기

In [None]:
# Download the image and label arrays into ./data/
import os
import urllib.request

images_url = 'https://www.dropbox.com/s/ua5rtnb1k4mdzpz/dataset.npy?dl=1'
label_url = 'https://www.dropbox.com/s/9apie8xg9vqylws/label.npy?dl=1'

# Create the target directory first; exist_ok=True keeps re-runs from failing
# when the directory is already there.
os.makedirs('./data/', exist_ok=True)

# Fetch each remote file to its local path, images first, then labels.
for source_url, target_path in (
    (images_url, './data/images.npy'),
    (label_url, './data/label.npy'),
):
    urllib.request.urlretrieve(source_url, filename=target_path)

In [None]:
# Load the downloaded arrays, split them into train/test sets and report shapes
import numpy as np
from sklearn.model_selection import train_test_split

images = np.load('./data/images.npy')
label = np.load('./data/label.npy')

# Hold out 20% of the samples for testing. No random_state is fixed, so the
# split differs from run to run.
split_parts = train_test_split(images, label, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts

print("Total Images:", images.shape)
print("Total Label:", label.shape)
# One shape per line: train images, train labels, test images, test labels.
for part in (X_train, y_train, X_test, y_test):
    print(part.shape)

### 2. 데이터 이미지 확인

In [None]:
# 이미지, 레이블 확인

%matplotlib inline

import matplotlib.pyplot as plt

# Class names indexed by the integer label value.
cifar10_label = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]

# Show `sample_size` randomly chosen images in a single row, each captioned
# with its class name.
sample_size = 10
plt.figure(figsize = (16, 6))
sample_indices = np.random.permutation(images.shape[0])[:sample_size]
for position, i in enumerate(sample_indices, start=1):
    plt.subplot(1, sample_size, position)
    # Caption placed above the image (y is in data coordinates).
    plt.text(x=10, y=-10, s=cifar10_label[label[i]], fontsize=15)
    plt.imshow(images[i], cmap=plt.cm.Greys)
    # Hide the axis ticks explicitly. Passing '' to axhline/axvline is not a
    # valid line position; an empty tick list is the supported way to blank
    # the axes.
    plt.xticks([])
    plt.yticks([])

plt.show()

### 3. 데이터 전처리

In [None]:
# Image preprocessing: cast to float32 and divide by 255
# (rescales 0~255 pixel values to 0~1)

X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

print('X_train shape:', X_train.shape)


In [None]:
import keras.utils as utils

# Label preprocessing: one-hot encoding.
# The class count is derived from BOTH splits and passed explicitly, so the
# two encoded arrays always have the same number of columns even if the
# highest class id happens to be absent from one split.
# Assumes labels are integer class ids starting at 0 (required by
# to_categorical).
num_classes = int(max(y_train.max(), y_test.max())) + 1

y_train = utils.to_categorical(y_train, num_classes)
y_test = utils.to_categorical(y_test, num_classes)

print("\ny_train:\n")
print(y_train.shape)

print("\ny_val:\n")
print(y_test.shape)

print('num_classes:', num_classes)

---

## Part2. Model Training
### 4. CNN 모델

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import optimizers

# Build the model by stacking layers:
#   conv(8) -> dropout -> conv(32) -> max-pool -> flatten -> dense(64)
#   -> dropout -> softmax over `num_classes` outputs.
# The first layer fixes the expected input to 32x32 images with 3 channels.
model = Sequential()
model.add(Conv2D(8, (3, 3), input_shape=(32, 32, 3), padding='same', activation='relu'))
model.add(Dropout(0.2))
model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile with plain SGD (no momentum, no decay) and categorical
# cross-entropy, which matches the one-hot encoded labels.
sgd = keras.optimizers.SGD(lr=0.01, momentum=0.0, decay=0.0, nesterov=False)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line).
model.summary()

### 5. 모델 훈련

In [None]:
batch_size = 32
epochs = 10

# Train the model, validating on the held-out split after every epoch.
# batch_size is passed explicitly so that editing the value above actually
# changes the training run instead of silently falling back to fit()'s default.
history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(X_test, y_test),
                    shuffle=True)

---

## Part3. Model Evaluate and Save
### 6. 모델 평가 


In [None]:
# Evaluate the trained model on the test split
scores = model.evaluate(X_test, y_test, verbose=0)
# scores[1] is the first compiled metric (accuracy here); scores[0] is the loss.
accuracy_percent = scores[1] * 100
print("Accuracy: %.2f%%" % accuracy_percent)

### 7. 모델 저장

In [None]:
# Save the trained model to the current working directory (relative path)
model_path = 'keras_cifar10_trained_model.h5'
model.save(model_path)

---

## Part4. Visualize Training
### 10. 훈련 과정 살펴보기

In [None]:
# Plot per-epoch training/validation accuracy and loss recorded by fit().
metrics = history.history

# Older Keras releases record accuracy under 'acc'/'val_acc'; newer ones use
# the name given to compile() ('accuracy'/'val_accuracy'). Accept either so
# the cell does not raise KeyError depending on the installed version.
acc_key = 'acc' if 'acc' in metrics else 'accuracy'
acc = metrics[acc_key]
val_acc = metrics['val_' + acc_key]
loss = metrics['loss']
val_loss = metrics['val_loss']

# Use a dedicated name for the x axis: rebinding `epochs` here would overwrite
# the integer hyperparameter used by the training cell and break a re-run.
epoch_axis = range(len(acc))

plt.plot(epoch_axis, acc, 'b', label='Training acc')
plt.plot(epoch_axis, val_acc, 'r', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

# Start a second figure so the loss curves do not share the accuracy axes.
plt.figure()

plt.plot(epoch_axis, loss, 'b', label='Training loss')
plt.plot(epoch_axis, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

---

## Part5. Test Model
### 11. 랜덤 이미지로 모델 테스트

In [None]:
# Pick 3 random images from the test split and compare the trained model's
# prediction with the true label.
import numpy
for index in numpy.random.choice(len(y_test), 3, replace = False):
    # Slice (not index) so the model receives a batch of one image.
    predicted = model.predict(X_test[index:index + 1])[0]
    # y_test rows are one-hot vectors and `predicted` is a softmax output, so
    # argmax yields the class index directly. Unlike where(x == max(x)), it
    # always returns a single integer, even when the maximum is tied.
    # Local names are used on purpose: rebinding `label` / `images` here would
    # overwrite the raw dataset arrays loaded earlier in the notebook.
    true_class = int(numpy.argmax(y_test[index]))
    predicted_class = int(numpy.argmax(predicted))
    title = "Label value = %s  Predicted value = %s " % (cifar10_label[true_class],  cifar10_label[predicted_class])

    fig = plt.figure(1, figsize = (3,3))
    ax1 = fig.add_axes((0,0,.8,.8))
    ax1.set_title(title)
    plt.imshow(X_test[index], cmap = plt.cm.gray_r, interpolation = 'nearest')
    plt.show()

---
수고하셨습니다 🕊