# 데이터 로드

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [0]:
import tensorflow as tf
from tensorflow.keras import layers, models

from  sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

print(tf.__version__)

2.0.0


In [0]:
train = pd.read_csv("./drive/My Drive/data/kannada/train.csv")
test  = pd.read_csv("./drive/My Drive/data/kannada/test.csv")

# 데이터 전처리

In [0]:
train_images = train.iloc[:,1:]
train_labels = train["label"]
test_images = test.iloc[:,1:]

In [0]:
x_train, x_valid, y_train, y_valid = train_test_split(train_images, train_labels, test_size=0.1, random_state=42)

In [0]:
# class_names = ['0','1','2','3','4','5','6','7','8','9']

In [0]:
# Keras는 0~1 사이의 값일 때 가장 최적의 성능을 보이기 때문에 픽셀 값의 최대값인 255로 나눠줌
x_train, x_valid, x_test = x_train.apply(lambda x: x/255), x_valid.apply(lambda x: x/255), test_images.apply(lambda x: x/255)

In [0]:
# 이미지 사이즈 변환 : (54000, 784) -> (54000, 28, 28)
# CNN은 (이미지 높이, 이미지 너비, 컬러 채널) 크기의 텐서(tensor)를 입력으로 받기 때문에
# 컬러 채널은 컬러면 3, 흑백이면 1
print(x_train.shape)
x_train = x_train.values.reshape(-1, 28, 28, 1)
x_valid = x_valid.values.reshape(-1, 28, 28, 1)
print(x_train.shape)

(54000, 784)
(54000, 28, 28, 1)


In [0]:
y_train = y_train.to_numpy()
y_valid = y_valid.to_numpy()

# 모델 구성

In [0]:
# 층을 쌓기 위해 먼저 Sequential 함수 사용
model = models.Sequential()
model.add(layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.2))

model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.2))

model.add(layers.Conv2D(256, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.2))

In [0]:
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dense(10, activation='softmax'))

In [0]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 64)        640       
_________________________________________________________________
batch_normalization (BatchNo (None, 26, 26, 64)        256       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 64)        36928     
_________________________________________________________________
batch_normalization_1 (Batch (None, 24, 24, 64)        256       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 12, 12, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 10, 10, 128)       7

# 모델 컴파일 및 학습

In [0]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [0]:
model.fit(x_train, y_train, epochs=20)

Train on 54000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f1998fdd198>

# Validation 데이터로 평가

In [0]:
valid_loss, valid_acc = model.evaluate(x_valid, y_valid, verbose=2)

6000/1 - 1s - loss: 0.0087 - accuracy: 0.9957


# Test 데이터 예측 및 평가

In [0]:
x_test = x_test.values.reshape(-1, 28, 28, 1)
pred = model.predict(x_test)
y_test = np.argmax(pred, axis=1)
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)

5000/1 - 0s - loss: 0.0033 - accuracy: 1.0000


# 제출

In [0]:
#test['label'] = np.argmax(pred, axis=1)
#sub = test[['id', 'label']]
#sub.to_csv('submission.csv', index=False)