# 資料載入

In [13]:
import pandas as pd
import os

# Expose no CUDA device to this process (set before keras is imported below).
os.environ['CUDA_VISIBLE_DEVICES'] = ""

# Read the training and test tables from the local data/ directory.
row_train, row_test = (
    pd.read_csv(csv_path) for csv_path in ('data/train.csv', 'data/test.csv')
)

# Keep the target column on its own and leave only feature columns in row_train.
label = row_train['label']
row_train = row_train.drop('label', axis=1)

# 建模

In [14]:
import keras
import numpy as np
from sklearn.model_selection import train_test_split
from keras import layers

# Tunable constants, gathered in one place instead of being scattered inline.
IMG_SIZE = 28       # every CSV row is reshaped to IMG_SIZE x IMG_SIZE x 1 below
PIXEL_MAX = 255     # divisor applied to the pixel columns
NUM_CLASSES = 10    # width of the softmax output layer
EPOCHS = 50
BATCH_SIZE = 100
SEED = 42           # fixes the train/validation split only; weight init is not seeded here

# Scale the pixel columns by 1/PIXEL_MAX.
# test_x stays a DataFrame because a later cell reads test_x.shape[0].
test_x = row_test / PIXEL_MAX

# One-hot encode the labels. The dtype is explicit so the targets are floats
# regardless of the pandas version (get_dummies defaults to bool on pandas 2.x).
label_vec = pd.get_dummies(label, dtype='float32')

# Reshape the flat rows into (n_samples, height, width, channels) image tensors.
train_x_array = (row_train / PIXEL_MAX).values.reshape(-1, IMG_SIZE, IMG_SIZE, 1)
test_x_array = test_x.values.reshape(-1, IMG_SIZE, IMG_SIZE, 1)
print(train_x_array.shape)
print(test_x_array.shape)

# Train/validation split (default 75/25). A fixed random_state makes the split
# reproducible across runs; stratifying on the raw labels keeps the class
# proportions the same in both parts.
train_x, val_x, train_y, val_y = train_test_split(
    train_x_array,
    label_vec,
    random_state=SEED,
    stratify=label,
)

# Four convolution layers with max-pooling after the last three,
# followed by a dense classification head.
layer_list = [layers.Conv2D(50,kernel_size=(5,5),input_shape=(IMG_SIZE,IMG_SIZE,1),activation='relu'),
              layers.Conv2D(50,kernel_size=(3,3),activation='relu'),
              layers.MaxPool2D(2,2),
              layers.Conv2D(100,kernel_size=(3,3),activation='relu'),
              layers.MaxPool2D(2,2),
              layers.Conv2D(200,kernel_size=(3,3),activation='relu'),
              layers.MaxPool2D(2,2),
              layers.Flatten(),
              layers.Dense(200,activation='relu'),
              layers.Dense(NUM_CLASSES,activation='softmax')]

model = keras.models.Sequential(layer_list)
model.compile(loss='categorical_crossentropy',
             optimizer=keras.optimizers.Adam(),
             metrics=['accuracy'])

model.summary()

# Keep the returned History object: the plotting cell reads its .history dict.
training_process = model.fit(train_x,train_y,
                             epochs=EPOCHS,
                             batch_size=BATCH_SIZE,
                             validation_data=(val_x,val_y))


(42000, 28, 28, 1)
(28000, 28, 28, 1)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_31 (Conv2D)           (None, 24, 24, 50)        1300      
_________________________________________________________________
conv2d_32 (Conv2D)           (None, 22, 22, 50)        22550     
_________________________________________________________________
max_pooling2d_22 (MaxPooling (None, 11, 11, 50)        0         
_________________________________________________________________
conv2d_33 (Conv2D)           (None, 9, 9, 100)         45100     
_________________________________________________________________
max_pooling2d_23 (MaxPooling (None, 4, 4, 100)         0         
_________________________________________________________________
conv2d_34 (Conv2D)           (None, 2, 2, 200)         180200    
_________________________________________________________________
max_pooling2d_24 (MaxPooling (None, 1,

Epoch 48/50
Epoch 49/50
Epoch 50/50


In [15]:
import matplotlib.pyplot as plt

# The accuracy entries in History.history are named 'acc'/'val_acc' by older
# Keras releases and 'accuracy'/'val_accuracy' by newer ones, so pick whichever
# key is present instead of hard-coding one and risking a KeyError.
history = training_process.history
acc_key = 'acc' if 'acc' in history else 'accuracy'

plt.figure(figsize=(20,16))
plt.plot(history[acc_key], label='train')
plt.plot(history['val_' + acc_key], label='validation')
plt.xlabel('epochs')
plt.ylabel('acc')
plt.title('Accuracy')
plt.legend()  # without a legend the two curves cannot be told apart
plt.show()

<Figure size 2000x1600 with 1 Axes>

In [16]:

# For every test image, take the position of the largest model output as the predicted class.
class_scores = model.predict(test_x_array)
pred = np.argmax(class_scores, axis=1)

# ImageId counts the test rows starting at 1; Label holds the predicted class.
index = pd.DataFrame({'ImageId': range(1, test_x.shape[0] + 1)})
pred = pd.DataFrame({'Label': pred})

# Put the two columns side by side and write them out without the DataFrame index.
res_df = pd.concat([index, pred], axis=1)
res_df.to_csv('res.csv', index=False)
