-
Notifications
You must be signed in to change notification settings - Fork 6.4k
/
Copy pathbilstm_mnist.py
108 lines (81 loc) · 2.65 KB
/
bilstm_mnist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# https://deeplearningcourses.com/c/deep-learning-advanced-nlp
from __future__ import print_function, division
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future
import os
from keras.models import Model
from keras.layers import Input, LSTM, GRU, Bidirectional, GlobalMaxPooling1D, Lambda, Concatenate, Dense
import keras.backend as K
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# If a TensorFlow GPU is visible, swap in the cuDNN-accelerated RNN layers.
# This probe is deliberately best-effort: on any failure (no TF backend,
# older/newer Keras API missing `tensorflow_backend`, no GPU) we silently
# keep the plain LSTM/GRU imports from above.
try:
  if len(K.tensorflow_backend._get_available_gpus()) > 0:
    from keras.layers import CuDNNLSTM as LSTM
    from keras.layers import CuDNNGRU as GRU
except Exception:
  # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt still
  # propagate; the fallback to CPU layers is intentional.
  pass
def get_mnist(limit=None, path='../large_files/train.csv'):
  """Load the Kaggle digit-recognizer training set as 28x28 images.

  Parameters
  ----------
  limit : int or None
      If given, keep only the first `limit` samples (after shuffling).
  path : str
      Location of the Kaggle train.csv file. First column is the label,
      the remaining 784 columns are pixel intensities in 0..255.

  Returns
  -------
  X : np.ndarray, shape (N, 28, 28), floats scaled into [0, 1]
  Y : np.ndarray, shape (N,), integer class labels

  Raises
  ------
  FileNotFoundError
      If `path` does not exist (instead of printing a warning and then
      crashing inside pandas, as before).
  """
  if not os.path.exists(path):
    # Fail fast with download instructions rather than letting
    # pd.read_csv raise a less helpful error.
    raise FileNotFoundError(
      "Looks like you haven't downloaded the data or it's not in the right spot.\n"
      "Please get train.csv from https://www.kaggle.com/c/digit-recognizer\n"
      "and place it in the large_files folder (expected at %s)." % path
    )
  print("Reading in and transforming data...")
  data = pd.read_csv(path).values
  # Shuffle so that any later train/validation split is not ordered by label.
  np.random.shuffle(data)
  X = data[:, 1:].reshape(-1, 28, 28) / 255.0  # pixels are 0..255 -> [0, 1]
  Y = data[:, 0]
  if limit is not None:
    X, Y = X[:limit], Y[:limit]
  return X, Y
# ---- Load the data ----
X, Y = get_mnist()

# ---- Hyperparameters ----
D = 28  # image height/width (sequence length and feature size)
M = 15  # LSTM hidden units per direction

# The model reads each 28x28 image twice: once row-by-row (top-down) and
# once column-by-column (left-right), then combines both summaries.
input_ = Input(shape=(D, D))

# Top-down pass: treat the D rows as a sequence of D-dim feature vectors.
updown = Bidirectional(LSTM(M, return_sequences=True))(input_)  # N x D x 2M
updown = GlobalMaxPooling1D()(updown)                           # N x 2M

# Left-right pass: transpose so the columns become the sequence axis.
transposer = Lambda(lambda t: K.permute_dimensions(t, pattern=(0, 2, 1)))
leftright = transposer(input_)
leftright = Bidirectional(LSTM(M, return_sequences=True))(leftright)  # N x D x 2M
leftright = GlobalMaxPooling1D()(leftright)                           # N x 2M

# Merge the two direction summaries into a single N x 4M feature vector.
merged = Concatenate(axis=1)([updown, leftright])

# Final classification into the 10 digit classes.
output = Dense(10, activation='softmax')(merged)
model = Model(inputs=input_, outputs=output)

# testing
# o = model.predict(X)
# print("o.shape:", o.shape)

model.compile(
  loss='sparse_categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

print('Training model...')
history = model.fit(X, Y, batch_size=32, epochs=10, validation_split=0.3)

# ---- Loss curves ----
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.legend()
plt.show()

# ---- Accuracy curves ----
plt.plot(history.history['accuracy'], label='acc')
plt.plot(history.history['val_accuracy'], label='val_acc')
plt.legend()
plt.show()