Skip to content

Commit fec5170

Browse files
committed
feature: MNIST text Character image opencv using convolutional autoencoder
0 parents  commit fec5170

File tree

6 files changed

+368
-0
lines changed

6 files changed

+368
-0
lines changed
1.64 KB
Binary file not shown.

convautoencoder.ipynb

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"source": [
7+
"# import the necessary packages\n",
8+
"from tensorflow.keras.layers import BatchNormalization\n",
9+
"from tensorflow.keras.layers import Conv2D\n",
10+
"from tensorflow.keras.layers import Conv2DTranspose\n",
11+
"from tensorflow.keras.layers import LeakyReLU\n",
12+
"from tensorflow.keras.layers import Activation\n",
13+
"from tensorflow.keras.layers import Flatten\n",
14+
"from tensorflow.keras.layers import Dense\n",
15+
"from tensorflow.keras.layers import Reshape\n",
16+
"from tensorflow.keras.layers import Input\n",
17+
"from tensorflow.keras.models import Model\n",
18+
"from tensorflow.keras import backend as K\n",
19+
"import numpy as np\n",
20+
"\n",
21+
"\n",
22+
"def build(width, height, depth, filters=(32, 64), latentDim=16):\n",
23+
" # initialize the input shape to be \"channels last\" along with\n",
24+
" # the channels dimension itself\n",
25+
" # (chanDim = -1 below selects that last axis)\n",
26+
" inputShape = (height, width, depth)\n",
27+
" chanDim = -1\n",
28+
"\n",
29+
" # define the input to the encoder\n",
30+
" inputs = Input(shape=inputShape)\n",
31+
" x = inputs\n",
32+
" # loop over the number of filters\n",
33+
" for f in filters:\n",
34+
" # apply a CONV => RELU => BN operation\n",
35+
" x = Conv2D(f, (3, 3), strides=2, padding=\"same\")(x)\n",
36+
" x = LeakyReLU(alpha=0.2)(x)\n",
37+
" x = BatchNormalization(axis=chanDim)(x)\n",
38+
" # flatten the network and then construct our latent vector\n",
39+
" volumeSize = K.int_shape(x)\n",
40+
" x = Flatten()(x)\n",
41+
" latent = Dense(latentDim)(x)\n",
42+
" # build the encoder model\n",
43+
" encoder = Model(inputs, latent, name=\"encoder\")\n",
44+
"\n",
45+
" # start building the decoder model which will accept the\n",
46+
" # output of the encoder as its inputs\n",
47+
" latentInputs = Input(shape=(latentDim,))\n",
48+
" x = Dense(np.prod(volumeSize[1:]))(latentInputs)\n",
49+
" x = Reshape((volumeSize[1], volumeSize[2], volumeSize[3]))(x)\n",
50+
" # loop over our number of filters again, but this time in\n",
51+
" # reverse order\n",
52+
" for f in filters[::-1]:\n",
53+
" # apply a CONV_TRANSPOSE => RELU => BN operation\n",
54+
" x = Conv2DTranspose(f, (3, 3), strides=2,\n",
55+
" padding=\"same\")(x)\n",
56+
" x = LeakyReLU(alpha=0.2)(x)\n",
57+
" x = BatchNormalization(axis=chanDim)(x)\n",
58+
" \n",
59+
" # apply a single CONV_TRANSPOSE layer used to recover the\n",
60+
" # original depth of the image\n",
61+
" x = Conv2DTranspose(depth, (3, 3), padding=\"same\")(x)\n",
62+
" outputs = Activation(\"sigmoid\")(x)\n",
63+
" # build the decoder model\n",
64+
" decoder = Model(latentInputs, outputs, name=\"decoder\")\n",
65+
" # our autoencoder is the encoder + decoder\n",
66+
" autoencoder = Model(inputs, decoder(encoder(inputs)),\n",
67+
" name=\"autoencoder\")\n",
68+
" # return a 3-tuple of the encoder, decoder, and autoencoder\n",
69+
" return (encoder, decoder, autoencoder)"
70+
],
71+
"outputs": [
72+
{
73+
"output_type": "stream",
74+
"name": "stdout",
75+
"text": [
76+
"Init Plugin\n",
77+
"Init Graph Optimizer\n",
78+
"Init Kernel\n"
79+
]
80+
}
81+
],
82+
"metadata": {}
83+
},
84+
{
85+
"cell_type": "code",
86+
"execution_count": 3,
87+
"source": [
88+
"encoder, decoder, autoencoder = build(28, 28, 1)\n",
89+
"encoder.summary()"
90+
],
91+
"outputs": [
92+
{
93+
"output_type": "stream",
94+
"name": "stdout",
95+
"text": [
96+
"Model: \"encoder\"\n",
97+
"_________________________________________________________________\n",
98+
"Layer (type) Output Shape Param # \n",
99+
"=================================================================\n",
100+
"input_3 (InputLayer) [(None, 28, 28, 1)] 0 \n",
101+
"_________________________________________________________________\n",
102+
"conv2d_2 (Conv2D) (None, 14, 14, 32) 320 \n",
103+
"_________________________________________________________________\n",
104+
"leaky_re_lu_4 (LeakyReLU) (None, 14, 14, 32) 0 \n",
105+
"_________________________________________________________________\n",
106+
"batch_normalization_4 (Batch (None, 14, 14, 32) 128 \n",
107+
"_________________________________________________________________\n",
108+
"conv2d_3 (Conv2D) (None, 7, 7, 64) 18496 \n",
109+
"_________________________________________________________________\n",
110+
"leaky_re_lu_5 (LeakyReLU) (None, 7, 7, 64) 0 \n",
111+
"_________________________________________________________________\n",
112+
"batch_normalization_5 (Batch (None, 7, 7, 64) 256 \n",
113+
"_________________________________________________________________\n",
114+
"flatten_1 (Flatten) (None, 3136) 0 \n",
115+
"_________________________________________________________________\n",
116+
"dense_2 (Dense) (None, 16) 50192 \n",
117+
"=================================================================\n",
118+
"Total params: 69,392\n",
119+
"Trainable params: 69,200\n",
120+
"Non-trainable params: 192\n",
121+
"_________________________________________________________________\n"
122+
]
123+
}
124+
],
125+
"metadata": {}
126+
},
127+
{
128+
"cell_type": "code",
129+
"execution_count": 11,
130+
"source": [
131+
"decoder.summary()"
132+
],
133+
"outputs": [
134+
{
135+
"output_type": "stream",
136+
"name": "stdout",
137+
"text": [
138+
"Model: \"decoder\"\n",
139+
"_________________________________________________________________\n",
140+
"Layer (type) Output Shape Param # \n",
141+
"=================================================================\n",
142+
"input_9 (InputLayer) [(None, 16)] 0 \n",
143+
"_________________________________________________________________\n",
144+
"dense_8 (Dense) (None, 3136) 53312 \n",
145+
"_________________________________________________________________\n",
146+
"reshape_3 (Reshape) (None, 7, 7, 64) 0 \n",
147+
"_________________________________________________________________\n",
148+
"conv2d_transpose_9 (Conv2DTr (None, 14, 14, 64) 36928 \n",
149+
"_________________________________________________________________\n",
150+
"leaky_re_lu_16 (LeakyReLU) (None, 14, 14, 64) 0 \n",
151+
"_________________________________________________________________\n",
152+
"batch_normalization_16 (Batc (None, 14, 14, 64) 256 \n",
153+
"_________________________________________________________________\n",
154+
"conv2d_transpose_10 (Conv2DT (None, 28, 28, 32) 18464 \n",
155+
"_________________________________________________________________\n",
156+
"leaky_re_lu_17 (LeakyReLU) (None, 28, 28, 32) 0 \n",
157+
"_________________________________________________________________\n",
158+
"batch_normalization_17 (Batc (None, 28, 28, 32) 128 \n",
159+
"_________________________________________________________________\n",
160+
"conv2d_transpose_11 (Conv2DT (None, 28, 28, 1) 289 \n",
161+
"_________________________________________________________________\n",
162+
"activation_3 (Activation) (None, 28, 28, 1) 0 \n",
163+
"=================================================================\n",
164+
"Total params: 109,377\n",
165+
"Trainable params: 109,185\n",
166+
"Non-trainable params: 192\n",
167+
"_________________________________________________________________\n"
168+
]
169+
}
170+
],
171+
"metadata": {}
172+
},
173+
{
174+
"cell_type": "code",
175+
"execution_count": 9,
176+
"source": [
177+
"autoencoder.summary()"
178+
],
179+
"outputs": [
180+
{
181+
"output_type": "stream",
182+
"name": "stdout",
183+
"text": [
184+
"Model: \"autoencoder\"\n",
185+
"_________________________________________________________________\n",
186+
"Layer (type) Output Shape Param # \n",
187+
"=================================================================\n",
188+
"input_6 (InputLayer) [(None, 28, 28, 1)] 0 \n",
189+
"_________________________________________________________________\n",
190+
"encoder (Functional) (None, 16) 69392 \n",
191+
"_________________________________________________________________\n",
192+
"decoder (Functional) (None, 28, 28, 1) 109377 \n",
193+
"=================================================================\n",
194+
"Total params: 178,769\n",
195+
"Trainable params: 178,385\n",
196+
"Non-trainable params: 384\n",
197+
"_________________________________________________________________\n"
198+
]
199+
}
200+
],
201+
"metadata": {}
202+
}
203+
],
204+
"metadata": {
205+
"orig_nbformat": 4,
206+
"language_info": {
207+
"name": "python",
208+
"version": "3.8.10",
209+
"mimetype": "text/x-python",
210+
"codemirror_mode": {
211+
"name": "ipython",
212+
"version": 3
213+
},
214+
"pygments_lexer": "ipython3",
215+
"nbconvert_exporter": "python",
216+
"file_extension": ".py"
217+
},
218+
"kernelspec": {
219+
"name": "python3",
220+
"display_name": "Python 3.8.10 64-bit ('tf2.5': conda)"
221+
},
222+
"interpreter": {
223+
"hash": "79f4630616981068147ecb693f55d51ab12fab43ffc02db62e4992b7ed83fc2b"
224+
}
225+
},
226+
"nbformat": 4,
227+
"nbformat_minor": 2
228+
}

convautoencoder.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# import the necessary packages
2+
from tensorflow.keras.layers import BatchNormalization
3+
from tensorflow.keras.layers import Conv2D
4+
from tensorflow.keras.layers import Conv2DTranspose
5+
from tensorflow.keras.layers import LeakyReLU
6+
from tensorflow.keras.layers import Activation
7+
from tensorflow.keras.layers import Flatten
8+
from tensorflow.keras.layers import Dense
9+
from tensorflow.keras.layers import Reshape
10+
from tensorflow.keras.layers import Input
11+
from tensorflow.keras.models import Model
12+
from tensorflow.keras import backend as K
13+
import numpy as np
14+
15+
class ConvAutoencoder:
    """Builder for a convolutional autoencoder made of Keras layers."""

    @staticmethod
    def build(width, height, depth, filters=(32, 64), latentDim=16):
        """Assemble the encoder, the decoder and the combined autoencoder.

        Args:
            width: width of the input images.
            height: height of the input images.
            depth: number of channels of the input images.
            filters: filter count of each stride-2 convolution in the
                encoder; the decoder walks the same counts in reverse.
            latentDim: number of units of the Dense layer that produces
                the latent vector.

        Returns:
            A 3-tuple ``(encoder, decoder, autoencoder)`` of Keras models.
        """
        # the input is laid out "channels last", so the channel axis is
        # the final one
        channelAxis = -1

        # ---- encoder ----
        encoderIn = Input(shape=(height, width, depth))
        net = encoderIn
        for numFilters in filters:
            # CONV (stride 2) => LeakyReLU => BN
            net = Conv2D(numFilters, (3, 3), strides=2, padding="same")(net)
            net = LeakyReLU(alpha=0.2)(net)
            net = BatchNormalization(axis=channelAxis)(net)

        # remember the volume shape before flattening; the decoder needs
        # it to turn the latent vector back into a feature map
        convShape = K.int_shape(net)
        net = Flatten()(net)
        latentVec = Dense(latentDim)(net)
        encoder = Model(encoderIn, latentVec, name="encoder")

        # ---- decoder ----
        # the decoder consumes a vector shaped like the encoder output
        decoderIn = Input(shape=(latentDim,))
        net = Dense(np.prod(convShape[1:]))(decoderIn)
        (_, rows, cols, chans) = convShape
        net = Reshape((rows, cols, chans))(net)

        # mirror the encoder: same filter counts, opposite order
        decoderFilters = filters[::-1]
        for numFilters in decoderFilters:
            # CONV_TRANSPOSE (stride 2) => LeakyReLU => BN
            net = Conv2DTranspose(numFilters, (3, 3), strides=2,
                padding="same")(net)
            net = LeakyReLU(alpha=0.2)(net)
            net = BatchNormalization(axis=channelAxis)(net)

        # one last CONV_TRANSPOSE brings the channel count back to the
        # depth of the input image; a sigmoid is applied on top of it
        net = Conv2DTranspose(depth, (3, 3), padding="same")(net)
        reconstruction = Activation("sigmoid")(net)
        decoder = Model(decoderIn, reconstruction, name="decoder")

        # the autoencoder simply chains encoder and decoder
        latentCode = encoder(encoderIn)
        autoencoder = Model(encoderIn, decoder(latentCode),
            name="autoencoder")

        return (encoder, decoder, autoencoder)

output.png

3.91 KB
Loading

plot.png

33.5 KB
Loading

train_conv_autoencoder.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# set the matplotlib backend so figures can be saved in the background
2+
import matplotlib
3+
matplotlib.use("Agg")
4+
# import the necessary packages
5+
from convautoencoder import ConvAutoencoder
6+
from tensorflow.keras.optimizers import Adam
7+
from tensorflow.keras.datasets import mnist
8+
import matplotlib.pyplot as plt
9+
import numpy as np
10+
import argparse
11+
import cv2
12+
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-s", "--samples", type=int, default=8,
    help="# number of samples to visualize when decoding")
ap.add_argument("-o", "--output", type=str, default="output.png",
    help="path to output visualization file")
ap.add_argument("-p", "--plot", type=str, default="plot.png",
    help="path to output plot file")
args = vars(ap.parse_args())

# reject a non-positive sample count before any work is done; otherwise
# the script would train for all epochs and only then fail while trying
# to write an empty visualization
if args["samples"] < 1:
    ap.error("--samples must be a positive integer")

# initialize the number of epochs to train for and batch size
EPOCHS = 25
BS = 32

# load the MNIST dataset (the labels are not needed for an autoencoder)
print("[INFO] loading MNIST dataset...")
((trainX, _), (testX, _)) = mnist.load_data()

# add a channel dimension to every image in the dataset, then scale
# the pixel intensities to the range [0, 1]
trainX = np.expand_dims(trainX, axis=-1)
testX = np.expand_dims(testX, axis=-1)
trainX = trainX.astype("float32") / 255.0
testX = testX.astype("float32") / 255.0

# construct our convolutional autoencoder
print("[INFO] building autoencoder...")
(encoder, decoder, autoencoder) = ConvAutoencoder.build(28, 28, 1)
opt = Adam(learning_rate=1e-3)
autoencoder.compile(loss="mse", optimizer=opt)

# train the convolutional autoencoder; the images are both the input
# and the reconstruction target
H = autoencoder.fit(
    trainX, trainX,
    validation_data=(testX, testX),
    epochs=EPOCHS,
    batch_size=BS)

# construct a plot that plots and saves the training history; only the
# loss is tracked (no metrics were compiled), so label it as such, and
# size the x axis from the recorded history itself
N = np.arange(0, len(H.history["loss"]))
plt.style.use("ggplot")
plt.figure()
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history["val_loss"], label="val_loss")
plt.title("Training and Validation Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.legend(loc="lower left")
plt.savefig(args["plot"])
plt.close()

# use the convolutional autoencoder to make predictions on the testing
# images; only the first few images are visualized, so never ask for
# more than the test set holds and only predict those
print("[INFO] making predictions...")
numSamples = min(args["samples"], len(testX))
decoded = autoencoder.predict(testX[:numSamples])

# build one row per sample with the original image on the left and its
# reconstruction on the right, both converted back to 8-bit pixels
rows = []
for (original, recon) in zip(testX[:numSamples], decoded):
    original = (original * 255).astype("uint8")
    recon = (recon * 255).astype("uint8")
    rows.append(np.hstack([original, recon]))

# vertically stack all rows in a single pass
outputs = np.vstack(rows)

# save the outputs image to disk; imwrite reports failure through its
# return value, so surface that instead of silently ignoring it
if not cv2.imwrite(args["output"], outputs):
    raise IOError(
        "could not write visualization to {}".format(args["output"]))

0 commit comments

Comments
 (0)