xxl4tomxu98
diff --git a/‎__pycache__/convautoencoder.cpython-38.pyc‎
1.64 KB b/‎__pycache__/convautoencoder.cpython-38.pyc‎
1.64 KB
diff --git a/‎convautoencoder.ipynb‎
Lines changed: 228 additions & 0 deletions b/‎convautoencoder.ipynb‎
Lines changed: 228 additions & 0 deletions
diff --git a/‎convautoencoder.py‎
Lines changed: 61 additions & 0 deletions b/‎convautoencoder.py‎
Lines changed: 61 additions & 0 deletions
diff --git a/‎output.png‎
3.91 KB b/‎output.png‎
3.91 KB
diff --git a/‎plot.png‎
33.5 KB b/‎plot.png‎
33.5 KB
diff --git a/‎train_conv_autoencoder.py‎
Lines changed: 79 additions & 0 deletions b/‎train_conv_autoencoder.py‎
Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,228 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "source": [
+    "# import the necessary packages\n",
+    "from tensorflow.keras.layers import BatchNormalization\n",
+    "from tensorflow.keras.layers import Conv2D\n",
+    "from tensorflow.keras.layers import Conv2DTranspose\n",
+    "from tensorflow.keras.layers import LeakyReLU\n",
+    "from tensorflow.keras.layers import Activation\n",
+    "from tensorflow.keras.layers import Flatten\n",
+    "from tensorflow.keras.layers import Dense\n",
+    "from tensorflow.keras.layers import Reshape\n",
+    "from tensorflow.keras.layers import Input\n",
+    "from tensorflow.keras.models import Model\n",
+    "from tensorflow.keras import backend as K\n",
+    "import numpy as np\n",
+    "\n",
+    "\n",
+    "def build(width, height, depth, filters=(32, 64), latentDim=16):\n",
+    "    # initialize the input shape to be \"channels last\" along with\n",
+    "    # the channels dimension itself\n",
+    "    inputShape = (height, width, depth)\n",
+    "    chanDim = -1\n",
+    "\n",
+    "    # define the input to the encoder\n",
+    "    inputs = Input(shape=inputShape)\n",
+    "    x = inputs\n",
+    "    # loop over the number of filters\n",
+    "    for f in filters:\n",
+    "        # apply a CONV => LeakyReLU => BN operation\n",
+    "        x = Conv2D(f, (3, 3), strides=2, padding=\"same\")(x)\n",
+    "        x = LeakyReLU(alpha=0.2)(x)\n",
+    "        x = BatchNormalization(axis=chanDim)(x)\n",
+    "    # flatten the network and then construct our latent vector\n",
+    "    volumeSize = K.int_shape(x)\n",
+    "    x = Flatten()(x)\n",
+    "    latent = Dense(latentDim)(x)\n",
+    "    # build the encoder model\n",
+    "    encoder = Model(inputs, latent, name=\"encoder\")\n",
+    "\n",
+    "    # start building the decoder model which will accept the\n",
+    "    # output of the encoder as its inputs\n",
+    "    latentInputs = Input(shape=(latentDim,))\n",
+    "    x = Dense(np.prod(volumeSize[1:]))(latentInputs)\n",
+    "    x = Reshape((volumeSize[1], volumeSize[2], volumeSize[3]))(x)\n",
+    "    # loop over our number of filters again, but this time in\n",
+    "    # reverse order\n",
+    "    for f in filters[::-1]:\n",
+    "        # apply a CONV_TRANSPOSE => LeakyReLU => BN operation\n",
+    "        x = Conv2DTranspose(f, (3, 3), strides=2,\n",
+    "            padding=\"same\")(x)\n",
+    "        x = LeakyReLU(alpha=0.2)(x)\n",
+    "        x = BatchNormalization(axis=chanDim)(x)\n",
+    "    \n",
+    "    # apply a single CONV_TRANSPOSE layer used to recover the\n",
+    "    # original depth of the image\n",
+    "    x = Conv2DTranspose(depth, (3, 3), padding=\"same\")(x)\n",
+    "    outputs = Activation(\"sigmoid\")(x)\n",
+    "    # build the decoder model\n",
+    "    decoder = Model(latentInputs, outputs, name=\"decoder\")\n",
+    "    # our autoencoder is the encoder + decoder\n",
+    "    autoencoder = Model(inputs, decoder(encoder(inputs)),\n",
+    "        name=\"autoencoder\")\n",
+    "    # return a 3-tuple of the encoder, decoder, and autoencoder\n",
+    "    return (encoder, decoder, autoencoder)"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Init Plugin\n",
+      "Init Graph Optimizer\n",
+      "Init Kernel\n"
+     ]
+    }
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "source": [
+    "encoder, decoder, autoencoder = build(28, 28, 1)\n",
+    "encoder.summary()"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Model: \"encoder\"\n",
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #   \n",
+      "=================================================================\n",
+      "input_3 (InputLayer)         [(None, 28, 28, 1)]       0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_2 (Conv2D)            (None, 14, 14, 32)        320       \n",
+      "_________________________________________________________________\n",
+      "leaky_re_lu_4 (LeakyReLU)    (None, 14, 14, 32)        0         \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_4 (Batch (None, 14, 14, 32)        128       \n",
+      "_________________________________________________________________\n",
+      "conv2d_3 (Conv2D)            (None, 7, 7, 64)          18496     \n",
+      "_________________________________________________________________\n",
+      "leaky_re_lu_5 (LeakyReLU)    (None, 7, 7, 64)          0         \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_5 (Batch (None, 7, 7, 64)          256       \n",
+      "_________________________________________________________________\n",
+      "flatten_1 (Flatten)          (None, 3136)              0         \n",
+      "_________________________________________________________________\n",
+      "dense_2 (Dense)              (None, 16)                50192     \n",
+      "=================================================================\n",
+      "Total params: 69,392\n",
+      "Trainable params: 69,200\n",
+      "Non-trainable params: 192\n",
+      "_________________________________________________________________\n"
+     ]
+    }
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "source": [
+    "decoder.summary()"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Model: \"decoder\"\n",
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #   \n",
+      "=================================================================\n",
+      "input_9 (InputLayer)         [(None, 16)]              0         \n",
+      "_________________________________________________________________\n",
+      "dense_8 (Dense)              (None, 3136)              53312     \n",
+      "_________________________________________________________________\n",
+      "reshape_3 (Reshape)          (None, 7, 7, 64)          0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_transpose_9 (Conv2DTr (None, 14, 14, 64)        36928     \n",
+      "_________________________________________________________________\n",
+      "leaky_re_lu_16 (LeakyReLU)   (None, 14, 14, 64)        0         \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_16 (Batc (None, 14, 14, 64)        256       \n",
+      "_________________________________________________________________\n",
+      "conv2d_transpose_10 (Conv2DT (None, 28, 28, 32)        18464     \n",
+      "_________________________________________________________________\n",
+      "leaky_re_lu_17 (LeakyReLU)   (None, 28, 28, 32)        0         \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_17 (Batc (None, 28, 28, 32)        128       \n",
+      "_________________________________________________________________\n",
+      "conv2d_transpose_11 (Conv2DT (None, 28, 28, 1)         289       \n",
+      "_________________________________________________________________\n",
+      "activation_3 (Activation)    (None, 28, 28, 1)         0         \n",
+      "=================================================================\n",
+      "Total params: 109,377\n",
+      "Trainable params: 109,185\n",
+      "Non-trainable params: 192\n",
+      "_________________________________________________________________\n"
+     ]
+    }
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "source": [
+    "autoencoder.summary()"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Model: \"autoencoder\"\n",
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #   \n",
+      "=================================================================\n",
+      "input_6 (InputLayer)         [(None, 28, 28, 1)]       0         \n",
+      "_________________________________________________________________\n",
+      "encoder (Functional)         (None, 16)                69392     \n",
+      "_________________________________________________________________\n",
+      "decoder (Functional)         (None, 28, 28, 1)         109377    \n",
+      "=================================================================\n",
+      "Total params: 178,769\n",
+      "Trainable params: 178,385\n",
+      "Non-trainable params: 384\n",
+      "_________________________________________________________________\n"
+     ]
+    }
+   ],
+   "metadata": {}
+  }
+ ],
+ "metadata": {
+  "orig_nbformat": 4,
+  "language_info": {
+   "name": "python",
+   "version": "3.8.10",
+   "mimetype": "text/x-python",
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "pygments_lexer": "ipython3",
+   "nbconvert_exporter": "python",
+   "file_extension": ".py"
+  },
+  "kernelspec": {
+   "name": "python3",
+   "display_name": "Python 3.8.10 64-bit ('tf2.5': conda)"
+  },
+  "interpreter": {
+   "hash": "79f4630616981068147ecb693f55d51ab12fab43ffc02db62e4992b7ed83fc2b"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
@@ -0,0 +1,61 @@
+# import the necessary packages
+from tensorflow.keras.layers import BatchNormalization
+from tensorflow.keras.layers import Conv2D
+from tensorflow.keras.layers import Conv2DTranspose
+from tensorflow.keras.layers import LeakyReLU
+from tensorflow.keras.layers import Activation
+from tensorflow.keras.layers import Flatten
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.layers import Reshape
+from tensorflow.keras.layers import Input
+from tensorflow.keras.models import Model
+from tensorflow.keras import backend as K
+import numpy as np
+
+class ConvAutoencoder:
+	@staticmethod
+	def build(width, height, depth, filters=(32, 64), latentDim=16):
+		# initialize the input shape to be "channels last" along with
+		# the channels dimension itself
+		# channels dimension itself
+		inputShape = (height, width, depth)
+		chanDim = -1
+
+        # define the input to the encoder
+		inputs = Input(shape=inputShape)
+		x = inputs
+		# loop over the number of filters
+		for f in filters:
+			# apply a CONV => RELU => BN operation
+			x = Conv2D(f, (3, 3), strides=2, padding="same")(x)
+			x = LeakyReLU(alpha=0.2)(x)
+			x = BatchNormalization(axis=chanDim)(x)
+		# flatten the network and then construct our latent vector
+		volumeSize = K.int_shape(x)
+		x = Flatten()(x)
+		latent = Dense(latentDim)(x)
+		# build the encoder model
+		encoder = Model(inputs, latent, name="encoder")
+
+        # start building the decoder model which will accept the
+		# output of the encoder as its inputs
+		latentInputs = Input(shape=(latentDim,))
+		x = Dense(np.prod(volumeSize[1:]))(latentInputs)
+		x = Reshape((volumeSize[1], volumeSize[2], volumeSize[3]))(x)
+		# loop over our number of filters again, but this time in reverse order
+		for f in filters[::-1]:
+			# apply a CONV_TRANSPOSE => RELU => BN operation
+			x = Conv2DTranspose(f, (3, 3), strides=2,
+				padding="same")(x)
+			x = LeakyReLU(alpha=0.2)(x)
+			x = BatchNormalization(axis=chanDim)(x)
+        # apply a single CONV_TRANSPOSE layer used to recover the original depth of the image
+		x = Conv2DTranspose(depth, (3, 3), padding="same")(x)
+		outputs = Activation("sigmoid")(x)
+        # build the decoder model
+		decoder = Model(latentInputs, outputs, name="decoder")
+        # our autoencoder is the encoder + decoder
+		autoencoder = Model(inputs, decoder(encoder(inputs)),
+            name="autoencoder")
+        # return a 3-tuple of the encoder, decoder, and autoencoder
+		return (encoder, decoder, autoencoder)
@@ -0,0 +1,79 @@
+# set the matplotlib backend so figures can be saved in the background
+import matplotlib
+matplotlib.use("Agg")
+# import the necessary packages
+from convautoencoder import ConvAutoencoder
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.datasets import mnist
+import matplotlib.pyplot as plt
+import numpy as np
+import argparse
+import cv2
+# construct the argument parse and parse the arguments
+ap = argparse.ArgumentParser()
+ap.add_argument("-s", "--samples", type=int, default=8,
+	help="# number of samples to visualize when decoding")
+ap.add_argument("-o", "--output", type=str, default="output.png",
+	help="path to output visualization file")
+ap.add_argument("-p", "--plot", type=str, default="plot.png",
+	help="path to output plot file")
+args = vars(ap.parse_args())
+
+# initialize the number of epochs to train for and batch size
+EPOCHS = 25
+BS = 32
+# load the MNIST dataset
+print("[INFO] loading MNIST dataset...")
+((trainX, _), (testX, _)) = mnist.load_data()
+# add a channel dimension to every image in the dataset, then scale
+# the pixel intensities to the range [0, 1]
+trainX = np.expand_dims(trainX, axis=-1)
+testX = np.expand_dims(testX, axis=-1)
+trainX = trainX.astype("float32") / 255.0
+testX = testX.astype("float32") / 255.0
+
+# construct our convolutional autoencoder
+print("[INFO] building autoencoder...")
+(encoder, decoder, autoencoder) = ConvAutoencoder.build(28, 28, 1)
+opt = Adam(learning_rate=1e-3)
+autoencoder.compile(loss="mse", optimizer=opt)
+# train the convolutional autoencoder
+H = autoencoder.fit(
+	trainX, trainX,
+	validation_data=(testX, testX),
+	epochs=EPOCHS,
+	batch_size=BS)
+
+# construct a plot that plots and saves the training history
+N = np.arange(0, EPOCHS)
+plt.style.use("ggplot")
+plt.figure()
+plt.plot(N, H.history["loss"], label="train_loss")
+plt.plot(N, H.history["val_loss"], label="val_loss")
+plt.title("Training Loss and Accuracy")
+plt.xlabel("Epoch #")
+plt.ylabel("Loss/Accuracy")
+plt.legend(loc="lower left")
+plt.savefig(args["plot"])
+
+# use the convolutional autoencoder to make predictions on the
+# testing images, then initialize our list of output images
+print("[INFO] making predictions...")
+decoded = autoencoder.predict(testX)
+outputs = None
+# loop over our number of output samples
+for i in range(0, args["samples"]):
+	# grab the original image and reconstructed image
+	original = (testX[i] * 255).astype("uint8")
+	recon = (decoded[i] * 255).astype("uint8")
+	# stack the original and reconstructed image side-by-side
+	output = np.hstack([original, recon])
+	# if the outputs array is empty, initialize it as the current
+	# side-by-side image display
+	if outputs is None:
+		outputs = output
+	# otherwise, vertically stack the outputs
+	else:
+		outputs = np.vstack([outputs, output])
+# save the outputs image to disk
+cv2.imwrite(args["output"], outputs)