**Chapter 14 – Deep Computer Vision Using Convolutional Neural Networks**

_This notebook contains all the sample code and solutions to the exercises in chapter 14._

# Setup

This project requires Python 3.7 or above:

In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf
import sklearn
from packaging import version
import sys
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

assert sys.version_info >= (3, 7)
print("sklearn version: ", sklearn.__version__)
assert version.parse(sklearn.__version__) >= version.parse("1.0.1")
print("TF version: ", tf.__version__)
assert version.parse(tf.__version__) >= version.parse("2.8.0")


plt.rc('font', size=14)
plt.rc('axes', labelsize=14, titlesize=14)
plt.rc('legend', fontsize=14)
plt.rc('xtick', labelsize=10)
plt.rc('ytick', labelsize=10)

# To prevent "CUDNN_STATUS_ALLOC_FAILED" error with GPUs
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

2024-05-19 16:13:52.909342: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-19 16:13:52.909376: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-19 16:13:52.909404: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-19 16:13:52.915721: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


sklearn version:  1.4.2
TF version:  2.14.0
1 Physical GPUs, 1 Logical GPUs


2024-05-19 16:13:54.659024: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-05-19 16:13:54.688876: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-05-19 16:13:54.689214: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [2]:
BATCH_SIZE = 500

# CNN Architectures

**Tackling Fashion MNIST With a CNN**

In [3]:
import numpy as np
import time
import pandas as pd
import IPython
from functools import partial
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras import layers

In [4]:
# extra code – loads the mnist dataset, add the channels axis to the inputs,
#              scales the values to the 0-1 range, and splits the dataset
mnist = tf.keras.datasets.fashion_mnist.load_data()
(X_train_full, y_train_full), (X_test, y_test) = mnist
X_train_full = np.expand_dims(X_train_full, axis=-1).astype(np.float32) / 255
X_test = np.expand_dims(X_test.astype(np.float32), axis=-1) / 255
X_train, X_valid = X_train_full[:-5000], X_train_full[-5000:]
y_train, y_valid = y_train_full[:-5000], y_train_full[-5000:]

In [5]:
result = {'Model': [], 'accuracy': [], 'training_time': []}

In [6]:
from functools import partial

tf.random.set_seed(42)  # extra code – ensures reproducibility
DefaultConv2D = partial(tf.keras.layers.Conv2D, kernel_size=3, padding="same",
                        activation="relu", kernel_initializer="he_normal")
model = tf.keras.Sequential([
    DefaultConv2D(filters=64, kernel_size=7, input_shape=[28, 28, 1]),
    tf.keras.layers.MaxPool2D(),
    DefaultConv2D(filters=128),
    DefaultConv2D(filters=128),
    tf.keras.layers.MaxPool2D(),
    DefaultConv2D(filters=256),
    DefaultConv2D(filters=256),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation="relu",
                          kernel_initializer="he_normal"),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(units=64, activation="relu",
                          kernel_initializer="he_normal"),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(units=10, activation="softmax")
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 64)        3200      
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 64)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 128)       73856     
                                                                 
 conv2d_2 (Conv2D)           (None, 14, 14, 128)       147584    
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 7, 7, 128)         0         
 g2D)                                                            
                                                                 
 conv2d_3 (Conv2D)           (None, 7, 7, 256)         2

In [7]:
# extra code – compiles, fits, evaluates, and uses the model to make predictions
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam",
              metrics=["accuracy"])
n_epoch = 5
# With GPU, train for 10 epochs
# n_epoch=10
history = model.fit(X_train, y_train, epochs=n_epoch,
                    validation_data=(X_valid, y_valid))
# Evaluation
score = model.evaluate(X_test, y_test)
X_new = X_test[:10]  # pretend we have new images
y_pred = model.predict(X_new)

Epoch 1/5


2024-05-19 16:13:58.253963: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8800
2024-05-19 16:14:00.417170: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f4b89e28590 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-05-19 16:14:00.417203: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA RTX A6000, Compute Capability 8.6
2024-05-19 16:14:00.424708: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-05-19 16:14:00.524330: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


# Exercise 14.1
- Construct simplified LeNet-5 as shown in the table
- ReLu activation
- Ignore S2->C3 connection and consider regular connection
- Dropout rate: 0.5 for FC
- Output layer: softmax
- Train and evalute the LeNet-5 model and compare the results of the model in the practice code.

practice

In [8]:
tf.random.set_seed(42)  # extra code – ensures reproducibility
DefaultConv2D = partial(tf.keras.layers.Conv2D, kernel_size=3, padding="same",
                        activation="relu", kernel_initializer="he_normal")
model = tf.keras.Sequential([
    DefaultConv2D(filters=64, kernel_size=7, input_shape=[28, 28, 1]),
    tf.keras.layers.MaxPool2D(),
    DefaultConv2D(filters=128),
    DefaultConv2D(filters=128),
    tf.keras.layers.MaxPool2D(),
    DefaultConv2D(filters=256),
    DefaultConv2D(filters=256),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation="relu",
                          kernel_initializer="he_normal"),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(units=64, activation="relu",
                          kernel_initializer="he_normal"),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(units=10, activation="softmax")
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_5 (Conv2D)           (None, 28, 28, 64)        3200      
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 14, 14, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_6 (Conv2D)           (None, 14, 14, 128)       73856     
                                                                 
 conv2d_7 (Conv2D)           (None, 14, 14, 128)       147584    
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 7, 7, 128)         0         
 g2D)                                                            
                                                                 
 conv2d_8 (Conv2D)           (None, 7, 7, 256)        

In [9]:
# Compile, train and evaluate
n_epoch = 10
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

start_time = time.time()
history = model.fit(X_train, y_train, epochs=n_epoch,
                    validation_data=(X_valid, y_valid), batch_size=BATCH_SIZE)
end_time = time.time()

score = model.evaluate(X_test, y_test)

result['accuracy'].append(score[1])
result['training_time'].append(end_time-start_time)
result['Model'].append('Model in practice')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
# Define simplified LeNet-5
tf.keras.backend.clear_session()

model1 = tf.keras.Sequential([
    layers.Input(shape=(28, 28, 1)),
    layers.ZeroPadding2D(padding=(2, 2)),

    layers.Conv2D(filters=6, kernel_size=(5, 5), activation='relu', input_shape=(32, 32, 1)),
    layers.AveragePooling2D(pool_size=(2, 2), strides=2),

    layers.Conv2D(filters=16, kernel_size=(5, 5), activation='relu'),
    layers.AveragePooling2D(pool_size=(2, 2), strides=2),

    layers.Conv2D(filters=120, kernel_size=(5, 5), activation='relu'),
    layers.Flatten(),

    layers.Dense(84, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')
])

model1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 zero_padding2d (ZeroPaddin  (None, 32, 32, 1)         0         
 g2D)                                                            
                                                                 
 conv2d (Conv2D)             (None, 28, 28, 6)         156       
                                                                 
 average_pooling2d (Average  (None, 14, 14, 6)         0         
 Pooling2D)                                                      
                                                                 
 conv2d_1 (Conv2D)           (None, 10, 10, 16)        2416      
                                                                 
 average_pooling2d_1 (Avera  (None, 5, 5, 16)          0         
 gePooling2D)                                                    
                                                        

In [11]:
# Compile, train and evaluate
n_epoch = 10
model1.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

start_time = time.time()
history = model1.fit(X_train, y_train, epochs=n_epoch,
                     validation_data=(X_valid, y_valid), batch_size=BATCH_SIZE)
end_time = time.time()

score = model1.evaluate(X_test, y_test)

result['accuracy'].append(score[1])
result['training_time'].append(end_time-start_time)
result['Model'].append('LeNet-5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
results_df = pd.DataFrame(result)
display(results_df)

Unnamed: 0,Model,accuracy,training_time
0,Model in practice,0.9073,17.809999
1,LeNet-5,0.8611,6.718439


In [13]:
for r in result:
    result[r] = result[r][1:]

# Exercise 14.2
Construct VGG-like LeNet for MNIST
- 3 conv. Layers: each layer has 2 convolutional 3x3 filters with ReLU activation
  -> in - c1 - c2 - s3 - c4 - c5 - s6 - c7 - c8 - fc - out
- Number of kernels: 6-16-120
- padding: SAME
- Max pooling with 2x2 mask and stride=2
- FC: 84-10.
- Dropout rate: 0.5
- Output: Softmax

In [14]:
# 14.2.1 Define VGG_like LeNet
tf.keras.backend.clear_session()


model2 = tf.keras.Sequential([
    layers.Input(shape=(28, 28, 1)),
    layers.ZeroPadding2D(padding=(2, 2)),
    layers.Conv2D(filters=3, kernel_size=(6, 6), activation='relu', input_shape=(32, 32, 1), padding='SAME',),
    layers.Conv2D(filters=3, kernel_size=(6, 6), activation='relu', padding='SAME',),
    layers.MaxPooling2D(pool_size=(2, 2), strides=2),

    layers.Conv2D(filters=3, kernel_size=(16, 16), activation='relu', input_shape=(32, 32, 1), padding='SAME',),
    layers.Conv2D(filters=3, kernel_size=(16, 16), activation='relu', padding='SAME',),
    layers.MaxPooling2D(pool_size=(2, 2), strides=2),

    layers.Conv2D(filters=3, kernel_size=(120, 120), activation='relu', input_shape=(32, 32, 1), padding='SAME',),
    layers.Conv2D(filters=3, kernel_size=(120, 120), activation='relu', padding='SAME',),
    layers.MaxPooling2D(pool_size=(2, 2), strides=2),

    layers.Flatten(),

    layers.Dense(84, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')
])

model2.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 zero_padding2d (ZeroPaddin  (None, 32, 32, 1)         0         
 g2D)                                                            
                                                                 
 conv2d (Conv2D)             (None, 32, 32, 3)         111       
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 3)         327       
                                                                 
 max_pooling2d (MaxPooling2  (None, 16, 16, 3)         0         
 D)                                                              
                                                                 
 conv2d_2 (Conv2D)           (None, 16, 16, 3)         2307      
                                                                 
 conv2d_3 (Conv2D)           (None, 16, 16, 3)         2

In [15]:
# Compile, train and evaluate
n_epoch = 10
model2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

start_time = time.time()
history = model2.fit(X_train, y_train, epochs=n_epoch,
                     validation_data=(X_valid, y_valid), batch_size=BATCH_SIZE)
end_time = time.time()

score = model2.evaluate(X_test, y_test)

result['accuracy'].append(score[1])
result['training_time'].append(end_time-start_time)
result['Model'].append('VGG like LeNet(kernel 6-16-120)')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Compare results by changing # of kernels, # neurons of FC1, # of conv. layers, batch normalization, and activation functions.

- Compare results with LeNet: Accuracy, Training time

2. Kernal수를 아래와 변경하여 학습후 결과를 비교하시오.
Number of kernels: 16-32-64

In [16]:
# 14.2.2 Define VGG_like LeNet with different number of kernels
tf.keras.backend.clear_session()

model3 = tf.keras.Sequential([
    layers.Input(shape=(28, 28, 1)),
    layers.ZeroPadding2D(padding=(2, 2)),
    layers.Conv2D(filters=3, kernel_size=(16, 16), activation='relu', input_shape=(32, 32, 1), padding='SAME',),
    layers.Conv2D(filters=3, kernel_size=(16, 16), activation='relu', padding='SAME',),
    layers.MaxPooling2D(pool_size=(2, 2), strides=2),

    layers.Conv2D(filters=3, kernel_size=(32, 32), activation='relu', input_shape=(32, 32, 1), padding='SAME',),
    layers.Conv2D(filters=3, kernel_size=(32, 32), activation='relu', padding='SAME',),
    layers.MaxPooling2D(pool_size=(2, 2), strides=2),

    layers.Conv2D(filters=3, kernel_size=(64, 64), activation='relu', input_shape=(32, 32, 1), padding='SAME',),
    layers.Conv2D(filters=3, kernel_size=(64, 64), activation='relu', padding='SAME',),
    layers.MaxPooling2D(pool_size=(2, 2), strides=2),

    layers.Flatten(),

    layers.Dense(84, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')
])

model3.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 zero_padding2d (ZeroPaddin  (None, 32, 32, 1)         0         
 g2D)                                                            
                                                                 
 conv2d (Conv2D)             (None, 32, 32, 3)         771       
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 3)         2307      
                                                                 
 max_pooling2d (MaxPooling2  (None, 16, 16, 3)         0         
 D)                                                              
                                                                 
 conv2d_2 (Conv2D)           (None, 16, 16, 3)         9219      
                                                                 
 conv2d_3 (Conv2D)           (None, 16, 16, 3)         9

In [17]:
# Compile, train and evaluate
n_epoch = 10
model3.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

start_time = time.time()
history = model3.fit(X_train, y_train, epochs=n_epoch,
                     validation_data=(X_valid, y_valid), batch_size=BATCH_SIZE)
end_time = time.time()

score = model3.evaluate(X_test, y_test)

result['accuracy'].append(score[1])
result['training_time'].append(end_time-start_time)
result['Model'].append('VGG like LeNet(kernel 16-32-64)')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
results_df = pd.DataFrame(result)
display(results_df)

Unnamed: 0,Model,accuracy,training_time
0,LeNet-5,0.8611,6.718439
1,VGG like LeNet(kernel 6-16-120),0.8076,26.48883
2,VGG like LeNet(kernel 16-32-64),0.8203,22.708542


# Exercise 14.3  
Resen-34를 이용하여 Fashion MNIST를 학습시키고자 한다.  그러나 ReseNet-34는 ImageNet data aize인 224x224에 맞게 구성되어 있으므로 이를 수정하여야 한다.
1. ResNet-34에 28x28의 Fashion MNSIT data를 입력할 경우 featue size의 변화를 확인하시오.
2. Fashion MNIST data의 경우 크기가 작으므로 첫번째 conv. layer에서 feature size를 줄이는 것은 적합하지 않다. ResNet-34를 수정하여 첫번째 conv. layer에서 feature size를 유지하도록 하고 학습시킨 결과를 확인하고 LeNet-5 및 VGG-like LeNet과 비교하시오.
3. ImageNet을 위한 ResNet-34는 7x7 feature를 GlobalAveragePooling layer를 통과시켰다. Fashion MNIST에 대해서도 동일한 동작을 하도록 high layer를 제거하고 학습결과를 비교하시오. Kernel 수는 low layer로부터 시작한 값을 유지한다.  
(Layer수가 줄었으므로 ResNet-34는 적합하지 않고 ResNet-16이 적합하나 편의상 ResNet-34로 부르기로 한다)
4. 3번에서 kernel수가 64, 128일 때 residual layer를 각각 3, 4개씩 유지하였는데 이를 2, 3개로 줄이고 학습결과를 비교하시오.
5. 2-4번의 결과를 보고 accuracy를 유지하는 범위내에서 네트워크 복잡도를 줄여 학습시간을 최소화하는 ResNet-34를 설계하고 학습결과를 비교하시오.

## 14.3.1.

ResNet-34에 28x28의 Fashion MNSIT data를 입력할 경우 featue size의 변화를 확인하시오.
    

**feature size는 (28,28) -> (14,14) -> (7,7) -> (4,4) 로 변화한다.**

In [19]:
DefaultConv2D = partial(tf.keras.layers.Conv2D, kernel_size=3, strides=1,
                        padding="same", kernel_initializer="he_normal",
                        use_bias=False)


class ResidualUnit(tf.keras.layers.Layer):
    def __init__(self, filters, strides=1, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.activation = tf.keras.activations.get(activation)
        self.main_layers = [
            DefaultConv2D(filters, strides=strides),
            tf.keras.layers.BatchNormalization(),
            self.activation,
            DefaultConv2D(filters),
            tf.keras.layers.BatchNormalization()
        ]
        self.skip_layers = []
        if strides > 1:
            self.skip_layers = [
                DefaultConv2D(filters, kernel_size=1, strides=strides),
                tf.keras.layers.BatchNormalization()
            ]

    def call(self, inputs):
        Z = inputs
        for layer in self.main_layers:
            Z = layer(Z)
        skip_Z = inputs
        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)
        return self.activation(Z + skip_Z)

In [22]:
# Ex. 14.3.1
# Define ResNet-34 for 28x28 input

tf.keras.backend.clear_session()

model4a = tf.keras.Sequential([
    DefaultConv2D(64, kernel_size=7, strides=2, input_shape=[28, 28, 1]),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation("relu"),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding="same"),
])
prev_filters = 64
for filters in [64] * 3 + [128] * 4 + [256] * 6 + [512] * 3:
    strides = 1 if filters == prev_filters else 2
    model4a.add(ResidualUnit(filters, strides=strides))
    prev_filters = filters

model4a.add(tf.keras.layers.GlobalAvgPool2D())
model4a.add(tf.keras.layers.Flatten())
model4a.add(tf.keras.layers.Dense(10, activation="softmax"))
model4a.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 14, 14, 64)        3136      
                                                                 
 batch_normalization (Batch  (None, 14, 14, 64)        256       
 Normalization)                                                  
                                                                 
 activation (Activation)     (None, 14, 14, 64)        0         
                                                                 
 max_pooling2d (MaxPooling2  (None, 7, 7, 64)          0         
 D)                                                              
                                                                 
 residual_unit (ResidualUni  (None, 7, 7, 64)          74240     
 t)                                                              
                                                        

In [23]:
# Compile, train and evaluate
n_epoch = 10
model4a.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

start_time = time.time()
history = model4a.fit(X_train, y_train, epochs=n_epoch,
                      validation_data=(X_valid, y_valid), batch_size=BATCH_SIZE)
end_time = time.time()

score = model4a.evaluate(X_test, y_test)

result['accuracy'].append(score[1])
result['training_time'].append(end_time-start_time)
result['Model'].append('14.3.1 ResNet-34')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## 14.3.2.

Fashion MNIST data의 경우 크기가 작으므로 첫번째 conv. layer에서 feature size를 줄이는 것은 적합하지 않다. ResNet-34를 수정하여 첫번째 conv. layer에서 feature size를 유지하도록 하고 학습시킨 결과를 확인하고 LeNet-5 및 VGG-like LeNet과 비교하시오.

In [24]:
# Ex. 14.3.2
# Modify ResNet-34
tf.keras.backend.clear_session()

model4b = tf.keras.Sequential([
    DefaultConv2D(64, kernel_size=1, strides=1, input_shape=[28, 28, 1]),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same'),
])
prev_filters = 64
for filters in [64] * 3 + [128] * 4 + [256] * 6 + [512] * 3:
    strides = 1 if filters == prev_filters else 2
    model4b.add(ResidualUnit(filters, strides=strides))
    prev_filters = filters

model4b.add(tf.keras.layers.GlobalAvgPool2D())
model4b.add(tf.keras.layers.Flatten())
model4b.add(tf.keras.layers.Dense(10, activation='softmax'))
model4b.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 64)        64        
                                                                 
 batch_normalization (Batch  (None, 28, 28, 64)        256       
 Normalization)                                                  
                                                                 
 activation (Activation)     (None, 28, 28, 64)        0         
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 64)        0         
 D)                                                              
                                                                 
 residual_unit (ResidualUni  (None, 14, 14, 64)        74240     
 t)                                                              
                                                        

In [25]:
# Compile, train and evaluate
n_epoch = 10
model4b.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

start_time = time.time()
history = model4b.fit(X_train, y_train, epochs=n_epoch,
                      validation_data=(X_valid, y_valid), batch_size=BATCH_SIZE)
end_time = time.time()

score = model4b.evaluate(X_test, y_test)

result['accuracy'].append(score[1])
result['training_time'].append(end_time-start_time)
result['Model'].append('14.3.2 Revised ResNet-34')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [26]:
results_df = pd.DataFrame(result)
display(results_df)

Unnamed: 0,Model,accuracy,training_time
0,LeNet-5,0.8611,6.718439
1,VGG like LeNet(kernel 6-16-120),0.8076,26.48883
2,VGG like LeNet(kernel 16-32-64),0.8203,22.708542
3,14.3.1 ResNet-34,0.8713,40.072834
4,14.3.2 Revised ResNet-34,0.8804,61.152374


## 14.3.3.

ImageNet을 위한 ResNet-34는 7x7 feature를 GlobalAveragePooling layer를 통과시켰다. Fashion MNIST에 대해서도 동일한 동작을 하도록 high layer를 제거하고 학습결과를 비교하시오. Kernel 수는 low layer로부터 시작한 값을 유지한다.  
(Layer수가 줄었으므로 ResNet-34는 적합하지 않고 ResNet-16이 적합하나 편의상 ResNet-34로 부르기로 한다)


In [27]:
# Ex. 14.3.3
# Modify ResNet-34
tf.keras.backend.clear_session()

model4c = tf.keras.Sequential([
    DefaultConv2D(64, kernel_size=1, strides=1, input_shape=[28, 28, 1]),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.MaxPooling2D(pool_size=3, strides=2, padding="same"),
])

prev_filters = 64
for filters in [64] * 3 + [128] * 4:
    strides = 1 if filters == prev_filters else 2
    model4c.add(ResidualUnit(filters, strides=strides))
    prev_filters = filters

model4c.add(layers.GlobalAveragePooling2D())
model4c.add(layers.Flatten())
model4c.add(layers.Dense(10, activation="softmax"))
model4c.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 64)        64        
                                                                 
 batch_normalization (Batch  (None, 28, 28, 64)        256       
 Normalization)                                                  
                                                                 
 activation (Activation)     (None, 28, 28, 64)        0         
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 64)        0         
 D)                                                              
                                                                 
 residual_unit (ResidualUni  (None, 14, 14, 64)        74240     
 t)                                                              
                                                        

In [28]:
# Compile, train and evaluate
n_epoch = 10
model4c.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

start_time = time.time()
history = model4c.fit(X_train, y_train, epochs=n_epoch,
                      validation_data=(X_valid, y_valid), batch_size=BATCH_SIZE)
end_time = time.time()

score = model4c.evaluate(X_test, y_test)

result['accuracy'].append(score[1])
result['training_time'].append(end_time-start_time)
result['Model'].append('14.3.3 Revised ResNet-34 with 7x7')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [29]:
results_df = pd.DataFrame(result)
display(results_df)

Unnamed: 0,Model,accuracy,training_time
0,LeNet-5,0.8611,6.718439
1,VGG like LeNet(kernel 6-16-120),0.8076,26.48883
2,VGG like LeNet(kernel 16-32-64),0.8203,22.708542
3,14.3.1 ResNet-34,0.8713,40.072834
4,14.3.2 Revised ResNet-34,0.8804,61.152374
5,14.3.3 Revised ResNet-34 with 7x7,0.8544,32.327105


## 14.3.4.

4. 3번에서 kernel수가 64, 128일 때 residual layer를 각각 3, 4개씩 유지하였는데 이를 2, 3개로 줄이고 학습결과를 비교하시오.

In [30]:
# Ex. 14.3.4
# Modify ResNet-34

tf.keras.backend.clear_session()

model4d = tf.keras.Sequential([
    DefaultConv2D(64, kernel_size=1, strides=1, input_shape=[28, 28, 1]),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.MaxPooling2D(pool_size=3, strides=2, padding="same"),
])

prev_filters = 64
for filters in [64] * 2 + [128] * 3:
    strides = 1 if filters == prev_filters else 2
    model4d.add(ResidualUnit(filters, strides=strides))
    prev_filters = filters

model4d.add(layers.GlobalAveragePooling2D())
model4d.add(layers.Flatten())
model4d.add(layers.Dense(10, activation="softmax"))
model4d.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 64)        64        
                                                                 
 batch_normalization (Batch  (None, 28, 28, 64)        256       
 Normalization)                                                  
                                                                 
 activation (Activation)     (None, 28, 28, 64)        0         
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 64)        0         
 D)                                                              
                                                                 
 residual_unit (ResidualUni  (None, 14, 14, 64)        74240     
 t)                                                              
                                                        

In [31]:
# Compile, train and evaluate
# Compile, train and evaluate
n_epoch = 10
model4d.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

start_time = time.time()
history = model4d.fit(X_train, y_train, epochs=n_epoch,
                      validation_data=(X_valid, y_valid), batch_size=BATCH_SIZE)
end_time = time.time()

score = model4d.evaluate(X_test, y_test)

result['accuracy'].append(score[1])
result['training_time'].append(end_time-start_time)
result['Model'].append('14.3.4 Revised ResNet-34 reduced layers')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [32]:
results_df = pd.DataFrame(result)
display(results_df)

Unnamed: 0,Model,accuracy,training_time
0,LeNet-5,0.8611,6.718439
1,VGG like LeNet(kernel 6-16-120),0.8076,26.48883
2,VGG like LeNet(kernel 16-32-64),0.8203,22.708542
3,14.3.1 ResNet-34,0.8713,40.072834
4,14.3.2 Revised ResNet-34,0.8804,61.152374
5,14.3.3 Revised ResNet-34 with 7x7,0.8544,32.327105
6,14.3.4 Revised ResNet-34 reduced layers,0.7635,25.274931


## 14.3.5.

5. 2-4번의 결과를 보고 accuracy를 유지하는 범위내에서 네트워크 복잡도를 줄여 학습시간을 최소화하는 ResNet-34를 설계하고 학습결과를 비교하시오.

In [33]:
# Ex. 14.3.5
# Modify ResNet-34
tf.keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)
model4e

NameError: name 'model4e' is not defined

In [None]:
# Compile, train and evaluate

# Exercise 14.4   
1. 위의 셀들을 참조하여 base_model을 MobileNet으로 변경하여 학습시키시오.  
2. Xception과 학습시간 및 정확도를 비교하시오.

Pretrained Models for Transfer Learning

In [None]:
import tensorflow_datasets as tfds

dataset, info = tfds.load("tf_flowers", as_supervised=True, with_info=True)
dataset_size = info.splits["train"].num_examples
class_names = info.features["label"].names
n_classes = info.features["label"].num_classes

In [None]:
dataset_size

In [None]:
class_names

In [None]:
n_classes

In [None]:
test_set_raw, valid_set_raw, train_set_raw = tfds.load(
    "tf_flowers",
    split=["train[:10%]", "train[10%:25%]", "train[25%:]"],
    as_supervised=True)

In [None]:
tf.keras.backend.clear_session()  # extra code – resets layer name counter

batch_size = 32
preprocess = tf.keras.Sequential([
    tf.keras.layers.Resizing(height=224, width=224, crop_to_aspect_ratio=True),
    tf.keras.layers.Lambda(tf.keras.applications.xception.preprocess_input)
])
train_set = train_set_raw.map(lambda X, y: (preprocess(X), y))
train_set = train_set.shuffle(1000, seed=42).batch(batch_size).prefetch(1)
valid_set = valid_set_raw.map(lambda X, y: (preprocess(X), y)).batch(batch_size)
test_set = test_set_raw.map(lambda X, y: (preprocess(X), y)).batch(batch_size)

In [None]:
plt.figure(figsize=(5, 18))

index = 0
for image, label in valid_set_raw.take(5):
    index += 1
    plt.subplot(9, 2, 2 * index - 1)
    plt.imshow(image)
    plt.title(f"Before")
    plt.axis("off")

    # 전처리된 이미지
    processed_image = preprocess(tf.expand_dims(image, 0))
    plt.subplot(9, 2, 2 * index)
    plt.imshow(tf.keras.preprocessing.image.array_to_img(processed_image[0]))
    plt.title(f"After")
    plt.axis("off")

In [None]:
result = {'Model': [], 'accuracy': [], 'training_time': []}

In [None]:
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip(mode="horizontal", seed=42),
    tf.keras.layers.RandomRotation(factor=0.05, seed=42),
    tf.keras.layers.RandomContrast(factor=0.2, seed=42)
])

In [None]:
keras.backend.clear_session()
tf.random.set_seed(42)  # extra code – ensures reproducibility
base_model = tf.keras.applications.xception.Xception(weights="imagenet",
                                                     include_top=False)
avg = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)

output = tf.keras.layers.Dense(n_classes, activation="softmax")(avg)
model6 = tf.keras.Model(inputs=base_model.input, outputs=output)

for layer in base_model.layers:
    layer.trainable = False

optimizer = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)
model6.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer,
               metrics=["accuracy"])
model6.summary()

In [None]:
start_time = time.time()
histroy = model6.fit(train_set, validation_data=valid_set, epochs=3, batch_size=BATCH_SIZE)
end_time = time.time()

score = model6.evaluate(valid_set)
result['accuracy'].append(score[1])
result['training_time'].append(end_time-start_time)
result['Model'].append('Xception')

In [None]:
for indices in zip(range(33), range(33, 66), range(66, 99), range(99, 132)):
    for idx in indices:
        print(f"{idx:3}: {base_model.layers[idx].name:22}", end="")
    print()

In [None]:
for layer in base_model.layers[56:]:
    layer.trainable = True

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
model6.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer,
               metrics=["accuracy"])

start_time = time.time()
histroy = model6.fit(train_set, validation_data=valid_set, epochs=10, batch_size=BATCH_SIZE)
end_time = time.time()

score = model6.evaluate(valid_set)
result['accuracy'].append(score[1])
result['training_time'].append(end_time-start_time)
result['Model'].append('Xception w/ layer(56~) trainable')

MobileNet

In [None]:
# 14.4.1
keras.backend.clear_session()
tf.random.set_seed(42)  # extra code – ensures reproducibility
base_model = tf.keras.applications.MobileNet(weights="imagenet", include_top=False)
avg = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)

output = tf.keras.layers.Dense(n_classes, activation="softmax")(avg)
model7 = tf.keras.Model(inputs=base_model.input, outputs=output)

In [None]:
for layer in base_model.layers:
    layer.trainable = False

In [None]:
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)
model7.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer,
               metrics=["accuracy"])

start_time = time.time()
histroy = model7.fit(train_set, validation_data=valid_set, epochs=3)
end_time = time.time()

score = model7.evaluate(valid_set)
result['accuracy'].append(score[1])
result['training_time'].append(end_time-start_time)
result['Model'].append('Mobilenet')

In [None]:
len(base_model.layers)

In [None]:
for indices in zip(range(43), range(43, 86)):
    for idx in indices:
        print(f"{idx:3}: {base_model.layers[idx].name:22}", end="")
    print()

In [None]:
for layer in base_model.layers[73:]:
    layer.trainable = True

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
model6.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer,
               metrics=["accuracy"])

start_time = time.time()
histroy = model7.fit(train_set, validation_data=valid_set, epochs=10)
end_time = time.time()

score = model7.evaluate(valid_set)
result['accuracy'].append(score[1])
result['training_time'].append(end_time-start_time)
result['Model'].append('Mobilenet w/ layer(73~) trainable')

In [None]:
# Compile and train all layers
for layer in base_model.layers:
    layer.trainable = True

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
model6.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer,
               metrics=["accuracy"])

start_time = time.time()
histroy = model7.fit(train_set, validation_data=valid_set, epochs=10)
end_time = time.time()

score = model7.evaluate(valid_set)
result['accuracy'].append(score[1])
result['training_time'].append(end_time-start_time)
result['Model'].append('Mobilenet w/ layer(all) trainable')

#### 14.4.2  비교결과  
1.  Xception 결과
2.  Mobilenet 결과

In [None]:
results_df = pd.DataFrame(result)
display(results_df)

# Exercise 14.5
_Exercise: Go through TensorFlow's [Style Transfer tutorial](https://homl.info/styletuto). It is a fun way to generate art using Deep Learning._  
위의 tutorial 코드를 노트북에서 실행하여 결과를 제출.

In [None]:
os.environ['TFHUB_MODEL_LOAD_FORMAT'] = 'COMPRESSED'

In [None]:
import functools
import time
import PIL.Image
import numpy as np
import matplotlib as mpl
import IPython.display as display
mpl.rcParams['figure.figsize'] = (12, 12)
mpl.rcParams['axes.grid'] = False

In [None]:
def tensor_to_image(tensor):
    tensor = tensor*255
    tensor = np.array(tensor, dtype=np.uint8)
    if np.ndim(tensor) > 3:
        assert tensor.shape[0] == 1
        tensor = tensor[0]
    return PIL.Image.fromarray(tensor)

In [None]:
content_path = tf.keras.utils.get_file(
    'YellowLabradorLooking_new.jpg', 'https://storage.googleapis.com/download.tensorflow.org/example_images/YellowLabradorLooking_new.jpg')
style_path = tf.keras.utils.get_file(
    'kandinsky5.jpg', 'https://storage.googleapis.com/download.tensorflow.org/example_images/Vassily_Kandinsky%2C_1913_-_Composition_7.jpg')

In [None]:
def load_img(path_to_img):
    max_dim = 512
    img = tf.io.read_file(path_to_img)
    img = tf.image.decode_image(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)

    shape = tf.cast(tf.shape(img)[:-1], tf.float32)
    long_dim = max(shape)
    scale = max_dim / long_dim

    new_shape = tf.cast(shape * scale, tf.int32)

    img = tf.image.resize(img, new_shape)
    img = img[tf.newaxis, :]
    return img

In [None]:
def imshow(image, title=None):
    if len(image.shape) > 3:
        image = tf.squeeze(image, axis=0)

    plt.imshow(image)
    if title:
        plt.title(title)

In [None]:
content_image = load_img(content_path)
style_image = load_img(style_path)

plt.subplot(1, 2, 1)
imshow(content_image, 'Content Image')

plt.subplot(1, 2, 2)
imshow(style_image, 'Style Image')

In [None]:
import tensorflow_hub as hub
hub_model = hub.load('https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2')
stylized_image = hub_model(tf.constant(content_image), tf.constant(style_image))[0]
tensor_to_image(stylized_image)

In [None]:
x = tf.keras.applications.vgg19.preprocess_input(content_image*255)
x = tf.image.resize(x, (224, 224))
vgg = tf.keras.applications.VGG19(include_top=True, weights='imagenet')
prediction_probabilities = vgg(x)
prediction_probabilities.shape

In [None]:
predicted_top_5 = tf.keras.applications.vgg19.decode_predictions(prediction_probabilities.numpy())[0]
[(class_name, prob) for (number, class_name, prob) in predicted_top_5]

In [None]:
vgg = tf.keras.applications.VGG19(include_top=False, weights='imagenet')

print()
for layer in vgg.layers:
    print(layer.name)

In [None]:
content_layers = ['block5_conv2']

style_layers = ['block1_conv1',
                'block2_conv1',
                'block3_conv1',
                'block4_conv1',
                'block5_conv1']

num_content_layers = len(content_layers)
num_style_layers = len(style_layers)

In [None]:
def vgg_layers(layer_names):
    """ Creates a VGG model that returns a list of intermediate output values."""
    # Load our model. Load pretrained VGG, trained on ImageNet data
    vgg = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
    vgg.trainable = False

    outputs = [vgg.get_layer(name).output for name in layer_names]

    model = tf.keras.Model([vgg.input], outputs)
    return model

In [None]:
style_extractor = vgg_layers(style_layers)
style_outputs = style_extractor(style_image*255)

# Look at the statistics of each layer's output
for name, output in zip(style_layers, style_outputs):
    print(name)
    print("  shape: ", output.numpy().shape)
    print("  min: ", output.numpy().min())
    print("  max: ", output.numpy().max())
    print("  mean: ", output.numpy().mean())
    print()

In [None]:
def gram_matrix(input_tensor):
    result = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
    input_shape = tf.shape(input_tensor)
    num_locations = tf.cast(input_shape[1]*input_shape[2], tf.float32)
    return result/(num_locations)

In [None]:
class StyleContentModel(tf.keras.models.Model):
    def __init__(self, style_layers, content_layers):
        super(StyleContentModel, self).__init__()
        self.vgg = vgg_layers(style_layers + content_layers)
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style_layers = len(style_layers)
        self.vgg.trainable = False

    def call(self, inputs):
        "Expects float input in [0,1]"
        inputs = inputs*255.0
        preprocessed_input = tf.keras.applications.vgg19.preprocess_input(inputs)
        outputs = self.vgg(preprocessed_input)
        style_outputs, content_outputs = (outputs[:self.num_style_layers],
                                          outputs[self.num_style_layers:])

        style_outputs = [gram_matrix(style_output)
                         for style_output in style_outputs]

        content_dict = {content_name: value
                        for content_name, value
                        in zip(self.content_layers, content_outputs)}

        style_dict = {style_name: value
                      for style_name, value
                      in zip(self.style_layers, style_outputs)}

        return {'content': content_dict, 'style': style_dict}

In [None]:
extractor = StyleContentModel(style_layers, content_layers)

results = extractor(tf.constant(content_image))

print('Styles:')
for name, output in sorted(results['style'].items()):
    print("  ", name)
    print("    shape: ", output.numpy().shape)
    print("    min: ", output.numpy().min())
    print("    max: ", output.numpy().max())
    print("    mean: ", output.numpy().mean())
    print()

print("Contents:")
for name, output in sorted(results['content'].items()):
    print("  ", name)
    print("    shape: ", output.numpy().shape)
    print("    min: ", output.numpy().min())
    print("    max: ", output.numpy().max())
    print("    mean: ", output.numpy().mean())

In [None]:
style_targets = extractor(style_image)['style']
content_targets = extractor(content_image)['content']

In [None]:
image = tf.Variable(content_image)

In [None]:
def clip_0_1(image):
    return tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)

In [None]:
opt = tf.keras.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)

In [None]:
style_weight = 1e-2
content_weight = 1e4


def style_content_loss(outputs):
    style_outputs = outputs['style']
    content_outputs = outputs['content']
    style_loss = tf.add_n([tf.reduce_mean((style_outputs[name]-style_targets[name])**2)
                           for name in style_outputs.keys()])
    style_loss *= style_weight / num_style_layers

    content_loss = tf.add_n([tf.reduce_mean((content_outputs[name]-content_targets[name])**2)
                             for name in content_outputs.keys()])
    content_loss *= content_weight / num_content_layers
    loss = style_loss + content_loss
    return loss

In [None]:
@tf.function()
def train_step(image):
    with tf.GradientTape() as tape:
        outputs = extractor(image)
        loss = style_content_loss(outputs)

    grad = tape.gradient(loss, image)
    opt.apply_gradients([(grad, image)])
    image.assign(clip_0_1(image))


train_step(image)
train_step(image)
train_step(image)
tensor_to_image(image)

In [None]:
import time
start = time.time()

epochs = 10
steps_per_epoch = 100

step = 0
for n in range(epochs):
    for m in range(steps_per_epoch):
        step += 1
        train_step(image)
        print(".", end='', flush=True)
    display.clear_output(wait=True)
    display.display(tensor_to_image(image))
    print("Train step: {}".format(step))

end = time.time()
print("Total time: {:.1f}".format(end-start))

In [None]:
def high_pass_x_y(image):
    x_var = image[:, :, 1:, :] - image[:, :, :-1, :]
    y_var = image[:, 1:, :, :] - image[:, :-1, :, :]

    return x_var, y_var

In [None]:
x_deltas, y_deltas = high_pass_x_y(content_image)

plt.figure(figsize=(14, 10))
plt.subplot(2, 2, 1)
imshow(clip_0_1(2*y_deltas+0.5), "Horizontal Deltas: Original")

plt.subplot(2, 2, 2)
imshow(clip_0_1(2*x_deltas+0.5), "Vertical Deltas: Original")

x_deltas, y_deltas = high_pass_x_y(image)

plt.subplot(2, 2, 3)
imshow(clip_0_1(2*y_deltas+0.5), "Horizontal Deltas: Styled")

plt.subplot(2, 2, 4)
imshow(clip_0_1(2*x_deltas+0.5), "Vertical Deltas: Styled")

In [None]:
plt.figure(figsize=(14, 10))

sobel = tf.image.sobel_edges(content_image)
plt.subplot(1, 2, 1)
imshow(clip_0_1(sobel[..., 0]/4+0.5), "Horizontal Sobel-edges")
plt.subplot(1, 2, 2)
imshow(clip_0_1(sobel[..., 1]/4+0.5), "Vertical Sobel-edges")

In [None]:
def total_variation_loss(image):
    x_deltas, y_deltas = high_pass_x_y(image)
    return tf.reduce_sum(tf.abs(x_deltas)) + tf.reduce_sum(tf.abs(y_deltas))


total_variation_loss(image).numpy()

In [None]:
tf.image.total_variation(image).numpy()

In [None]:
total_variation_weight = 30

In [None]:
@tf.function()
def train_step(image):
    with tf.GradientTape() as tape:
        outputs = extractor(image)
        loss = style_content_loss(outputs)
        loss += total_variation_weight*tf.image.total_variation(image)

    grad = tape.gradient(loss, image)
    opt.apply_gradients([(grad, image)])
    image.assign(clip_0_1(image))


opt = tf.keras.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)
image = tf.Variable(content_image)

In [None]:
import time
start = time.time()

epochs = 10
steps_per_epoch = 100

step = 0
for n in range(epochs):
    for m in range(steps_per_epoch):
        step += 1
        train_step(image)
        print(".", end='', flush=True)
    display.clear_output(wait=True)
    display.display(tensor_to_image(image))
    print("Train step: {}".format(step))

end = time.time()
print("Total time: {:.1f}".format(end-start))

In [None]:
file_name = 'stylized-image.png'
tensor_to_image(image).save(file_name)

try:
    from google.colab import files
except ImportError:
    pass
else:
    files.download(file_name)