In [1]:
import numpy as np

import tensorflow as tf
from tensorflow import keras

In [2]:
def corr2d(X, K):
    h, w = K.shape
    Y = tf.Variable(tf.zeros(
        (X.shape[0] - h + 1, X.shape[1] - w + 1)
    ))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j].assign(tf.cast(
                tf.reduce_sum(X[i:i+h, j:j+w] * K), 
                dtype=tf.float32
            ))
    return Y

X = tf.constant([
    [0, 1, 2], 
    [3, 4, 5], 
    [6, 7, 8],
])
K = tf.constant([
    [0, 1], 
    [2, 3],
])
corr2d(X, K)

<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[19., 25.],
       [37., 43.]], dtype=float32)>

In [3]:
class Conv2D(keras.layers.Layer):
    def __init__(self, units):
        super().__init__()
        self.units = units
        return 
    
    def build(self, kernel_size):
        self.W = self.add_weight(
            name='W', 
            shape=kernel_size, 
            initializer=tf.random_normal_initializer()
        )
        self.b = self.add_weight(
            name='b', 
            shape=(1,),
            initializer=tf.zeros_initializer()
        )
        return 
    
    def call(self, inputs):
        return corr2d(inputs, self.W) + self.b
    


In [4]:
# edge detection
X = tf.Variable(tf.ones((6, 8)))
X[:, 2:6].assign(tf.zeros(X[:, 2:6].shape))
print(X)

K = tf.constant([
    [1.0, -1.0]
])
print(K)

Y = corr2d(X, K)
print(Y)

<tf.Variable 'Variable:0' shape=(6, 8) dtype=float32, numpy=
array([[1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.]], dtype=float32)>
tf.Tensor([[ 1. -1.]], shape=(1, 2), dtype=float32)
<tf.Variable 'Variable:0' shape=(6, 7) dtype=float32, numpy=
array([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.]], dtype=float32)>


In [5]:
X = tf.reshape(X, (1, 6, 8, 1))
Y = tf.reshape(Y, (1, 6, 7, 1))

Y.shape

TensorShape([1, 6, 7, 1])

In [6]:
conv2d = keras.layers.Conv2D(filters=1, kernel_size=(1, 2))
Y_pred = conv2d(X)
for i in range(10):
    with tf.GradientTape(watch_accessed_variables=False) as tape:
        tape.watch(conv2d.weights[0])
        Y_pred = conv2d(X)
        loss1 = (abs(Y_pred - Y)) ** 2
        grad = tape.gradient(loss1, conv2d.weights[0])
        lr = 3e-2
        update = lr * grad
        updated_weights = conv2d.get_weights()
        updated_weights[0] = conv2d.weights[0] - update
        conv2d.set_weights(updated_weights)
        print('batch %d, loss %.3f' % (i+1, tf.reduce_sum(loss1)))


batch 1, loss 34.928
batch 2, loss 15.513
batch 3, loss 7.127
batch 4, loss 3.414
batch 5, loss 1.715
batch 6, loss 0.905
batch 7, loss 0.500
batch 8, loss 0.288
batch 9, loss 0.171
batch 10, loss 0.104


In [7]:
tf.reshape(conv2d.get_weights()[0],(1,2))


<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[ 0.95013607, -1.0135953 ]], dtype=float32)>

In [12]:
# try padding
def comp_conv2d(conv2d, X):
    # shape should be: (samples, rows, cols, channels)
    X = tf.reshape(X, [1] + X.shape + [1])
    Y = conv2d(X)
    return tf.reshape(Y, Y.shape[1:3])

conv2d = keras.layers.Conv2D(1, kernel_size=3, padding='same')
X = tf.random.uniform([8, 8])
print(comp_conv2d(conv2d, X).shape)

# input: (nh, nw)
# kernel: (kh, kw)
# normally output: (nh-kh+1, nw-kw+1)
# padding output: (nh-kh+ph+1, nw-kw+pw+1)
# stride output: ( (nh-kh+ph+sh)/sh, (nw-kw+pw+sw)/sw )
conv2d = keras.layers.Conv2D(
    1, 
    kernel_size=3, 
    padding='same', 
    strides=2
)
print(comp_conv2d(conv2d, X).shape)


conv2d = tf.keras.layers.Conv2D(1, kernel_size=(3,5), padding='valid', strides=(3,4))
print(comp_conv2d(conv2d, X).shape)


(8, 8)
(4, 4)
(2, 1)


In [27]:
def corr2d(X, K):
    kh, kw = K.shape
    if len(X.shape) <= 1:
        X = tf.reshape(X, [X.shape[0], 1])
    Y = tf.Variable(tf.zeros([X.shape[0] - kh + 1, X.shape[1] - kw + 1]))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i,j].assign(
                tf.cast(
                    tf.reduce_sum(X[i:i+kh, j:j+kw] * K), 
                    dtype=tf.float32
                )
            )
    return Y

def corr2d_multi_in(X, K):
    for i in range(X.shape[0]):
        print(corr2d(X[i], K[i]))
    return tf.reduce_sum([
        corr2d(X[i], K[i])
        for i in range(X.shape[0])
    ], axis=0) 

X = tf.constant([
    [
        [0,1,2],
        [3,4,5],
        [6,7,8]
    ],
    [
        [1,2,3],
        [4,5,6],
        [7,8,9]
    ]
])  

K = tf.constant([
    [
        [0,1],
        [1,0]
    ],
    [
        [1,0],
        [0,1]
    ]
])
print(X.shape, K.shape)

corr2d_multi_in(X, K)

(2, 3, 3) (2, 2, 2)
<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[ 4.,  6.],
       [10., 12.]], dtype=float32)>
<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[ 6.,  8.],
       [12., 14.]], dtype=float32)>


<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[10., 14.],
       [22., 26.]], dtype=float32)>

In [29]:
def corr2d_multi_in_out(X, K):
    return tf.stack(
        [corr2d_multi_in(X, k) for k in K], 
        axis=0
    )

# K = tf.stack([K, K+1, K+2], axis=0)
# print(K.shape)
corr2d_multi_in_out(X, [K, K+1, K+2])

<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[ 4.,  6.],
       [10., 12.]], dtype=float32)>
<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[ 6.,  8.],
       [12., 14.]], dtype=float32)>
<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[12., 18.],
       [30., 36.]], dtype=float32)>
<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[18., 24.],
       [36., 42.]], dtype=float32)>
<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[20., 30.],
       [50., 60.]], dtype=float32)>
<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[30., 40.],
       [60., 70.]], dtype=float32)>


<tf.Tensor: shape=(3, 2, 2), dtype=float32, numpy=
array([[[ 10.,  14.],
        [ 22.,  26.]],

       [[ 30.,  42.],
        [ 66.,  78.]],

       [[ 50.,  70.],
        [110., 130.]]], dtype=float32)>

In [31]:
def corr2d_multi_in_out_1x1(X, K):
    c_i, h, w = X.shape
    c_o = K.shape[0]
    X = tf.reshape(X, [c_i, h*w])
    K = tf.reshape(K, [c_o, c_i])
    Y = tf.matmul(K, X)
    return tf.reshape(Y, [c_o, h, w])

X = tf.random.uniform((3,3,3))
K = tf.random.uniform((2,3,1,1))

Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_in_out(X, K)

tf.norm(Y1-Y2) < 1e-6


<tf.Variable 'Variable:0' shape=(3, 3) dtype=float32, numpy=
array([[0.2556957 , 0.13596115, 0.3563068 ],
       [0.42687646, 0.09416759, 0.37894204],
       [0.30151868, 0.37034634, 0.55749667]], dtype=float32)>
<tf.Variable 'Variable:0' shape=(3, 3) dtype=float32, numpy=
array([[0.09723694, 0.17561707, 0.35297734],
       [0.31168556, 0.26599225, 0.02346409],
       [0.16807643, 0.24897598, 0.21953052]], dtype=float32)>
<tf.Variable 'Variable:0' shape=(3, 3) dtype=float32, numpy=
array([[0.24050224, 0.11232375, 0.08895671],
       [0.13278887, 0.08720583, 0.28041792],
       [0.20423491, 0.05328164, 0.05286534]], dtype=float32)>
<tf.Variable 'Variable:0' shape=(3, 3) dtype=float32, numpy=
array([[0.3590972 , 0.19094285, 0.5003947 ],
       [0.59950227, 0.1322483 , 0.53218347],
       [0.42345068, 0.52011174, 0.78294426]], dtype=float32)>
<tf.Variable 'Variable:0' shape=(3, 3) dtype=float32, numpy=
array([[0.12584777, 0.22729035, 0.45683682],
       [0.4033954 , 0.34425735, 0.03036813

<tf.Tensor: shape=(), dtype=bool, numpy=True>

In [34]:
def pool2d(X, pool_size, mode='max'):
    ph, pw = pool_size
    Y = tf.zeros([X.shape[0] - ph + 1, X.shape[1] - pw + 1])
    Y = tf.Variable(Y)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode == 'max':
                Y[i, j].assign(tf.reduce_max(X[i:i+ph, j:j+pw]))
            elif mode=='avg':
                Y[i, j].assign(tf.reduce_mean(X[i:i+ph, j:j+pw]))
    return Y

X = tf.constant([
    [0,1,2],
    [3,4,5],
    [6,7,8]
],dtype=tf.float32)
print(pool2d(X, (2,2), mode='max'))
print(pool2d(X, (2,2), mode='avg'))

<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[4., 5.],
       [7., 8.]], dtype=float32)>
<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[2., 3.],
       [5., 6.]], dtype=float32)>


In [38]:
#tensorflow default data_format == 'channels_last'
#so (1,4,4,1) instead of (1,1,4,4)
X = tf.reshape(tf.constant(range(16)), (1,4,4,1))

pool2d = keras.layers.MaxPool2D(pool_size=[3,3])
print(pool2d(X))

#I guess no custom padding settings in keras.layers?
pool2d = keras.layers.MaxPool2D(pool_size=[3,3],padding='same',strides=2)
print(pool2d(X))

X = tf.stack([X, X+1], axis=3)
X = tf.reshape(X, (2,4,4,1))

pool2d = keras.layers.MaxPool2D(3, padding='same', strides=2)
pool2d(X)


tf.Tensor([[[[10]]]], shape=(1, 1, 1, 1), dtype=int32)
tf.Tensor(
[[[[10]
   [11]]

  [[14]
   [15]]]], shape=(1, 2, 2, 1), dtype=int32)


<tf.Tensor: shape=(2, 2, 2, 1), dtype=int32, numpy=
array([[[[ 5],
         [ 6]],

        [[ 7],
         [ 8]]],


       [[[13],
         [14]],

        [[15],
         [16]]]])>