In [26]:
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.python.keras import layers
from tensorflow import nn

class DeformableConvLayer(layers.Conv2D):
    """Deformable 2-D convolution implemented on top of Keras ``Conv2D``.

    A regular convolution (the "offset layer") predicts a (y, x) displacement for
    every kernel tap of every deformable group. The input is then sampled at the
    displaced positions with bilinear interpolation, and the sampled taps are
    combined with a depth-wise convolution followed by a sum over input channels.

    Only the "channels last" data format is supported.
    """
    def __init__(self,
                 filters,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 data_format=None,
                 dilation_rate=(1, 1),
                 num_deformable_group=None,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        """`kernel_size`, `strides` and `dilation_rate` must have the same value in both axis.

        :param num_deformable_group: split output channels into groups, offset shared in each group. If
        this parameter is None, then set num_deformable_group=filters.
        :raises ValueError: if `filters` is not divisible by `num_deformable_group`.
        """
        super().__init__(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs)
        # All weights are created in build().
        self.kernel = None
        self.bias = None
        self.offset_layer_kernel = None
        self.offset_layer_bias = None
        if num_deformable_group is None:
            num_deformable_group = filters
        if filters % num_deformable_group != 0:
            raise ValueError('"filters" mod "num_deformable_group" must be zero')
        self.num_deformable_group = num_deformable_group

    def build(self, input_shape):
        """Create the depth-wise kernel, the optional bias and the offset-layer weights.

        :param input_shape: shape of the input tensor; the last entry is the channel count.
        """
        input_dim = int(input_shape[-1])
        # The main kernel is laid out for a depth-wise convolution (one filter per
        # (output channel, input channel) pair) instead of the usual
        # kernel_size + (input_dim, filters) layout.
        kernel_shape = self.kernel_size + (self.filters * input_dim, 1)
        self.kernel = self.add_weight(
            name='kernel',
            shape=kernel_shape,
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
            trainable=True,
            dtype=self.dtype)
        if self.use_bias:
            self.bias = self.add_weight(
                name='bias',
                shape=(self.filters,),
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                trainable=True,
                dtype=self.dtype)

        # Offset layer: one (y, x) pair per kernel tap per deformable group.
        # Zero-initialised, so the layer starts out sampling the regular grid.
        offset_num = self.kernel_size[0] * self.kernel_size[1] * self.num_deformable_group
        self.offset_layer_kernel = self.add_weight(
            name='offset_layer_kernel',
            shape=self.kernel_size + (input_dim, offset_num * 2),  # 2 means x and y axis
            initializer=tf.zeros_initializer(),
            regularizer=self.kernel_regularizer,
            trainable=True,
            dtype=self.dtype)
        self.offset_layer_bias = self.add_weight(
            name='offset_layer_bias',
            shape=(offset_num * 2,),
            initializer=tf.zeros_initializer(),
            regularizer=self.bias_regularizer,
            trainable=True,
            dtype=self.dtype)
        self.built = True

    def call(self, inputs, training=None, **kwargs):
        """Apply the deformable convolution.

        :param inputs: 4-D tensor [batch, in_h, in_w, channel_in]; the spatial dimensions
            must be statically known because they are read with ``get_shape()``.
        :param training: unused.
        :return: tensor [batch, out_h, out_w, filters] after bias and activation.
        """
        # Predict offsets from the unpadded input.
        # Shape: [batch, out_h, out_w, filter_h * filter_w * num_deformable_group * 2]
        offset = nn.conv2d(inputs,
                           filter=self.offset_layer_kernel,
                           strides=[1, *self.strides, 1],
                           padding=self.padding.upper(),
                           dilations=[1, *self.dilation_rate, 1])
        offset += self.offset_layer_bias

        # Sampling is done by hand below, so 'same' padding has to be applied explicitly.
        inputs = self._pad_input(inputs)

        # Sizes used for reshaping.
        batch_size = K.shape(offset)[0]
        channel_in = inputs.get_shape().as_list()[-1]
        in_h, in_w = inputs.get_shape().as_list()[1:3]  # (padded) input feature map size
        out_h, out_w = offset.get_shape().as_list()[1:3]  # output feature map size
        filter_h, filter_w = self.kernel_size

        # Split the offsets into their y and x components.
        offset = tf.reshape(offset, [batch_size, out_h, out_w, -1, 2])
        y_off, x_off = offset[:, :, :, :, 0], offset[:, :, :, :, 1]

        # Regular sampling grid: [1, out_h, out_w, filter_h * filter_w]
        y, x = self._get_conv_indices([in_h, in_w])
        y, x = [tf.expand_dims(i, axis=-1) for i in [y, x]]
        # Repeat the grid for every batch element and every deformable group.
        y, x = [tf.tile(i, [batch_size, 1, 1, 1, self.num_deformable_group]) for i in [y, x]]
        y, x = [tf.reshape(i, [K.shape(i)[0], *i.shape[1: 3], filter_h*filter_w*self.num_deformable_group]) for i in [y, x]]
        y, x = [tf.to_float(i) for i in [y, x]]

        # Displace the grid and keep the sampling positions inside the feature map.
        y, x = y + y_off, x + x_off
        y = tf.clip_by_value(y, 0, in_h - 1)
        x = tf.clip_by_value(x, 0, in_w - 1)

        # Integer corners surrounding every sampling position.
        y0, x0 = [tf.to_int32(tf.floor(i)) for i in [y, x]]
        y1, x1 = y0 + 1, x0 + 1

        # Bilinear weights must come from the *unclipped* corners. If they were taken
        # from the clipped indices, a sample lying exactly on the last row/column would
        # have y0 == y1 (or x0 == x1), all four weights would be 0 and the pixel would
        # vanish instead of taking the border value.
        y0_f, x0_f, y1_f, x1_f = [tf.to_float(i) for i in [y0, x0, y1, x1]]
        w0 = (y1_f - y) * (x1_f - x)
        w1 = (y1_f - y) * (x - x0_f)
        w2 = (y - y0_f) * (x1_f - x)
        w3 = (y - y0_f) * (x - x0_f)

        # Clip only the gather indices; an out-of-range corner always has weight 0.
        y0, y1 = [tf.clip_by_value(i, 0, in_h - 1) for i in [y0, y1]]
        x0, x1 = [tf.clip_by_value(i, 0, in_w - 1) for i in [x0, x1]]

        # Pixel values at the four corners: [batch, out_h, out_w, taps * groups, channel_in]
        indices = [[y0, x0], [y0, x1], [y1, x0], [y1, x1]]
        p0, p1, p2, p3 = [self._get_pixel_values_at_point(inputs, i) for i in indices]

        # Expand the weights so they broadcast over the channel axis.
        w0, w1, w2, w3 = [tf.expand_dims(i, axis=-1) for i in [w0, w1, w2, w3]]
        # Bilinear interpolation.
        pixels = tf.add_n([w0 * p0, w1 * p1, w2 * p2, w3 * p3])

        # Lay the sampled taps out as one "big" feature map in which every output
        # position owns a filter_h x filter_w tile.
        pixels = tf.reshape(pixels, [batch_size, out_h, out_w, filter_h, filter_w, self.num_deformable_group, channel_in])
        pixels = tf.transpose(pixels, [0, 1, 3, 2, 4, 5, 6])
        pixels = tf.reshape(pixels, [batch_size, out_h * filter_h, out_w * filter_w, self.num_deformable_group, channel_in])

        # Copy channels so every output filter of a group sees the group's samples.
        feat_in_group = self.filters // self.num_deformable_group
        pixels = tf.tile(pixels, [1, 1, 1, 1, feat_in_group])
        pixels = tf.reshape(pixels, [batch_size, out_h * filter_h, out_w * filter_w, -1])

        # Depth-wise conv with stride == kernel size consumes exactly one tile per output position.
        out = tf.nn.depthwise_conv2d(pixels, self.kernel, [1, filter_h, filter_w, 1], 'VALID')
        # Sum the per-input-channel responses belonging to the same output filter.
        out = tf.reshape(out, [-1, out_h, out_w, self.filters, channel_in])
        out = tf.reduce_sum(out, axis=-1)
        if self.use_bias:
            out += self.bias
        return self.activation(out)

    def _pad_input(self, inputs):
        """Check if input feature map needs padding, because we don't use the standard Conv() function.

        :param inputs: 4-D input tensor with statically known height and width.
        :return: padded input feature map (unchanged when padding is not 'same').
        """
        # When padding is 'same', we should pad the feature map.
        # if padding == 'same', output size should be `ceil(input / stride)`
        # NOTE(review): for strides > 1 the amount padded here may differ from what
        # TensorFlow's own 'SAME' convolution uses - confirm the alignment is intended.
        if self.padding == 'same':
            in_shape = inputs.get_shape().as_list()[1: 3]
            padding_list = []
            for i in range(2):
                filter_size = self.kernel_size[i]
                dilation = self.dilation_rate[i]
                dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1)
                same_output = (in_shape[i] + self.strides[i] - 1) // self.strides[i]
                valid_output = (in_shape[i] - dilated_filter_size + self.strides[i]) // self.strides[i]
                if same_output == valid_output:
                    padding_list += [0, 0]
                else:
                    p = dilated_filter_size - 1
                    p_0 = p // 2
                    padding_list += [p_0, p - p_0]
            if sum(padding_list) != 0:
                padding = [[0, 0],
                           [padding_list[0], padding_list[1]],  # top, bottom padding
                           [padding_list[2], padding_list[3]],  # left, right padding
                           [0, 0]]
                inputs = tf.pad(inputs, padding)
        return inputs

    def _get_conv_indices(self, feature_map_size):
        """the x, y coordinates in the window when a filter sliding on the feature map

        :param feature_map_size: sequence whose first two entries are (height, width).
        :return: y, x with shape [1, out_h, out_w, filter_h * filter_w]
        """
        feat_h, feat_w = [int(i) for i in feature_map_size[0: 2]]

        x, y = tf.meshgrid(tf.range(feat_w), tf.range(feat_h))  # each of shape [h, w]
        # reshape meshgrid into [1, h, w, 1] so patches can be extracted from it
        x, y = [tf.reshape(i, [1, *i.get_shape(), 1]) for i in [x, y]]
        x, y = [tf.image.extract_image_patches(i,
                                               [1, *self.kernel_size, 1],
                                               [1, *self.strides, 1],
                                               [1, *self.dilation_rate, 1],
                                               'VALID')
                for i in [x, y]]  # shape [1, out_h, out_w, filter_h * filter_w]
        return y, x

    @staticmethod
    def _get_pixel_values_at_point(inputs, indices):
        """Gather pixel values at integer (y, x) positions.

        :param inputs: 4-D tensor to read from.
        :param indices: pair (y, x) of int tensors with shape [batch_size, H, W, I],
            I = filter_h * filter_w * num_deformable_group
        :return: gathered values; one channel vector per index.
        """
        y, x = indices
        h, w, n = y.get_shape().as_list()[1: 4]
        # The batch size is taken dynamically so the graph works with an unknown batch dimension.
        batch = K.shape(y)[0]

        batch_idx = tf.reshape(tf.range(0, batch), (batch, 1, 1, 1))
        b = tf.tile(batch_idx, (1, h, w, n))
        pixel_idx = tf.stack([b, y, x], axis=-1)
        return tf.gather_nd(inputs, pixel_idx)

    @staticmethod
    def _rebuild_shape_to_batch(tensor_to_rebuild):
        """Return the tensor with its static batch dimension replaced by None.

        Wraps the tensor in ``tf.placeholder_with_default`` whose shape keeps every
        dimension except the first. Not called by ``call``.
        """
        new_shape = tf.TensorShape([None]).concatenate(tensor_to_rebuild.get_shape()[1:])
        output = tf.placeholder_with_default(tensor_to_rebuild, shape=new_shape)
        return output


In [5]:
# model.py
import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras import layers
from DeformableConv_TF.model import graph_mapping


# Look up the backbone builder registered under "R-50-v1" and build it without
# a classification head, with deformable convolutions switched on.
model_fn = graph_mapping["R-50-v1"]
backbone_kwargs = dict(
    include_top=False,
    input_shape=(256, 256, 3),
    norm_use="bn",
    weights=None,
    use_deformable=True,
)
pretrain_modules = model_fn(**backbone_kwargs)

# Collapse the spatial dimensions of the backbone output.
gap = tf.keras.layers.GlobalAveragePooling2D()(pretrain_modules.output)

64
64
64


In [4]:
# Print the layer-by-layer summary of the backbone built in the previous cell.
pretrain_modules.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 256, 256, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 262, 262, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 128, 128, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 128, 128, 64) 256         conv1_conv[0][0]                 
__________________________________________________________________________________________________
conv1_relu

In [21]:
# Scratch arithmetic. NOTE(review): presumably 32 filters x 9 (3x3) kernel taps - confirm.
32*9

288

In [25]:
# Smoke test: build the layer on a symbolic 28x28x16 input with an unknown batch size.
a = tf.keras.layers.Input((28, 28, 16), dtype=tf.float32)
#a = tf.Variable(np.random.random((128, 28, 28, 1)), dtype=tf.float32)
# 32 filters, 3x3 kernel, 'same' padding; `b` is the symbolic output tensor.
b = DeformableConvLayer(filters=32, kernel_size=3, padding="same")(a)

Tensor("deformable_conv_layer_8/Tile:0", shape=(?, 28, 28, 9, 32), dtype=int32)
3 3 16 32
Tensor("deformable_conv_layer_8/Reshape_3:0", shape=(?, 28, 28, 144), dtype=int32)


In [54]:
Tensor("deformable_conv_layer_39/ToFloat:0", shape=(1, 28, 28, 288), dtype=float32)
a Tensor("deformable_conv_layer_39/Tile_6:0", shape=(128, 84, 84, 32, 1), dtype=float32)
b Tensor("deformable_conv_layer_39/Reshape_11:0", shape=(128, 84, 84, 32), dtype=float32)

<tf.Tensor 'deformable_conv_layer_20/add_5:0' shape=(128, 26, 26, 32) dtype=float32>

In [135]:
# Disabled experiment: re-wrap `b` so that its static batch dimension becomes None.
#new_shape = tf.TensorShape([None]).concatenate(b.get_shape()[1:])
#b = tf.placeholder_with_default(b, shape=new_shape)

# Display the layer output tensor to check its static shape.
b

<tf.Tensor 'deformable_conv_layer_53/add_5:0' shape=(?, 28, 28, 32) dtype=float32>

In [None]:
### Start ###

In [1]:
# train.py / main
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from config import get_cfg_defaults
from DeformableConv_TF.model import preproc_fn, build_model, build_resnet_model
# Load the default experiment configuration and print it for the record.
cfg = get_cfg_defaults()
print(cfg)

  from ._conv import register_converters as _register_converters


DATASET:
  SET: mnist
MODEL:
  BACKBONE: 
  BATCH_SIZE: 32
  EPOCHS: 100
  LEARNING_RATE: 0.001
  NUM_DEFORM_GROUP: 0
  OPTIMIZER: SGD
  USE_DEFORMABLE_CONV: False
SYSTEM:
  DEVICES: []


In [2]:
# Override the defaults for this run; `cfg` is mutated in place, so later cells
# depend on this cell having been executed.
cfg.MODEL.BACKBONE = "R-50-v1"
cfg.MODEL.USE_DEFORMABLE_CONV = True
cfg.MODEL.NUM_DEFORM_GROUP = 1
cfg.DATASET.SET = "cat-dog"

In [3]:
# Restrict TensorFlow to the GPUs listed in the config.
# NOTE(review): an empty DEVICES list produces CUDA_VISIBLE_DEVICES="" which hides
# every GPU - confirm that a CPU-only run is what is intended in that case.
devices = ",".join(str(i) for i in cfg.SYSTEM.DEVICES)
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = devices

from dataloader import GetDataset, DataLoader, TrainingAugmentation, TestingAugmentation
from sklearn.model_selection import train_test_split
from utils import Fetch_dataset

# Fixed seed so the train/validation split is identical on every run.
SPLIT_SEED = 42

dset = Fetch_dataset(dataset_name=cfg.DATASET.SET)
trainset, testset = dset.load_data()
x_train, y_train = trainset
x_test, y_test = testset

# Hold out 10% of the training samples for validation.
idx = np.arange(len(x_train))
idx_train, idx_valid = train_test_split(idx, test_size=0.1, random_state=SPLIT_SEED)
x_train, x_valid = x_train[idx_train], x_train[idx_valid]
y_train, y_valid = y_train[idx_train], y_train[idx_valid]

# Labels are integer class ids; the class count is derived from the test labels.
num_classes = max(y_test) + 1

dataset_train = GetDataset(x=x_train, y=y_train, num_classes=num_classes,
                           preproc_fn=preproc_fn, augment_fn=TrainingAugmentation.augmentation)
dataset_valid = GetDataset(x=x_valid, y=y_valid, num_classes=num_classes,
                           preproc_fn=preproc_fn, augment_fn=TestingAugmentation.augmentation)

dataloader = DataLoader(dataset=dataset_train, batch_size=cfg.MODEL.BATCH_SIZE)

# Materialise the whole validation set as one batch; this rebinds
# x_valid / y_valid from the raw split to the loader's output.
x_valid, y_valid = next(iter(DataLoader(dataset_valid, batch_size=len(dataset_valid))))
print(x_valid.shape)

100%|██████████| 5000/5000 [00:47<00:00, 104.20it/s]
100%|██████████| 2222/2222 [00:17<00:00, 126.38it/s]


(500, 256, 256, 3)


In [4]:
# Keyword arguments common to both model builders.
model_kwargs = dict(
    input_shape=x_valid.shape[1:],
    output_num=y_valid.shape[-1],
    use_deformable=cfg.MODEL.USE_DEFORMABLE_CONV,
    num_deform_group=cfg.MODEL.NUM_DEFORM_GROUP,
)

# A non-empty backbone name selects the ResNet builder; otherwise use the plain model.
if cfg.MODEL.BACKBONE != "":
    model = build_resnet_model(backbone=cfg.MODEL.BACKBONE, **model_kwargs)
else:
    model = build_model(**model_kwargs)

# SGD with Nesterov momentum (Adam was tried as an alternative).
optim = tf.keras.optimizers.SGD(lr=cfg.MODEL.LEARNING_RATE, momentum=0.95, nesterov=True)
#optim = tf.keras.optimizers.Adam(lr=cfg.MODEL.LEARNING_RATE)
model.compile(optimizer=optim, loss="categorical_crossentropy", metrics=["acc"])
model.summary()

64
64
64
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 256, 256, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 262, 262, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 128, 128, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 128, 128, 64) 256         conv1_conv[0][0]                 
__________________________________________________________________________________________________
c

In [5]:
# Train from the batch generator; one epoch is a full pass over `dataloader`,
# and the pre-materialised validation batch is evaluated after each epoch.
model.fit_generator(dataloader, 
                    epochs=cfg.MODEL.EPOCHS, 
                    steps_per_epoch=len(dataloader), 
                    validation_data=(x_valid, y_valid))

Epoch 1/100
  1/141 [..............................] - ETA: 5:41:24 - loss: 0.8022 - acc: 0.4375

KeyboardInterrupt: 

Debug

In [14]:
# List the model's layers to locate the deformable convolution layers.
model.layers

[<tensorflow.python.keras.engine.input_layer.InputLayer at 0x7f92f38902e8>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x7f924633c3c8>,
 <tensorflow.python.keras.layers.normalization.BatchNormalization at 0x7f93663bf320>,
 <tensorflow.python.keras.layers.core.Activation at 0x7f93663c8f28>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x7f936054f470>,
 <DeformableConv_TF.layers.deformable_layers.DeformableConvLayer at 0x7f92457f22e8>,
 <tensorflow.python.keras.layers.normalization.BatchNormalization at 0x7f923f254908>,
 <tensorflow.python.keras.layers.core.Activation at 0x7f923f355400>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x7f9221842128>,
 <DeformableConv_TF.layers.deformable_layers.DeformableConvLayer at 0x7f9221842828>,
 <tensorflow.python.keras.layers.normalization.BatchNormalization at 0x7f92217d1a58>,
 <tensorflow.python.keras.layers.core.Activation at 0x7f9221773470>,
 <tensorflow.python.keras.layers.pooling.GlobalAveragePooling2D at 

In [15]:
# Index 5 is the first DeformableConvLayer in the `model.layers` listing shown above;
# the index is tied to that particular model and must be updated if the architecture changes.
xl = model.layers[5]
xl

<DeformableConv_TF.layers.deformable_layers.DeformableConvLayer at 0x7f92457f22e8>

In [16]:
# Show the variables owned by the selected layer (kernel, bias and offset-layer weights).
xl.weights

[<tf.Variable 'deformable_conv_layer/kernel:0' shape=(3, 3, 2048, 1) dtype=float32>,
 <tf.Variable 'deformable_conv_layer/bias:0' shape=(64,) dtype=float32>,
 <tf.Variable 'deformable_conv_layer/offset_layer_kernel:0' shape=(3, 3, 32, 1152) dtype=float32>,
 <tf.Variable 'deformable_conv_layer/offset_layer_bias:0' shape=(1152,) dtype=float32>]

In [10]:
from tensorflow.keras.datasets.cifar10 import load_data
# Load CIFAR-10 through Keras; this rebinds `trainset` / `testset` used by earlier cells.
trainset, testset = load_data()

In [83]:
import numpy as np
import glob
from tensorflow.python.keras import datasets
import cv2
from pathlib import Path


class Fetch_dataset():
    """Uniform entry point for loading a built-in Keras dataset or a custom one.

    :param dataset_name: key of a built-in dataset ("mnist", "fashion-mnist",
        "cifar10", "cifar100"); any other name selects the custom loader.
    :param custom_load_fn: loader used when `dataset_name` is not built in. Either an
        object exposing ``load_data()`` or a zero-argument callable returning the data.
    """
    def __init__(self, dataset_name="mnist", custom_load_fn=None):
        self.dataset_map = {
            "mnist":datasets.mnist,
            "fashion-mnist":datasets.fashion_mnist,
            "cifar10":datasets.cifar10,
            "cifar100":datasets.cifar100,
        }
        self.dataset_name = dataset_name
        self.custom_load_fn = custom_load_fn

    def load_data(self):
        """Load and return the selected dataset.

        :return: whatever the underlying loader returns.
        :raises ValueError: if the name is not built in and no custom loader was given.
        :raises TypeError: if the custom loader is neither callable nor has ``load_data``.
        """
        if self._check_dataset_in_map():
            return self.dataset_map[self.dataset_name].load_data()

        loader = self.custom_load_fn
        if loader is None:
            raise ValueError(
                'Unknown dataset "%s" and no custom_load_fn was given; built-in datasets: %s'
                % (self.dataset_name, ", ".join(sorted(self.dataset_map))))
        # Objects with load_data() keep working exactly as before; plain callables
        # are accepted as well, matching the parameter's name.
        if hasattr(loader, "load_data"):
            return loader.load_data()
        if callable(loader):
            return loader()
        raise TypeError('custom_load_fn must be callable or provide a load_data() method')

    def _check_dataset_in_map(self):
        """Return True when `dataset_name` is one of the built-in datasets."""
        return self.dataset_name in self.dataset_map
    
class DatasetCatdog():
    """Loads cat/dog JPEG images from directories as RGB arrays with binary labels.

    :param train_path: list of directory prefixes holding training images. Each entry
        is concatenated with "*.jpg", so it should end with a path separator.
    :param test_path: list of directory prefixes holding test images (same convention).
    :param num_train_images: maximum number of training images to sample.
    :param imsize: size passed to ``cv2.resize`` for every image.
    """
    def __init__(self, train_path=None, test_path=None, num_train_images=5000, imsize=(256,256)):
        # None instead of a mutable [] default, so instances never share a list.
        self.train_path = [] if train_path is None else train_path
        self.test_path = [] if test_path is None else test_path
        self.num_train_images = num_train_images
        self.imsize = imsize

    def load_data(self):
        """Read the images from disk.

        :return: ((train_images, train_targets), (test_images, test_targets)); images
            are numpy arrays, targets are lists of 0/1 ints.
        :raises IOError: if an image file cannot be decoded.
        """
        train_files = self._sample_paths(self.read_path(self.train_path))
        train_images = np.array([self._load_image(i) for i in train_files])
        train_targets = [self.parse_target(i) for i in train_files]

        test_files = self.read_path(self.test_path)
        test_images = np.array([self._load_image(i) for i in test_files])
        test_targets = [self.parse_target(i) for i in test_files]

        return (train_images, train_targets), (test_images, test_targets)

    def _sample_paths(self, paths):
        """Randomly pick at most `num_train_images` distinct paths."""
        n_take = min(len(paths), self.num_train_images)
        if n_take <= 0:
            return []
        # replace=False: sampling with replacement would load the same image several times.
        return np.random.choice(paths, n_take, replace=False).tolist()

    def _load_image(self, path):
        """Read one image, resize it to `imsize` and convert BGR to RGB."""
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Could not read image: %s' % path)
        return cv2.cvtColor(cv2.resize(image, self.imsize), cv2.COLOR_BGR2RGB)

    def read_path(self, path):
        """Return every "*.jpg" file matched under the given list of directory prefixes."""
        return [item for i in path for item in glob.glob(i+"*.jpg")]

    def parse_target(self, path):
        """Return 1 when the file name contains "dog", otherwise 0."""
        # Path is imported alongside this class; `os` is not.
        return int("dog" in Path(path).name)

class DatasetPCAM():
    """Loads TIFF images from directories as RGB arrays, labelled by parent directory name.

    :param train_path: list of directory prefixes holding training images. Each entry
        is concatenated with "*.tif", so it should end with a path separator.
    :param test_path: list of directory prefixes holding test images (same convention).
    :param num_train_images: maximum number of training images to sample.
    """
    def __init__(self, train_path=None, test_path=None, num_train_images=5000):
        # None instead of a mutable [] default, so instances never share a list.
        self.train_path = [] if train_path is None else train_path
        self.test_path = [] if test_path is None else test_path
        self.num_train_images = num_train_images

    def load_data(self):
        """Read the images from disk.

        :return: ((train_images, train_targets), (test_images, test_targets)); images
            are numpy arrays, targets are lists of directory-name strings.
        :raises IOError: if an image file cannot be decoded.
        """
        train_files = self._sample_paths(self.read_path(self.train_path))
        train_images = np.array([self._load_image(i) for i in train_files])
        train_targets = [self.parse_target(i) for i in train_files]

        test_files = self.read_path(self.test_path)
        test_images = np.array([self._load_image(i) for i in test_files])
        test_targets = [self.parse_target(i) for i in test_files]

        return (train_images, train_targets), (test_images, test_targets)

    def _sample_paths(self, paths):
        """Randomly pick at most `num_train_images` distinct paths."""
        n_take = min(len(paths), self.num_train_images)
        if n_take <= 0:
            return []
        # replace=False: sampling with replacement would load the same image several times.
        return np.random.choice(paths, n_take, replace=False).tolist()

    @staticmethod
    def _load_image(path):
        """Read one image and convert it from BGR to RGB."""
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Could not read image: %s' % path)
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def read_path(self, path):
        """Return every "*.tif" file matched under the given list of directory prefixes."""
        return [item for i in path for item in glob.glob(i+"*.tif")]

    def parse_target(self, path):
        """Return the name of the directory that directly contains the file (a string)."""
        p = Path(path)
        return p.parts[-2]

In [77]:
# Small cat/dog loader for a quick check (100 training images).
# NOTE(review): the absolute paths are machine-specific; the trailing "/" is required
# because the loader concatenates the directory string with "*.jpg".
d = DatasetCatdog(train_path=["/mnt/extension/experiment/cat_dog/train/training/"],
              test_path=["/mnt/extension/experiment/cat_dog/train/valid/"], num_train_images=100)

In [59]:
# a = (train_images, train_targets), b = (test_images, test_targets).
# This rebinds `a` and `b`, which an earlier cell used for tensors.
a,b = d.load_data()

In [72]:
# PCAM loader over the class sub-directories "0" and "1" (100 training images).
# NOTE(review): the absolute paths are machine-specific; this rebinds `d`.
d = DatasetPCAM(train_path=["/mnt/extension/experiment/pcam/base_dir/train_dir/0/",
                            "/mnt/extension/experiment/pcam/base_dir/train_dir/1/"],
                test_path=["/mnt/extension/experiment/pcam/base_dir/val_dir/0/",
                           "/mnt/extension/experiment/pcam/base_dir/val_dir/1/"], 
                num_train_images=100)

In [2]:
# Load the "cat-dog" dataset through Fetch_dataset.
# NOTE(review): "cat-dog" is not a key of the Fetch_dataset class defined in this
# notebook, so this presumably relies on the version imported from utils - confirm.
dd = Fetch_dataset(dataset_name="cat-dog")
a,b = dd.load_data()

100%|██████████| 5000/5000 [00:50<00:00, 98.06it/s]
100%|██████████| 2222/2222 [00:15<00:00, 146.93it/s]


In [1]:
from utils import Fetch_dataset

  from ._conv import register_converters as _register_converters


AttributeError: 'tuple' object has no attribute 'shape'