There seems to be a error when using more than one layer of pooling of conv_transpose. Perhaps it will be fixed in a later flex-conv commit. currently at commit e2fdad61d93a2ec830f68b9cf130b4688a4c8040 (20181016)

In [1]:
%reload_ext autoreload
%autoreload 2

In [3]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = ""

In [4]:

from keras import backend as K
K.set_image_data_format('channels_first')
import tensorflow as tf

import numpy as np
import tensorflow as tf
from tabulate import tabulate
from layers import flex_convolution, flex_convolution_transpose, flex_pooling

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [5]:
###### img_rows = 512
img_cols = 512
img_channels = 3
batch_size = 1
num_epochs = 100
input_positions = 3
k = 5
network_channel_sizes = (8, 16, 16, 32, 32, 64, 64, 128, 128, 256, 256, 512, 512)
model_name = '3dflex_2_t_minst'

# Data

In [6]:
# https://github.com/Harry-Zhi/3DMNIST/blob/master/3DMNIST.ipynb
def img_to_point_cloud(input_image, voxel):

    non_zero_coord = np.transpose(np.nonzero(input_image))
    
    # dict for fast looking of neighboor ocupancy
    non_zero_dict = {}
    for i in range(input_image.shape[0]):
        for j in range(input_image.shape[1]):
            non_zero_dict[str([i,j])] = any(np.all([i,j] == non_zero_coord, axis=1))

    cloud = []
    
    for n in range(len(non_zero_coord)):
        x = non_zero_coord[n][0]
        y = non_zero_coord[n][1]
            
        components = [0,1]
        
        # top
        if not non_zero_dict[str([x-1, y])]:
            components.append(2)
        
        # bottom
        if not non_zero_dict[str([x+1, y])]:
            components.append(3)
        
        # left
        if not non_zero_dict[str([x, y-1])]:
            components.append(4)
        
        # right
        if not non_zero_dict[str([x, y+1])]:
            components.append(5)
        
        pixel_cloud = np.concatenate(voxel[components])
                
        # move the voxel to its position
        pixel_cloud[:,0] +=x
        pixel_cloud[:,1] += y
        
        cloud.append(pixel_cloud)
    
    cloud = np.concatenate(cloud)
    
    # make max range 0-1
    xyzmin = np.min(cloud[:,:3], axis=0)
    xyzmax = np.max(cloud[:,:3], axis=0)
    diff = xyzmax - xyzmin
    cloud[:,:3] = ((cloud[:,:3] - xyzmin[np.argmax(diff)]) / diff[np.argmax(diff)])
    
    # 0 mean
    cloud[:,:3] -= np.mean(cloud[:,:3], axis=0)

    return cloud

In [7]:
MIN_X, MAX_X = (-0.5, 0.5)
MIN_Y, MAX_Y = (-0.5, 0.5)
MIN_Z, MAX_Z = (-3, 3)

N_X = 5
N_Y = 5
N_Z = 30

In [8]:
# VOXEL CREATION
# with normals

front = np.array(np.meshgrid(np.linspace(MIN_X, MAX_X, N_X),
                             np.linspace(MIN_Y, MAX_Y, N_Y),
                             MAX_Z )).T.reshape(-1,3) 
front = np.concatenate((front, [[1,0,0]] * len(front)), axis=1)


back = np.array(np.meshgrid(np.linspace(MIN_X, MAX_X, N_X), 
                            np.linspace(MIN_Y, MAX_Y, N_Y), 
                            MIN_Z )).T.reshape(-1,3)    
back = np.concatenate((back, [[-1,0,0]] * len(back)), axis=1)


top = np.array(np.meshgrid(MIN_X,
                           np.linspace(MIN_Y, MAX_Y, N_Y), 
                           np.linspace(MIN_Z, MAX_Z, N_Z))).T.reshape(-1,3)
top = np.concatenate((top, [[0,0,1]] * len(top)), axis=1)


bottom = np.array(np.meshgrid(MAX_X, 
                              np.linspace(MIN_Y, MAX_Y, N_Y), 
                              np.linspace(MIN_Z, MAX_Z, N_Z))).T.reshape(-1,3) 
bottom = np.concatenate((bottom, [[0,0,-1]] * len(bottom)), axis=1)


left = np.array(np.meshgrid(np.linspace(MIN_X, MAX_X, N_X), 
                            MIN_Y,
                            np.linspace(MIN_Z, MAX_Z, N_Z))).T.reshape(-1,3)
left = np.concatenate((left, [[0,-1,0]] * len(left)), axis=1)


right = np.array(np.meshgrid(np.linspace(MIN_X, MAX_X, N_X), 
                             MAX_Y, 
                             np.linspace(MIN_Z, MAX_Z, N_Z))).T.reshape(-1,3)   
right = np.concatenate((right, [[0,1,0]] * len(right)), axis=1)


voxel = np.array([front, back, top, bottom, left, right])

In [9]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()


In [10]:
from scipy.spatial import cKDTree

def cloud_to_tftree(xv, K=5):
    positions = xv[:, :3]
    features = xv[:, 3:]
    tree = cKDTree(positions)
    neighbors = tree.query(positions, k=K)[1]

    positions = positions.transpose(1, 0).astype(np.float32)
    features = features.transpose(1, 0).astype(np.float32)
    neighbors = neighbors.transpose(1, 0).astype(np.int32)
    return features, positions, neighbors

In [11]:
cloud_to_tftree(img_to_point_cloud(x_train[0], voxel))

(array([[1., 1., 1., ..., 0., 0., 0.],
        [0., 0., 0., ..., 1., 1., 1.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 array([[-0.47305447, -0.47305447, -0.47305447, ...,  0.5019455 ,
          0.51444554,  0.52694553],
        [-0.1094358 , -0.0969358 , -0.0844358 , ..., -0.1094358 ,
         -0.1094358 , -0.1094358 ],
        [ 0.15      ,  0.15      ,  0.15      , ...,  0.15      ,
          0.15      ,  0.15      ]], dtype=float32),
 array([[  345,   196,   197, ..., 25364, 25369, 25699],
        [    0,     1,     2, ..., 25697, 25698, 25374],
        [  195,   191,   192, ..., 25692, 25693, 25549],
        [  340,   345,     7, ..., 25696, 25699, 25694],
        [  190,   195,     3, ..., 25359, 25549, 25544]], dtype=int32))

In [66]:
class DataLoader(tf.keras.utils.Sequence):
    
    def __init__(self, x_train, y_train, batch_size=4):
        super().__init__()
        self.batch_size = batch_size
        self.x_train = x_train
        self.y_train = y_train
    
    def __getitem__(self, index):
        start = index * self.batch_size
        end = min(start + self.batch_size, len(self.x_train))

        xx = [self.x_train[i] for i in range(start, end)]
        yy = [self.y_train[i] for i in range(start, end)]
        
        yy = np.stack(yy, 0)
        
        x2 = []
        for x in xx:
            try:
                x = img_to_point_cloud(x, voxel)
            except KeyError as e:
                print('error', index, e)                
                x = np.zeros((512, 6))
            idx = np.random.choice(range(len(x)), size=img_cols, replace=False)
            x = cloud_to_tftree(x[idx])
            x2.append(x)
#         x2 = np.stack(x2, 0)
        x2 = [np.stack(x) for x in zip(*x2)]
        return x2, yy
    
    def __len__(self):
        return len(self.x_train)//self.batch_size-1


train_gen = DataLoader(x_train, y_train, batch_size=batch_size)
test_gen = DataLoader(x_test, y_test, batch_size=batch_size)
train_gen

<__main__.DataLoader at 0x7f02759a12e8>

# Model

In [13]:
# # model
# # from https://github.com/mikelane/SegNet/blob/master/network.py
from typing import Tuple, TypeVar

from tensorflow.python.keras.layers import concatenate, Conv2D, Conv2DTranspose, MaxPooling2D, BatchNormalization, Lambda, Input, ReLU, Conv1D, Dense, Reshape
from tensorflow.python.keras.layers.advanced_activations import LeakyReLU
from tensorflow.python.keras.models import Model, Sequential
from tensorflow.python.keras.optimizers import Adam


In [14]:
from layers import FlexConvolution, FlexPooling

class DownsampleGroup(tf.keras.Model):
    def __init__(
            self, num_filters: int, pool_strides=2, leaky_alpha=0.3):
        """
        A Unet downsampling group using flex-net operations
        """
        super().__init__()
        self.num_filters = num_filters
        self.leaky_alpha = leaky_alpha
        self.pool_strides = pool_strides
        
    def build(self, input_shape):
        

        input_features_shape, input_positions_shape, input_neighbors_shape = input_shape
        
        input_features = Input(shape=[s.value for s in input_features_shape[1:]])
        input_positions = Input(shape=[s.value for s in input_positions_shape[1:]])
        input_neighbors = Input(shape=[s.value for s in input_neighbors_shape[1:]], dtype='int32')
        
        self.flex0 = FlexConvolution(
                     input_features,
                     input_positions,
                     input_neighbors,
                     filters=self.num_filters,
                     activation=None,
                     kernel_initializer=None,
                     position_bias_initializer=tf.zeros_initializer(),
                     features_bias_initializer=tf.zeros_initializer(),
                     use_feature_bias=True,
                     data_format='simple',
                     trainable=True,
                     name=None)
#         self.bn = BatchNormalization(axis=1)
        self.act = LeakyReLU(alpha=self.leaky_alpha)
        self.pool = FlexPooling(
                input_features,
                 input_neighbors,
                 data_format='simple',
                 name=None)

#         conv_trans_features = Input(shape=[self.num_filters, input_features_shape[2]])
#         self.conv_trans = FlexConvolutionTranspose(
#             conv_trans_features,
#             input_positions,
#             input_neighbors,
#             filters=self.num_filters,
#             activation=None,
#             kernel_initializer=None,
#             position_bias_initializer=tf.zeros_initializer(),
#             features_bias_initializer=tf.zeros_initializer(),
#             use_feature_bias=True,
#             data_format="simple",
#             trainable=True,
#             name=None,
#         )
        
        self.subsample_f = Lambda(lambda x: x[:, :, :input_features.shape[2]//self.pool_strides], output_shape=(self.num_filters, input_features.shape[2]//self.pool_strides))
        self.subsample_p = Lambda(lambda x: x[:, :, :input_positions.shape[2]//self.pool_strides], output_shape=(input_positions.shape[1], input_positions.shape[2]//self.pool_strides))
        self.subsample_n = Lambda(lambda x: x[:, :, :input_neighbors.shape[2]//self.pool_strides], output_shape=(input_neighbors.shape[1], input_neighbors.shape[2]//self.pool_strides))
        
        
    def call(self, inputs, training=False):
        
        input_to_group, positions, neighborhoods = inputs
        
        conv_1 = self.flex0.apply([input_to_group, positions, neighborhoods])
#         conv_1 = self.bn(conv_1, training=training)
        conv_1 = self.act(conv_1)
    
        # FIXME the pooling op is unstable and more than 1 downsample module will result in NaN's after a few steps of training
        # you can see this clearly by running this notebook without cuda
        output = self.pool.apply([conv_1, neighborhoods])
#         output = self.conv_trans.apply([conv_1, positions, neighborhoods]) 
        output = self.subsample_f(conv_1)
        
        positions = self.subsample_p(positions)
        
        neighborhoods = self.subsample_n(neighborhoods)
    
        return output, positions, neighborhoods

In [15]:
from typing import Tuple
from layers import FlexConvolutionTranspose, FlexConvolution, FlexPooling


class Network2:
    def __init__(self, 
         input_height: int,
         input_channels: int,
         network_channel_sizes: Tuple[int, ...],
         channels_last: bool = True,
         conv_padding: str = 'same',
         down_conv_kernel: Tuple[int, int] = (3, 3),
         up_conv_kernel: Tuple[int, int] = (2, 2),
         up_conv_kernel_strides: int = 2,
         leaky_alpha=0.3,
         pool_size: Tuple[int, int] = (2, 2),
         pool_strides: int = 2,
         input_positions: int = 3,
         output_dim: int = 128,
         k: int = 5,
         downsample_layers = 2,
                ):
        self.input_features = Input(shape=(input_channels, input_height))
        self.input_positions = Input(shape=(input_positions, input_height))
        self.input_neighbors = Input(shape=(k, input_height), dtype='int32')
        self.inputs1 = [self.input_features, self.input_positions, self.input_neighbors]
        
        self.output_dim = output_dim
        self.leaky_alpha = leaky_alpha
        self.downsample_layers = downsample_layers
        
        self.inputs = [self.input_features, self.input_positions, self.input_neighbors]
        
        self.seq = []
        for i in range(self.downsample_layers):
            self.seq.append(DownsampleGroup(network_channel_sizes[i], leaky_alpha=leaky_alpha))

        self.subtract = Lambda(lambda x: x[0]-x[1], output_shape=(network_channel_sizes[i], 1))
    
        self.reshape = Reshape((network_channel_sizes[i] * input_height//(2**self.downsample_layers),))

    def get_model(self):
        
        inputs1 = self.inputs1
        
        for i in range(self.downsample_layers):
            inputs1 = self.seq[i].apply(inputs1)
        
        
        outputs = inputs1[0]
        outputs = self.reshape(outputs)
    
#         outputs = self.reshape(inputs1[0])
        
#         outputs = Dense(128)(outputs)
#         outputs = BatchNormalization(axis=1)(outputs)
#         outputs = LeakyReLU(alpha=self.leaky_alpha)(outputs)
        
#         outputs = Dense(64)(outputs)
#         outputs = BatchNormalization(axis=1)(outputs)
#         outputs = LeakyReLU(alpha=self.leaky_alpha)(outputs)
        
        outputs = Dense(1)(outputs)
        
        self.model = Model(inputs=self.inputs, outputs=outputs)

        return self.model
    


# Init

In [16]:
net1 = Network2(
               input_height=img_cols,
               input_channels=img_channels,
               network_channel_sizes=network_channel_sizes,
               leaky_alpha=0.1, k=k)
net = net1.get_model()

net.compile(
    optimizer=Adam(lr=1e-7),
    loss='mean_absolute_error',
    metrics=['mean_absolute_error'])

net.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 3, 512)       0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 3, 512)       0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 5, 512)       0                                            
__________________________________________________________________________________________________
downsample_group (DownsampleGro [(None, 8, 256), (No 104         input_1[0][0]                    
                                                                 input_2[0][0]                    
          

In [17]:
d = net1.seq[0]
d.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flex_convolution (FlexConvol (None, 8, 512)            104       
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 8, 512)            0         
_________________________________________________________________
flex_pooling (FlexPooling)   (None, 8, 512)            0         
_________________________________________________________________
lambda_1 (Lambda)            (None, 8, 256)            0         
_________________________________________________________________
lambda_2 (Lambda)            (None, 3, 256)            0         
_________________________________________________________________
lambda_3 (Lambda)            (None, 5, 256)            0         
Total params: 104
Trainable params: 104
Non-trainable params: 0
_________________________________________________________________


# Run

In [18]:
len(test_gen)
test_gen[200]

([array([[[-1., -1.,  1., ...,  0.,  0.,  0.],
          [ 0.,  0.,  0., ..., -1., -1.,  0.],
          [ 0.,  0.,  0., ...,  0.,  0., -1.]]], dtype=float32),
  array([[[-0.32888693,  0.07111307,  0.37111306, ..., -0.14138693,
           -0.16638693, -0.17888692],
          [-0.27791518, -0.12791519, -0.2904152 , ..., -0.12791519,
           -0.12791519,  0.3720848 ],
          [-0.15      , -0.15      ,  0.15      , ..., -0.11896551,
           -0.08793104, -0.04655172]]], dtype=float32),
  array([[[  0,   1,   2, ..., 509, 510, 511],
          [ 74, 162, 503, ..., 510, 322, 297],
          [265, 216, 255, ..., 322, 509, 228],
          [414, 358, 337, ..., 481, 181, 277],
          [ 49, 106, 230, ..., 181, 394,  67]]], dtype=int32)],
 array([3], dtype=uint8))

In [19]:
len(train_gen)
train_gen[100]

([array([[[-1.,  0.,  0., ...,  0.,  0.,  0.],
          [ 0., -1.,  1., ...,  0.,  0.,  0.],
          [ 0.,  0.,  0., ..., -1., -1.,  1.]]], dtype=float32),
  array([[[ 0.33352602,  0.258526  ,  0.471026  , ..., -0.24147399,
           -0.341474  , -0.441474  ],
          [ 0.0800578 , -0.0199422 ,  0.2300578 , ..., -0.0199422 ,
            0.2675578 , -0.0074422 ],
          [-0.15      ,  0.05689655, -0.0775862 , ...,  0.00517241,
           -0.09827586,  0.06724138]]], dtype=float32),
  array([[[  0,   1,   2, ..., 509, 510, 511],
          [418, 450, 322, ..., 434,  43, 461],
          [327,  15,  36, ..., 416, 278, 309],
          [ 66,  49, 430, ..., 323, 361, 424],
          [ 99, 456, 353, ..., 388, 291, 265]]], dtype=int32)],
 array([5], dtype=uint8))

In [69]:
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras_tqdm import TQDMNotebookCallback

for epoch in range(num_epochs):
    epoch_model_path = 'outputs/{model_name:}_epoch_{epoch:}.h5'.format(model_name=model_name, epoch=epoch)
    net.fit_generator(generator=train_gen,
                                validation_data=test_gen,
                      verbose=0,
                      shuffle=True,
                    callbacks=[
                        TQDMNotebookCallback()
#                                     TensorBoard(log_dir='logs/'),
#                                     ModelCheckpoint(epoch_model_path,
#                                                        monitor='loss',
#                                                        save_best_only=True,
#                                                        save_weights_only=True,
#                                                        mode='auto',
#                                                        period=1)
                              ]
                       )


HBox(children=(IntProgress(value=0, description='Training', max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, description='Epoch 0', max=59999), HTML(value='')))

error 53214 '[28, 14]'
error 27485 '[28, 16]'


KeyboardInterrupt: 