In [0]:
import numpy as np
import matplotlib.pyplot as plt
import importlib
import json
from os import path
importlib.import_module('mpl_toolkits.mplot3d').Axes3D
from tqdm import tqdm

from google.colab import drive

from __future__ import print_function
import keras
import keras.models as models
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K


In [0]:
# Rendering setup: side views taken around the object, plus one view from
# above and one from below.
num_of_angels = 8
num_of_views = num_of_angels + 2  # above below
theta = (2 * np.pi) / num_of_angels  # rotation step between two side views

# Training hyper-parameters.
batch_size = 128
num_classes = 10
epochs = 14

# Every rendered view is a square, single-channel image.
img_rows = 32
img_cols = 32
input_shape = (img_rows, img_cols, 1)

# Folder that holds the dataset archive and the cached renderings.
my_drive_modelnet_dir = 'drive/My Drive/modelnet10'

# Class index -> human readable category name.
category_names = dict(enumerate((
    "bathtub",
    "bed",
    "chair",
    "desk",
    "dresser",
    "monitor",
    "night-stand",
    "sofa",
    "table",
    "toilet",
)))


In [0]:
# drive.mount('/content/drive')

In [0]:
# prepare image set


def load():
    """Read the ModelNet10 training archive from the configured directory.

    Returns:
        tuple: ``(samples, labels)`` - the arrays stored under the
        ``'samples'`` and ``'labels'`` keys of ``modelnet10_train.npz``.
    """
    archive_path = '{}/modelnet10_train.npz'.format(my_drive_modelnet_dir)
    # np.load returns a lazily-read NpzFile that keeps the archive open;
    # the context manager closes it once both arrays have been materialised.
    with np.load(archive_path) as ds:
        samples = ds['samples']
        labels = ds['labels']
    return samples, labels


def _render_views(pc):
    """Render one point cloud into ``num_of_views`` 2D images.

    Each pixel holds the largest occupied voxel index along the viewing
    axis (0 when the whole column is empty). The views are, in order: one
    from above, one from below, then ``num_of_angels`` side views taken
    after successive rotations by ``theta``.

    Returns:
        list: nested Python lists of ints, one per view (JSON serialisable).
    """
    # rotate() works on the array it is given; keep the caller's data intact.
    pc = np.array(pc, copy=True)

    vol = pointcloud2volume(pc)
    depth = np.arange(vol.shape[0])

    views = [
        # one from above: scan along the last axis
        np.argmax(vol * depth, axis=2).tolist(),
        # one from below: same scan on the fully flipped volume
        np.argmax(np.flip(vol) * depth, axis=2).tolist(),
    ]

    # num_of_angels views from the sides: scan along the first axis
    for _ in range(num_of_angels):
        pc = rotate(pc, theta)
        vol = pointcloud2volume(pc)
        side = np.argmax(vol * depth[:, None, None], axis=0)
        views.append(np.rot90(side).tolist())

    return views


def get_photos_labels(force_reload=False):
    """Return the rendered views for every sample together with the labels.

    The renderings are cached as ``photos.json`` inside
    ``my_drive_modelnet_dir``; the cache is used when it exists unless
    ``force_reload`` is true, in which case everything is rendered again and
    the cache file is rewritten.

    Args:
        force_reload: ignore an existing cache file and re-render.

    Returns:
        tuple: ``(photos, labels)`` where ``photos`` is a flat list holding
        ``num_of_views`` consecutive images per sample and ``labels`` is the
        label array returned by ``load()``.
    """
    samples, labels = load()

    photos_path = '{}/photos.json'.format(my_drive_modelnet_dir)
    if path.exists(photos_path) and not force_reload:
        with open(photos_path, 'r') as f:
            photos = json.load(f)
        return photos, labels

    print("len(samples): ", len(samples))
    _photos = []
    for pc in tqdm(samples):
        _photos.extend(_render_views(pc))

    print("len(_photos): ", len(_photos))
    with open(photos_path, 'w') as f:
        json.dump(_photos, f)

    return _photos, labels



In [0]:
# point cloud

def pointcloud2volume(pc, dim=32):
    """Voxelise a point cloud into a boolean ``dim x dim x dim`` occupancy grid.

    Every point is binned per coordinate over (roughly) the unit interval
    and marks its own voxel plus the six face-adjacent neighbours. Voxels
    that would fall outside the grid are skipped instead of wrapping around
    to the opposite face.

    Args:
        pc: iterable of points; the first three coordinates of each point
            are used.
        dim: number of voxels along each axis.

    Returns:
        numpy.ndarray: boolean array of shape ``(dim, dim, dim)``, transposed
        and flipped along every axis relative to the binning order.
    """
    vol = np.zeros((dim, dim, dim), dtype=bool)
    # dim voxels need dim + 1 bin edges.
    bins = np.linspace(-0.000001, 1.0001, dim + 1)
    # the voxel itself followed by its six face neighbours
    offsets = (
        (0, 0, 0),
        (0, 1, 0), (0, -1, 0),
        (0, 0, 1), (0, 0, -1),
        (1, 0, 0), (-1, 0, 0),
    )

    for point in pc:
        idx = np.digitize(point, bins) - 1
        cx, cy, cz = int(idx[0]), int(idx[1]), int(idx[2])

        if not (0 <= cx < dim and 0 <= cy < dim and 0 <= cz < dim):
            print("digitize went bad", (cx, cy, cz))

        for dx, dy, dz in offsets:
            vx, vy, vz = cx + dx, cy + dy, cz + dz
            # Explicit bounds check: a negative index would not raise but
            # silently address the far side of the grid.
            if 0 <= vx < dim and 0 <= vy < dim and 0 <= vz < dim:
                vol[vx, vy, vz] = True

    return np.flip(vol.T).copy()



In [0]:
# rotate

def _move_to_origin(pc):
    z = np.sum(pc[:, 2]) / len(pc)
    y = np.sum(pc[:, 1]) / len(pc)
    pc[:, 2] -= z
    pc[:, 1] -= y
    return pc


def _move_back_from_origin(pc):
    # move to positive space
    z_min = np.min(pc[:, 2])
    y_min = np.min(pc[:, 1])
    if z_min < 0:
        pc[:, 2] += (-z_min + 0.0001)

    if y_min < 0:
        pc[:, 1] += (-y_min + 0.0001)

    return pc


def _normalize_to_unit_cube(pc):
    z = pc[:, 2].max()
    if z > 1:
        pc[:, 2] = pc[:, 2] * (0.9/z)
        pc[:, 1] = pc[:, 1] * (0.9/z)

    y = pc[:, 1].max()
    if y > 1:
        pc[:, 2] = pc[:, 2] * (0.9/y)
        pc[:, 1] = pc[:, 1] * (0.9/y)

    return pc


def _move_to_center(pc):

    y_margin = 1 - np.max(pc[:, 1])
    z_margin = 1 - np.max(pc[:, 2])

    pc[:, 1] += (y_margin/2)
    pc[:, 2] += (z_margin/2)

    return pc


def _rotate_around_x_axes(pc, theta):
    rotation_mat = np.array(
        [[np.cos(theta), -np.sin(theta)],
         [np.sin(theta), np.cos(theta)]], dtype=np.float32)

    pc_xy = pc[:, 1:3]
    rotated = (rotation_mat @ pc_xy.T).T
    pc[:, 1:3] = rotated
    return pc


def rotate(pc, theta):
    """Return a rotated, re-normalised copy of a point cloud.

    The cloud is centred, rotated by ``theta`` radians, shifted back into
    positive space, scaled down if it no longer fits, and re-centred, by
    chaining the module's helper steps in that order.

    The helpers all modify the array they receive, so the work is done on a
    private copy: the caller's array (for example a row of the loaded
    dataset) is left unchanged and can be rendered again later.

    Args:
        pc: array of points, one row per point.
        theta: rotation angle in radians.

    Returns:
        numpy.ndarray: the transformed copy of ``pc``.
    """
    pc = np.array(pc, copy=True)
    pc = _move_to_origin(pc)
    pc = _rotate_around_x_axes(pc, theta)
    pc = _move_back_from_origin(pc)
    pc = _normalize_to_unit_cube(pc)
    pc = _move_to_center(pc)
    return pc


In [0]:
# prepare the data for keras
#
# `photos` is a flat list with num_of_views consecutive images per sample,
# so a stride of num_of_views starting at offset v collects view v of every
# sample. The model takes one array per view, hence the lists below.

photos, labels = get_photos_labels(force_reload=False)

x_train_size = 3200  # =~ 0.8*3991
x_train, x_test = [], []
for view_idx in range(num_of_views):
    view_stack = np.array(photos[view_idx::num_of_views], dtype=np.float32)
    view_stack /= 31  # pixel values are voxel indices in 0..31
    view_stack = view_stack.reshape(view_stack.shape[0], img_rows, img_cols, 1)

    x_train.append(view_stack[:x_train_size])
    x_test.append(view_stack[x_train_size:])

    print(len(view_stack), "should be 3991")

print(len(x_train), '*', len(x_train[0]), 'train samples, shape is', x_train[0].shape)
print(len(x_test), '*', len(x_test[0]), 'test samples, shape is', x_test[0].shape)

# plt.imshow(thetas[0][3])
# type(thetas[0][3])

3991 should be 3991
3991 should be 3991
3991 should be 3991
3991 should be 3991
3991 should be 3991
3991 should be 3991
3991 should be 3991
3991 should be 3991
3991 should be 3991
3991 should be 3991
10 * 3200 train samples, shape is (3200, 32, 32, 1)
10 * 791 test samples, shape is (791, 32, 32, 1)


In [0]:
# data = np.array(photos)
# print(data.shape)
# data = np.split(data, data.shape[0]/18)
# print(len(data1), data1[0].shape)

In [0]:
# prepare the labels for keras

# Split the labels at the same position as the images, then one-hot encode
# each part (class vectors -> binary class matrices).
y_train, y_test = (
    keras.utils.to_categorical(part, num_classes)
    for part in (labels[:x_train_size], labels[x_train_size:])
)

print("len(y_train):", len(y_train))
print("len(y_test):", len(y_test))

len(y_train): 3200
len(y_test): 791


In [0]:
# multiple inputs keras model
#
# One small CNN branch per rendered view (weights are NOT shared between
# branches); the per-view feature vectors are concatenated and fed to a
# single softmax classifier.

def _build_view_branch(view_input):
    """Map one view's input tensor to a 128-dimensional feature tensor."""
    x = Conv2D(32, kernel_size=(3, 3), activation='relu')(view_input)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    return x


inputs = [keras.Input(shape=input_shape) for _ in range(num_of_views)]
# The branch tensors can be wired into the final model directly; wrapping
# every branch in its own keras.Model only to read .input/.output back is
# not needed.
outputs = [_build_view_branch(inp) for inp in inputs]
inputs2 = inputs  # kept for cells that still refer to the old name

combined = keras.layers.concatenate(outputs)

z = Dense(num_classes, activation="softmax")(combined)

model = keras.Model(inputs=inputs, outputs=z)

# The default Adadelta learning rate differs between Keras releases
# (1.0 in older standalone Keras, 0.001 in newer ones). Pin it so training
# behaves the same everywhere.
# NOTE(review): 1.0 is assumed to match the runs recorded in this notebook -
# confirm against the Keras version that produced them.
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(learning_rate=1.0),
              metrics=['accuracy'])



In [0]:
# train the model
#
# The model has one input per view, so x_train / x_test are lists of
# num_of_views arrays, each shaped (n_samples, img_rows, img_cols, 1), given
# in the same order as the model inputs.
#
# NOTE: the held-out split is used both as validation data during training
# and for the final evaluation below.

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 3200 samples, validate on 791 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30

KeyboardInterrupt: ignored

In [0]:
# find a bad prediction
#
# Draw random samples, render their views exactly like the training data,
# and stop at the first sample the model misclassifies.
samples, labels = load()
lin = np.linspace(0, 31, 32)
while True:
    # randint's upper bound is exclusive, so this covers every valid index
    # and can never run past the end of the dataset.
    pc_i = np.random.randint(0, len(samples))
    # Work on a copy: the rotation helpers modify the array they are given,
    # which would otherwise alter the dataset row for later draws.
    pc = samples[pc_i].copy()
    _label = labels[pc_i]
    vol = pointcloud2volume(pc)

    _photos = []

    # one from above
    _photos.append(np.argmax(vol * lin, axis=2).tolist())

    # one from below
    _photos.append(np.argmax(np.flip(vol) * lin, axis=2).tolist())

    # num_of_angels from the sides
    for _ in range(num_of_angels):
        pc = rotate(pc, theta)
        vol = pointcloud2volume(pc)
        side = np.argmax(vol * lin[:, None, None], axis=0)
        _photos.append(np.rot90(side).tolist())

    # one (1, img_rows, img_cols, 1) batch per view, scaled like the training data
    keras_input = [
        (np.array(photo, dtype=np.float32) / 31).reshape(1, img_rows, img_cols, 1)
        for photo in _photos
    ]

    prediction = model.predict(keras_input)[0]
    label = np.argmax(prediction)
    if label != _label:
        print(pc_i)
        print(np.round(prediction, 3))
        print("object is a", category_names[_label])
        confidence = prediction[label] * 100
        print("object is a {} with {}% confidence".format(category_names[label], np.round(confidence, 2)))

        # blank out the winner to find the runner-up
        prediction[label] = 0
        label = np.argmax(prediction)
        confidence = prediction[label] * 100

        print("second place: {} with {}% confidence".format(category_names[label], np.round(confidence, 2)))
        break

2999
[0.    0.    0.    0.001 0.16  0.005 0.831 0.    0.    0.002]
object is a dresser
object is a night-stand with 83.07% confidence
second place: dresser with 16.04% confidence


In [0]:
"""

with 8 angles + top + bottom (10 overall)
maybe we can lose the bottom picture - a chair looks like a table from below

Train on 3200 samples, validate on 791 samples
Epoch 1/12
3200/3200 [==============================] - 105s 33ms/step - loss: 1.2926 - accuracy: 0.6125 - val_loss: 0.8521 - val_accuracy: 0.7547
Epoch 2/12
3200/3200 [==============================] - 108s 34ms/step - loss: 0.5590 - accuracy: 0.8181 - val_loss: 0.4153 - val_accuracy: 0.8761
Epoch 3/12
3200/3200 [==============================] - 103s 32ms/step - loss: 0.4220 - accuracy: 0.8619 - val_loss: 0.3193 - val_accuracy: 0.8976
Epoch 4/12
3200/3200 [==============================] - 103s 32ms/step - loss: 0.3178 - accuracy: 0.8953 - val_loss: 0.2669 - val_accuracy: 0.9166
Epoch 5/12
3200/3200 [==============================] - 103s 32ms/step - loss: 0.2435 - accuracy: 0.9222 - val_loss: 0.2250 - val_accuracy: 0.9292
Epoch 6/12
3200/3200 [==============================] - 103s 32ms/step - loss: 0.2364 - accuracy: 0.9234 - val_loss: 0.2128 - val_accuracy: 0.9305
Epoch 7/12
3200/3200 [==============================] - 103s 32ms/step - loss: 0.1948 - accuracy: 0.9341 - val_loss: 0.1944 - val_accuracy: 0.9393
Epoch 8/12
3200/3200 [==============================] - 107s 33ms/step - loss: 0.1829 - accuracy: 0.9388 - val_loss: 0.1860 - val_accuracy: 0.9330
Epoch 9/12
3200/3200 [==============================] - 103s 32ms/step - loss: 0.1664 - accuracy: 0.9447 - val_loss: 0.1992 - val_accuracy: 0.9355
Epoch 10/12
3200/3200 [==============================] - 103s 32ms/step - loss: 0.1453 - accuracy: 0.9519 - val_loss: 0.1819 - val_accuracy: 0.9343
Epoch 11/12
3200/3200 [==============================] - 103s 32ms/step - loss: 0.1201 - accuracy: 0.9600 - val_loss: 0.1554 - val_accuracy: 0.9444
Epoch 12/12
3200/3200 [==============================] - 103s 32ms/step - loss: 0.1139 - accuracy: 0.9600 - val_loss: 0.1668 - val_accuracy: 0.9343
Test loss: 0.16682250029142526
Test accuracy: 0.9342604279518127

"""


"""

with 18 angle views, concat 18 fc into softmax:

Train on 3200 samples, validate on 791 samples
Epoch 1/12
3200/3200 [==============================] - 189s 59ms/step - loss: 1.2966 - accuracy: 0.5950 - val_loss: 0.6623 - val_accuracy: 0.7876
Epoch 2/12
3200/3200 [==============================] - 186s 58ms/step - loss: 0.5889 - accuracy: 0.8047 - val_loss: 0.4057 - val_accuracy: 0.8521
Epoch 3/12
3200/3200 [==============================] - 184s 58ms/step - loss: 0.4450 - accuracy: 0.8512 - val_loss: 0.3249 - val_accuracy: 0.8925
Epoch 4/12
3200/3200 [==============================] - 184s 58ms/step - loss: 0.3334 - accuracy: 0.8853 - val_loss: 0.5065 - val_accuracy: 0.8268
Epoch 5/12
3200/3200 [==============================] - 184s 58ms/step - loss: 0.2883 - accuracy: 0.9069 - val_loss: 0.2440 - val_accuracy: 0.9191
Epoch 6/12
3200/3200 [==============================] - 187s 58ms/step - loss: 0.2610 - accuracy: 0.9100 - val_loss: 0.2371 - val_accuracy: 0.9191
Epoch 7/12
3200/3200 [==============================] - 184s 58ms/step - loss: 0.2142 - accuracy: 0.9259 - val_loss: 0.2415 - val_accuracy: 0.9077
Epoch 8/12
3200/3200 [==============================] - 184s 58ms/step - loss: 0.1850 - accuracy: 0.9350 - val_loss: 0.2013 - val_accuracy: 0.9292
Epoch 9/12
3200/3200 [==============================] - 187s 58ms/step - loss: 0.1787 - accuracy: 0.9397 - val_loss: 0.1957 - val_accuracy: 0.9279
Epoch 10/12
3200/3200 [==============================] - 184s 57ms/step - loss: 0.1623 - accuracy: 0.9475 - val_loss: 0.2575 - val_accuracy: 0.9077
Epoch 11/12
3200/3200 [==============================] - 184s 57ms/step - loss: 0.1478 - accuracy: 0.9513 - val_loss: 0.1884 - val_accuracy: 0.9267
Epoch 12/12
3200/3200 [==============================] - 186s 58ms/step - loss: 0.1378 - accuracy: 0.9538 - val_loss: 0.1733 - val_accuracy: 0.9431
Test loss: 0.17325291855145344
Test accuracy: 0.9431099891662598

"""

