# STL RNN

The goal of this notebook is to convert the char RNN example from the rnn_examples notebook into one that can process STL files.

## Setup

In [104]:
# setup
from __future__ import absolute_import, division, print_function

import tensorflow as tf
tf.enable_eager_execution()

import numpy as np
import os
import time

## Data

An RNN processes data sequentially and is able to maintain an understanding of the __context__ between the current input and the last _M_ inputs.

When this concept is applied to STL files, we want the RNN to understand that triangle $T_{i}$ must share vertices with triangle $T_{i-1}$. 

Therefore, we will form our input to be 2d matrices of triangles. Each input will be a vector of shape

$$ [ x_1, y_1, z_1, x_2, y_2, z_2, x_3, y_3, z_3 ] $$

An alternate approach is to use vertices as our input. This would have a shape of

$$ [ x, y, z ] $$

Both of the above make a powerful assumption: the vertices in the STL file are in order (i.e. triangle $T_{i}$ is next to  triangle $T_{i-1}$ in the STL file). A cursory glance of several STL files confirm that the triangles are in order, but a lengthier EDA of this assumption is recommended.

In [105]:
import env
from data.thingi10k import Thingi10k

thingi = Thingi10k.init10()
n_samples = len(thingi)
n_samples

10

In [106]:
#### For Triangles
#tri_dataset = tf.data.Dataset.from_generator(
#    thingi.triangle_batchmaker,
#    output_types=tuple(([np.float32]*9)),
#)

#for i, tri in enumerate(tri_dataset.take(3)):
#    print('Triangle {}'.format(i))
#    for vtx in tri:
#        print(vtx)

### For Vertices
vtx_dataset = tf.data.Dataset.from_generator(
    thingi.vertex_batchmaker,
    output_types=(np.float32),
)

for v in vtx_dataset.take(3):
    print(v)


tf.Tensor(0.42292082, shape=(), dtype=float32)
tf.Tensor(0.5453869, shape=(), dtype=float32)
tf.Tensor(0.60241675, shape=(), dtype=float32)


In [107]:
seq_length = 100
#assert seq_length % 9 == 0

#sequences = tri_dataset.batch(seq_length+1, drop_remainder=True)
sequences = vtx_dataset.batch(seq_length, drop_remainder=True)

for tri_list in sequences.take(2):
    # each tri_list is a list of seq_length+1 triangles
    print(tri_list.shape)
    print(tri_list)

(100,)
tf.Tensor(
[0.42292082 0.5453869  0.60241675 0.4233776  0.544988   0.60241675
 0.42399296 0.5444503  0.60241675 0.42399296 0.5444503  0.6092169
 0.42292082 0.5453869  0.6092169  0.42399296 0.5444503  0.60241675
 0.42292082 0.5453869  0.6092169  0.41808274 0.55827796 0.6092169
 0.42193684 0.546416   0.6092169  0.4210492  0.54752916 0.6092169
 0.42193684 0.546416   0.60241675 0.42193684 0.546416   0.6092169
 0.4210492  0.54752916 0.60241675 0.42193684 0.546416   0.59879
 0.42193684 0.546416   0.60241675 0.41808274 0.5554332  0.59879
 0.42292082 0.5453869  0.59879    0.42193684 0.546416   0.59879
 0.41808274 0.5554332  0.59879    0.42588705 0.5431698  0.59879
 0.42514473 0.54361343 0.59879    0.42514473 0.54361343 0.60241675
 0.42514473 0.54361343 0.59879    0.42588705 0.5431698  0.59879
 0.42514473 0.54361343 0.60241675 0.42588705 0.5431698  0.60241675
 0.42636693 0.5428831  0.60241675 0.42636693 0.5428831  0.60241675
 0.42636693 0.5428831  0.59879    0.4276497  0.54226536 0.59879

In [152]:
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    # THANK YOU https://stackoverflow.com/questions/48823848/dataset-api-does-not-pass-dimensionality-information-for-its-output-tensor-when
    input_text.set_shape((99))
    target_text.set_shape((99))
    return input_text, target_text

dataset = sequences.map(split_input_target)
dataset

<MapDataset shapes: ((99,), (99,)), types: (tf.float32, tf.float32)>

In [153]:
for input_example, target_example in dataset.take(1):
    print(input_example.shape)
    print('Input data: ', input_example)
    print(target_example.shape)
    print('Target data:', target_example)

(99,)
Input data:  tf.Tensor(
[0.42292082 0.5453869  0.60241675 0.4233776  0.544988   0.60241675
 0.42399296 0.5444503  0.60241675 0.42399296 0.5444503  0.6092169
 0.42292082 0.5453869  0.6092169  0.42399296 0.5444503  0.60241675
 0.42292082 0.5453869  0.6092169  0.41808274 0.55827796 0.6092169
 0.42193684 0.546416   0.6092169  0.4210492  0.54752916 0.6092169
 0.42193684 0.546416   0.60241675 0.42193684 0.546416   0.6092169
 0.4210492  0.54752916 0.60241675 0.42193684 0.546416   0.59879
 0.42193684 0.546416   0.60241675 0.41808274 0.5554332  0.59879
 0.42292082 0.5453869  0.59879    0.42193684 0.546416   0.59879
 0.41808274 0.5554332  0.59879    0.42588705 0.5431698  0.59879
 0.42514473 0.54361343 0.59879    0.42514473 0.54361343 0.60241675
 0.42514473 0.54361343 0.59879    0.42588705 0.5431698  0.59879
 0.42514473 0.54361343 0.60241675 0.42588705 0.5431698  0.60241675
 0.42636693 0.5428831  0.60241675 0.42636693 0.5428831  0.60241675
 0.42636693 0.5428831  0.59879    0.4276497  0.5422

In [154]:
for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])):
    print("Step {:4d}".format(i))
    print("  input: {}".format(input_idx))
    print("  expected output: {}".format(target_idx))

Step    0
  input: 0.422920823097229
  expected output: 0.5453869104385376
Step    1
  input: 0.5453869104385376
  expected output: 0.6024167537689209
Step    2
  input: 0.6024167537689209
  expected output: 0.4233776032924652
Step    3
  input: 0.4233776032924652
  expected output: 0.5449879765510559
Step    4
  input: 0.5449879765510559
  expected output: 0.6024167537689209


## Create Training Batches

In [155]:
# Batch size 
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences, 
# so it doesn't attempt to shuffle the entire sequence in memory. Instead, 
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

dataset

<BatchDataset shapes: ((64, 99), (64, 99)), types: (tf.float32, tf.float32)>

## Build The Model

In [156]:
# Length of the vocabulary in chars
vocab_size = 1000

# The embedding dimension 
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [157]:
if tf.test.is_gpu_available():
    rnn = tf.keras.layers.CuDNNGRU
else:
    import functools
    rnn = functools.partial(tf.keras.layers.GRU, recurrent_activation='sigmoid')

In [158]:
def build_model(seq_length, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Reshape((seq_length-1, 1)),
        rnn(rnn_units,
            return_sequences=True, 
            recurrent_initializer='glorot_uniform',
            stateful=True),
        #tf.keras.layers.LSTM(rnn_units, input_shape=(batch_size, seq_length, 1)),
        tf.keras.layers.Dense(seq_length-1)
    ], name='test')
    return model

In [159]:
model = build_model(
    seq_length=seq_length,
    rnn_units=rnn_units, 
    batch_size=BATCH_SIZE)

In [167]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim, 
                                  batch_input_shape=[batch_size, None]),
        rnn(rnn_units,
            return_sequences=True, 
            recurrent_initializer='glorot_uniform',
            stateful=True),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model

model = build_model(
  vocab_size = vocab_size, 
  embedding_dim=embedding_dim, 
  rnn_units=rnn_units, 
  batch_size=BATCH_SIZE)

## Try The Model

In [168]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 99, 1000) # (batch_size, sequence_length, vocab_size)


In [169]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           256000    
_________________________________________________________________
gru_11 (GRU)                 (64, None, 1024)          3935232   
_________________________________________________________________
dense_13 (Dense)             (64, None, 1000)          1025000   
Total params: 5,216,232
Trainable params: 5,216,232
Non-trainable params: 0
_________________________________________________________________


In [170]:
sampled_indices = tf.random.multinomial(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()

In [171]:
sampled_indices

array([770, 629, 737, 426, 154, 589, 950, 578, 418, 374,  55, 557, 569,
       317, 631, 749,  19, 221, 923, 696, 391,  49, 435, 913, 561, 680,
       360, 126, 191, 302, 819, 705, 650, 679, 672, 457, 130, 870, 543,
       899, 919,  16,  47, 487, 753, 167, 280, 825,  11, 823, 319, 132,
         8, 835, 515, 862, 301, 855, 411, 988, 198, 905, 488, 981, 850,
       468, 982, 182,  31,  95, 622, 306, 322, 409,  25, 368, 229,  31,
       619, 558, 242, 585, 391, 353, 197, 646, 857, 401, 472, 945, 262,
       887, 482,  45, 920, 524, 352, 870, 475])

## Optimizer & Loss

In [172]:
def loss(labels, logits):
    return tf.keras.backend.sparse_categorical_crossentropy(labels, logits, from_logits=True)

example_batch_loss  = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)") 
print("scalar_loss:      ", example_batch_loss.numpy().mean())

Prediction shape:  (64, 99, 1000)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       6.891618


## Compile Model

In [173]:
model.compile(
    optimizer=tf.train.AdamOptimizer(),
    loss=loss)

## Configure Checkpoints

In [174]:
# Directory where the checkpoints will be saved
checkpoint_dir = './_output/stl_rnn_training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

## Execute Training

In [175]:
model = build_model(
    seq_length=seq_length,
    rnn_units=rnn_units, 
    batch_size=BATCH_SIZE)

TypeError: build_model() got an unexpected keyword argument 'seq_length'

In [176]:
optimizer = tf.train.AdamOptimizer()

In [177]:
# Training step
EPOCHS = 1

for epoch in range(EPOCHS):
    start = time.time()
    
    # initializing the hidden state at the start of every epoch
    # initally hidden is None
    hidden = model.reset_states()
    
    for (batch_n, (inp, target)) in enumerate(dataset):
        with tf.GradientTape() as tape:
            # feeding the hidden state back into the model
            # This is the interesting step
            predictions = model(inp)
            loss = tf.losses.sparse_softmax_cross_entropy(target, predictions)
              
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if batch_n % 100 == 0:
            template = 'Epoch {} Batch {} Loss {:.4f}'
            print(template.format(epoch+1, batch_n, loss))

    # saving (checkpoint) the model every 5 epochs
    if (epoch + 1) % 5 == 0:
        model.save_weights(checkpoint_prefix.format(epoch=epoch))

    print ('Epoch {} Loss {:.4f}'.format(epoch+1, loss))
    print ('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

model.save_weights(checkpoint_prefix.format(epoch=epoch))

InternalError: Could not find valid device for node.
Node: {{node SparseSoftmaxCrossEntropyWithLogits}} = SparseSoftmaxCrossEntropyWithLogits[T=DT_FLOAT, Tlabels=DT_FLOAT](dummy_input, dummy_input)
All kernels registered for op SparseSoftmaxCrossEntropyWithLogits :
  device='XLA_CPU_JIT'; Tlabels in [DT_INT32, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_HALF]
  device='XLA_CPU'; Tlabels in [DT_INT32, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_HALF]
  device='CPU'; T in [DT_HALF]; Tlabels in [DT_INT64]
  device='CPU'; T in [DT_HALF]; Tlabels in [DT_INT32]
  device='CPU'; T in [DT_DOUBLE]; Tlabels in [DT_INT64]
  device='CPU'; T in [DT_DOUBLE]; Tlabels in [DT_INT32]
  device='CPU'; T in [DT_FLOAT]; Tlabels in [DT_INT64]
  device='CPU'; T in [DT_FLOAT]; Tlabels in [DT_INT32]
 [Op:SparseSoftmaxCrossEntropyWithLogits] name: xentropy