In [1]:
import numpy
import tensorflow as tf

from model_builder import model_builder, relabel, class_merger, balancer
import model_builder

Load chips

In [2]:
# Path to the sampled chips archive inside the urbangrammar_samba tree.
specs = dict(
    tensor="../urbangrammar_samba/spatial_signatures/chips/sample.npz",
)

In [3]:
# Local copy of the chips archive. This rebinds `specs`, so it takes precedence
# over any value assigned to `specs` earlier in the notebook.
specs = {
    "tensor": "../sample(1).npz",
}

In [4]:
# Open the chips archive named in `specs`; the next cell reads its arrays by key.
# NOTE(review): allow_pickle=True lets numpy unpickle object arrays, which can
# execute arbitrary code — only load archives from a trusted source, and drop
# the flag if the archive holds plain numeric arrays (to be confirmed).
# NOTE(review): the object returned for an .npz archive is never closed in this
# notebook.
data = numpy.load(specs["tensor"], allow_pickle=True)

In [5]:
# Pull the two arrays used by the rest of the notebook out of the archive.
chips, labels = data["chips"], data["labels"]

Merge groups

In [6]:
# Each inner list names the original classes that are grouped together by
# `class_merger` (the "Merge groups" step).
group_mapping = [
    [
        "9_0", "9_1", "9_2", "9_4", "9_5",
        "2_0", "2_1", "2_2",
    ],
    ["1_0", "3_0", "5_0", "6_0", "8_0"],
    ["0_0", "4_0", "7_0"],
]
labels = class_merger(labels, group_mapping)

# To keep the original classes instead of merging them, run `relabel` so the
# labels are still in the proper format:
# labels = relabel(labels)

Drop chips from excessive classes

In [7]:
# Build the mask that the next cell uses to index both `labels` and `chips`.
# The recorded output shows 27911 of 65042 chips kept, with the largest class
# exactly 20x the smallest (18140 vs 907) — presumably `max_ratio` caps that
# ratio; verify against `balancer` in model_builder.
mask = balancer(labels, max_ratio=20)

Total number of selected chips: 27911 out of 65042
Counts:
 {0: 907, 1: 8864, 2: 18140}


In [8]:
# Keep only the selected chips, filtering both arrays with the same mask so
# every chip stays aligned with its label.
chips, labels = chips[mask], labels[mask]

Rescale

In [9]:
# Rescale so that the largest value in `chips` becomes 255; every other value
# is scaled by the same factor.
chips = numpy.divide(chips, chips.max() / 255.0)

Shuffle the data (with a fixed seed) to ensure full randomness before splitting. Only a train/validation split is made below; no separate secret set is held out in this notebook.

In [10]:
# Fixed seed so the shuffled order (and therefore the split below) is
# reproducible from a fresh kernel.
numpy.random.seed(42)

# Shuffle the row positions once, then apply the same order to both arrays so
# every chip stays paired with its label.
shuffled_idx = numpy.arange(len(chips))
numpy.random.shuffle(shuffled_idx)

chips, labels = chips[shuffled_idx], labels[shuffled_idx]

Split data into train and validation.

In [11]:
# Hold out the last 20% of the (already shuffled) chips for validation.
batch_size = 32
split = int(chips.shape[0] * 0.8)

# Every label must be present in both the train and the validation slice.
# `numpy.array_equal` is used instead of `(a == b).all()` because the two
# unique-label arrays have different lengths when a class is missing from one
# slice; an element-wise `==` cannot broadcast them and errors out instead of
# producing a clean assertion failure.
assert numpy.array_equal(
    numpy.unique(labels[:split]), numpy.unique(labels[split:])
), "train and validation splits do not contain the same set of labels"

# `test_dataset` is the held-out slice; it is passed as `validation_data` when
# the model is fitted.
train_dataset = tf.data.Dataset.from_tensor_slices((chips[:split], labels[:split]))
test_dataset = tf.data.Dataset.from_tensor_slices((chips[split:], labels[split:]))

train_dataset = train_dataset.batch(batch_size=batch_size)
test_dataset = test_dataset.batch(batch_size=batch_size)

2021-12-10 18:19:50.808920: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1050] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-10 18:19:50.870053: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1050] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-10 18:19:50.870381: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1050] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-10 18:19:50.871525: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1050] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-10 18:19:50.872044: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1050] successful NUMA node read f

Create model

In [12]:
# Build the classifier with the project-local `model_builder` module, which is
# already imported in the first cell. The former mid-notebook
# `importlib.reload` was a development aid and is not needed on a fresh
# "Restart & Run All".
model = model_builder.model_builder(
    model_name="efficientnet",
    bridge="flatten",
    top_layer_neurons=512,
    n_labels=3,  # one output per merged group in `group_mapping`
)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb4_notop.h5


In [13]:
# Print the layer-by-layer architecture and parameter counts of the model
# built in the previous cell.
model.summary()

Model: "efficientnet_flatten_512_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
resizing (Resizing)          (None, 224, 224, 3)       0         
_________________________________________________________________
efficientnetb4 (Functional)  (None, 7, 7, 1792)        17673823  
_________________________________________________________________
flatten (Flatten)            (None, 87808)             0         
_________________________________________________________________
dense (Dense)                (None, 512)               44958208  
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 1539      
Total params: 62,633,570
Trainable params: 44,959,747
Non-trainable params: 17,673,823
___________________

Fit

In [14]:
%%time
epochs = 1
history = model.fit(train_dataset, epochs=epochs, validation_data=test_dataset)

2021-12-10 18:20:53.367791: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2021-12-10 18:20:54.547604: I tensorflow/stream_executor/cuda/cuda_dnn.cc:381] Loaded cuDNN version 8300




In [52]:
# Run the trained model on the first 100 chips as a quick inference check.
# NOTE(review): after the shuffle and 80/20 split above, chips[:100] lies
# inside the training slice (chips[:split]), so these are not held-out
# predictions — use chips[split:] if an unbiased sample is wanted.
y_pred_probs = model.predict(chips[:100])

In [53]:
# Predicted class per chip = index of the largest model output along axis 1.
y_pred = y_pred_probs.argmax(axis=1)

In [109]:
# Trial call of `tools_keras.build_meta_json` with placeholder inputs.
# NOTE(review): the recorded output of this cell is
# "ValueError: too many values to unpack (expected 2)". The callee is not
# visible from this notebook; presumably it expects something other than a
# batched tf.data.Dataset for the four data arguments — verify against
# tools_keras.build_meta_json before relying on this cell.
# NOTE(review): the import and reload below are development aids; imports
# normally belong in the first cell.
import tools_keras
from importlib import reload
reload(tools_keras)
tools_keras.build_meta_json(
    model, 
    { # This would be filled from the training loop
        'meta_class_map': None,
        'meta_class_names': None,
        'meta_chip_size': None
    },
    test_dataset, # Training set (placeholder: the held-out dataset is passed for all four slots)
    test_dataset, # Validation set
    test_dataset, # Secret set
    test_dataset, # Full set
)

ValueError: too many values to unpack (expected 2)