In [1]:
import os
import glob
import shutil
from pathlib import Path
import random
import numpy
import tensorflow as tf
import pandas

from model_builder import model_builder, relabel, class_merger, balancer
import tools_keras
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import resnet_v2, vgg19, efficientnet

# One seed shared by every RNG the notebook relies on, so a
# Restart-and-Run-All starts from the same random state each time.
SEED = 42

# Seed Python's `random`, NumPy's legacy global RNG and TensorFlow, in that order.
for seed_fn in (random.seed, numpy.random.seed, tf.random.set_seed):
    seed_fn(SEED)

In [None]:
# Train one EfficientNet-based classifier per chip size on the "nn" subset of
# the chips and write logs / best model under a chip-size specific folder.

# Batch size used for both tf.data pipelines and forwarded to the fit helper.
BATCH_SIZE = 32

# Signature-type codes that make up each target class. The position of a group
# in this list lines up with the name at the same position in `group_naming`.
# NOTE(review): presumably `class_merger` maps each code to the index of its
# group -- confirm against model_builder.class_merger.
group_mapping = [
    ['9_0', '9_1', '9_2', '9_4', '9_5'],
    ['2_0'],
    ['2_1'],
    ['2_2'],
    ['1_0'],
    ['3_0'],
    ['5_0'],
    ['6_0'],
    ['8_0'],
    ['0_0'],
    ['4_0'],
    ['7_0'],
]

# Human-readable class names, one per entry of `group_mapping`.
group_naming = [
    "Urbanity",
    "Dense residential neighbourhoods",
    "Connected residential neighbourhoods",
    "Dense urban neighbourhoods",
    "Accessible suburbia",
    "Open sprawl",
    "Warehouse_Park land",
    "Gridded residential quarters",
    "Disconnected suburbia",
    "Countryside agriculture",
    "Wild countryside",
    "Urban buffer",
]

# Both lists are stored in the model metadata; a length mismatch would
# silently mislabel classes downstream.
assert len(group_mapping) == len(group_naming), (
    "group_mapping and group_naming must have the same number of entries"
)

for chip_size in [8, 16, 32, 64]:
    # A new model is built on every iteration. Reset Keras' global state first
    # so that state left over from the previous chip size does not accumulate
    # across iterations.
    keras.backend.clear_session()

    # Input and output locations for this chip size.
    specs = {
        'chips': f"../../chips_gb/v2_{chip_size}_multi.npy",
        'labels': f"../../chips_gb/v2_{chip_size}_multi_labels.parquet",
        'folder': f"../../ai/v2_gb_{chip_size}_multi/",
    }

    chips = numpy.load(specs['chips'])
    labels = pandas.read_parquet(specs['labels'])

    # Every column except "geometry" and "split" is treated as a per-class
    # value; the label of a chip is the column holding the row-wise maximum.
    labels["signature_type"] = labels.drop(columns=["geometry", "split"]).idxmax(axis=1)

    labels = labels[["signature_type", "split"]]

    # Apply one random permutation to chips and labels together so that they
    # stay aligned; the labels get a fresh 0..n-1 index afterwards.
    sorter = numpy.arange(0, chips.shape[0])
    numpy.random.shuffle(sorter)
    chips = chips[sorter]
    labels = labels.iloc[sorter].reset_index(drop=True)

    # Keep only rows whose split name starts with "nn". Bracket access is used
    # because it cannot be shadowed by a DataFrame attribute.
    mask = labels["split"].str.startswith('nn')
    labels_nn = labels[mask]
    chips_nn = chips[mask]

    labels_merged = class_merger(labels_nn.signature_type, group_mapping)

    # "nn_train" rows form the training set; every other "nn*" row is used
    # for validation.
    train_mask = labels_nn['split'] == "nn_train"

    train_dataset = tf.data.Dataset.from_tensor_slices((chips_nn[train_mask], labels_merged[train_mask]))
    validation_dataset = tf.data.Dataset.from_tensor_slices((chips_nn[~train_mask], labels_merged[~train_mask]))

    train_dataset = train_dataset.batch(batch_size=BATCH_SIZE)
    validation_dataset = validation_dataset.batch(batch_size=BATCH_SIZE)

    # Metadata handed to the fit helper alongside the model.
    model_specs = {
        'meta_class_map': group_mapping,
        'meta_class_names': group_naming,
        'meta_chip_size': chip_size,
    }

    model = model_builder(
        model_name="efficientnet",
        bridge="pooling",
        top_layer_neurons=256,
        # Derived from the mapping so the output size cannot drift out of
        # sync with the class definition.
        n_labels=len(group_mapping),
        input_shape=(chip_size, chip_size, 3),
    )

    # Inverse-frequency class weights (the most frequent class gets 1.0).
    # They are computed from the training labels only, so the validation
    # label distribution does not influence the training loss.
    un, c = numpy.unique(labels_merged[train_mask], return_counts=True)
    class_weights = dict(zip(un, c.max() / c))

    h = tools_keras.fit_phase_numpy(
        model,
        train_dataset,
        validation_dataset,
        log_folder=specs["folder"] + "logs",
        pred_folder=None,
        model_folder=specs["folder"] + "model",
        json_folder=None,
        specs=model_specs,
        epochs=200,
        patience=5,
        batch_size=BATCH_SIZE,
        verbose=True,
        class_weight=class_weights,
    )

2022-08-08 14:12:41.049455: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6619 MB memory:  -> device: 0, name: Quadro RTX 4000, pci bus id: 0000:21:00.0, compute capability: 7.5


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb4_notop.h5
Model: "efficientnet_pooling_256_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 8, 8, 3)]         0         
                                                                 
 resizing (Resizing)         (None, 224, 224, 3)       0         
                                                                 
 efficientnetb4 (Functional)  (None, 7, 7, 1792)       17673823  
                                                                 
 global_average_pooling2d (G  (None, 1792)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 256)               459008    
                                                                 
 dense_1 

  layer_config = serialize_layer_fn(layer)


Epoch 1/200


2022-08-08 14:12:52.961070: I tensorflow/stream_executor/cuda/cuda_dnn.cc:377] Loaded cuDNN version 8302




2022-08-08 14:16:52.815296: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: ../../ai/v2_gb_8_multi/model/efficientnet_pooling_256_12_best/assets


  layer_config = serialize_layer_fn(layer)
  return generic_utils.serialize_keras_object(obj)


Epoch 2/200


  layer_config = serialize_layer_fn(layer)
  return generic_utils.serialize_keras_object(obj)


Epoch 3/200


  layer_config = serialize_layer_fn(layer)
  return generic_utils.serialize_keras_object(obj)


Epoch 4/200


  layer_config = serialize_layer_fn(layer)
  return generic_utils.serialize_keras_object(obj)


Epoch 5/200