In [1]:
import os
import glob
import shutil
from pathlib import Path
import random
import numpy
import tensorflow as tf

from model_builder import model_builder, relabel, class_merger, balancer
import tools_keras
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import resnet_v2, vgg19, efficientnet

# One seed shared by every random number generator the notebook touches
# (Python's `random`, NumPy's legacy global RNG and TensorFlow's global seed),
# so the value only has to be changed in a single place.
SEED = 42
random.seed(SEED)
numpy.random.seed(SEED)
tf.random.set_seed(SEED)

In [2]:
# Path prefixes used throughout the notebook. Every value ends in "/" because
# sub-folder names are appended to it by plain string concatenation.
specs = dict(
    chips="../../chips_gb/32_shuffled/",
    chips_combined="../../chips_gb/32_shuffled_combined_12_named/",
    chips_balanced="../../chips_gb/32_shuffled_sample_12_named/",
    folder="../../urbangrammar_samba/spatial_signatures/ai/gb_32_sample/",
)

In [3]:
# Each pair couples a class display name with the list of codes grouped under
# it. Passing the pairs to zip() transposes them into the two parallel lists
# the rest of the notebook uses: `group_naming` (12 names) and `group_mapping`
# (12 lists of codes), in the same order as written here.
# NOTE(review): names and codes are matched purely by list position, exactly
# as in the two separate lists this replaces - confirm that is how the
# consumer of `model_specs` interprets them.
group_naming, group_mapping = (
    list(column)
    for column in zip(
        ("Urbanity", ['9_0', '9_1', '9_2', '9_4', '9_5']),
        ("Dense residential neighbourhoods", ['2_0']),
        ("Connected residential neighbourhoods", ['2_1']),
        ("Dense urban neighbourhoods", ['2_2']),
        ("Accessible suburbia", ['1_0']),
        ("Open sprawl", ['3_0']),
        ("Warehouse_Park land", ['5_0']),
        ("Gridded residential quarters", ['6_0']),
        ("Disconnected suburbia", ['8_0']),
        ("Countryside agriculture", ['0_0']),
        ("Wild countryside", ['4_0']),
        ("Urban buffer", ['7_0']),
    )
)

In [4]:
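# One-off sampling step, kept commented out for reference: for each class
# folder under specs["chips_combined"], it copies up to 3500 (train) or 500
# (validation / secret) randomly shuffled chips into specs["chips_balanced"].
# for subset in ["train", "validation", "secret"]: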
#     total = 3500 if subset == "train" else 500
#     os.makedirs(specs['chips_balanced'] + subset, exist_ok=True)
    
#     for folder in glob.glob(specs["chips_combined"] + f"{subset}/*"):
#         os.makedirs(specs['chips_balanced'] + subset + "/" + Path(folder).name, exist_ok=True)
#         files = glob.glob(folder + "/*")
#         random.shuffle(files)
#         for f in files[:total]:
#             f = Path(f)
#             shutil.copy(f, specs['chips_balanced'] + subset + "/" + Path(folder).name + "/" + f.name)

In [4]:
# Metadata bundle that is later passed to tools_keras.fit_phase through its
# `specs` argument: the class grouping, the class names and the chip size.
model_specs = dict(
    meta_class_map=group_mapping,
    meta_class_names=group_naming,
    meta_chip_size=32,
)


In [70]:
# Build the classifier. The number of output labels is derived from the class
# name list instead of being hard-coded, so the two cannot drift apart if the
# grouping above is edited (it evaluates to 12 for the current grouping).
# The input shape is 224x224x3 although `meta_chip_size` is 32; the printed
# model summary shows a Resizing layer directly after the input.
model = model_builder(
    model_name="efficientnet",
    bridge="pooling",
    top_layer_neurons=256,
    n_labels=len(group_naming),
    input_shape=(224, 224, 3),
    metrics=["accuracy"],
)

In [71]:
# Run one fitting phase on the balanced sample. The three positional paths are
# the train / validation / secret sub-folders of specs["chips_balanced"]; the
# log, prediction, model and JSON outputs go to sub-folders of specs["folder"].
# Both prefixes already end in "/", so the names are appended directly.
h = tools_keras.fit_phase(
    model,
    f"{specs['chips_balanced']}train",
    f"{specs['chips_balanced']}validation",
    f"{specs['chips_balanced']}secret",
    log_folder=f"{specs['folder']}logs",
    pred_folder=f"{specs['folder']}pred",
    model_folder=f"{specs['folder']}model",
    json_folder=f"{specs['folder']}json",
    specs=model_specs,
    epochs=2,
    # NOTE(review): patience (5) is larger than epochs (2); if this controls
    # early stopping it presumably can never trigger - confirm in tools_keras.
    patience=5,
    batch_size=32,
    verbose=True,
)

Model: "efficientnet_pooling_256_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_10 (InputLayer)       [(None, 224, 224, 3)]     0         
                                                                 
 resizing_4 (Resizing)       (None, 224, 224, 3)       0         
                                                                 
 efficientnetb4 (Functional)  (None, 7, 7, 1792)       17673823  
                                                                 
 global_average_pooling2d_4   (None, 1792)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dense_8 (Dense)             (None, 256)               459008    
                                                                 
 dense_9 (Dense)             (None, 12)                3084      
                                       

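## Verify results

Compare the training-set accuracy of the in-memory model with the same model after it has been saved and reloaded, once in TensorFlow SavedModel format and once in HDF5 format.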

In [72]:
# Stream the balanced training chips for evaluation. The directory is built
# from `specs` (same folder that was passed to fit_phase) rather than being
# spelled out a second time as a literal path.
# ImageDataGenerator() is created without arguments, so no rescaling or
# augmentation is configured here.
datagen = keras.preprocessing.image.ImageDataGenerator()
generator = datagen.flow_from_directory(
    specs['chips_balanced'] + 'train',
    target_size=(224, 224),  # matches the model's input_shape
    batch_size=32,
    class_mode='sparse',     # integer class labels
    shuffle=False,           # fixed order, so generator.labels lines up with predict() output
)

Found 42000 images belonging to 12 classes.


In [73]:
def accuracy(y, y_pred):
    """Return how often `y_pred` equals `y`, using a fresh Keras Accuracy metric.

    Parameters
    ----------
    y : array-like
        Reference labels.
    y_pred : array-like
        Predicted labels, compared element-wise against `y`.

    Returns
    -------
    NumPy scalar
        The metric result converted with ``.numpy()``.
    """
    # A new metric object per call, so no state carries over between calls.
    metric = tf.keras.metrics.Accuracy()
    metric.update_state(y, y_pred)
    score = metric.result()
    return score.numpy()

In [74]:
# Reference labels come from the generator, which was created with
# shuffle=False, so their order matches the order of the predictions.
y = generator.labels
# Predictions of the in-memory model: per-class scores, then the index of
# the highest score in each row as the predicted label.
oy_pred_probs = model.predict(generator)
oy_pred = oy_pred_probs.argmax(axis=1)

In [75]:
# Baseline: accuracy of the in-memory model on the training chips.
accuracy(y, oy_pred)

0.47445238

In [20]:
# Folder-name -> label-index mapping used by the generator.
# NOTE(review): in the recorded output this order is alphabetical by folder
# name and differs from the order of `group_naming` (e.g. 'urbanity' is 9
# here but "Urbanity" is first in `group_naming`) - confirm which ordering
# the trained model's outputs follow.
generator.class_indices

{'accessible_suburbia': 0,
 'connected_residential_neighbourhoods': 1,
 'countryside_agriculture': 2,
 'dense_residential_neighbourhoods': 3,
 'dense_urban_neighbourhoods': 4,
 'disconnected_suburbia': 5,
 'gridded_residential_quarters': 6,
 'open_sprawl': 7,
 'urban_buffer': 8,
 'urbanity': 9,
 'warehouse_park_land': 10,
 'wild_countryside': 11}

In [76]:
# Export the same in-memory model twice, into the current working directory:
# once as a TensorFlow SavedModel folder and once as a single HDF5 file.
model.save("model_folder", save_format="tf")
model.save("model.h5", save_format="h5")

2022-03-24 11:02:51.694071: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: model_folder/assets


  layer_config = serialize_layer_fn(layer)
  return generic_utils.serialize_keras_object(obj)


In [77]:
# Reload the SavedModel export written above.
model_folder = keras.models.load_model("model_folder")

In [78]:
# Predictions of the model reloaded from the SavedModel folder, on the same
# unshuffled generator; the predicted label is the highest-scoring class.
y_pred_probs = model_folder.predict(generator)
y_pred = y_pred_probs.argmax(axis=1)

In [79]:
# Accuracy after the SavedModel round trip; in the recorded run it equals the
# in-memory result (0.47445238).
accuracy(y, y_pred)

0.47445238

In [80]:
# Reload the HDF5 export written above.
model_h5 = keras.models.load_model("model.h5")

In [81]:
# Predictions of the model reloaded from the HDF5 file, on the same unshuffled
# generator. This overwrites `y_pred_probs` / `y_pred` from the SavedModel check.
y_pred_probs = model_h5.predict(generator)
y_pred = y_pred_probs.argmax(axis=1)

In [82]:
# Accuracy after the HDF5 round trip.
# NOTE(review): in the recorded run this is 0.27319047, against 0.47445238 for
# both the in-memory model and the SavedModel reload, so the .h5 round trip
# does not reproduce the model's predictions. The cause is not established in
# this notebook - investigate before relying on the .h5 export.
accuracy(y, y_pred)

0.27319047