# AI pipeline from disk

In [1]:
import os
import glob
import shutil
from pathlib import Path
import random
import numpy
import tensorflow as tf

from model_builder import model_builder, relabel, class_merger, balancer
import tools_keras
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import resnet_v2, vgg19, efficientnet

# Seed every random number generator this notebook imports, not only the
# standard-library one, so that sampling and weight initialisation start
# from the same state after a kernel restart.
# NOTE(review): seeding alone may not make GPU training bit-for-bit
# repeatable — confirm if exact reproducibility is required.
SEED = 42
random.seed(SEED)
numpy.random.seed(SEED)
tf.random.set_seed(SEED)

In [2]:
# Directory configuration shared by the cells below. Every path is relative
# and ends with a slash because later cells append sub-paths by plain string
# concatenation.
specs = dict(
    # Source chips, read as <subset>/<original class>/*.tif.
    chips="../chips_gb/32_temporal/",
    # Destination of the class-merging step.
    chips_combined="../chips_gb/32_temporal_combined_12/",
    # Destination of the per-class sampling step; used as training input.
    chips_balanced="../chips_gb/32_temporal_balanced_12/",
    # Output root: the logs, pred, model and json folders are derived from it.
    folder="../urbangrammar_samba/spatial_signatures/ai/gb_32_temporal/",
)

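## Combine groups

Merge the original class folders listed in each `group_mapping` entry into a single class folder named after the entry's position in the list.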

In [3]:
# Each inner list names the source class folders that are merged into one
# output class; the position in the outer list becomes the new class id.
group_mapping = [
    # Five 9_* classes collapse into a single class (id 0).
    # NOTE(review): 9_3 is not listed — confirm that is intentional.
    [f"9_{k}" for k in (0, 1, 2, 4, 5)],
    # Every remaining class is kept on its own, in this order (ids 1-11).
    *(
        [label]
        for label in (
            '2_0', '2_1', '2_2',
            '1_0', '3_0', '5_0', '6_0', '8_0',
            '0_0', '4_0', '7_0',
        )
    ),
]

In [4]:
# Copy every source chip into the folder of the merged class it belongs to.
# Output layout: <chips_combined>/<subset>/<class id>/<original file name>.
# NOTE(review): class folders are named with unpadded integers, so a loader
# that orders directory names as strings would see "10" and "11" before "2" —
# confirm that the downstream label mapping accounts for this.
for subset in ("train", "validation", "secret"):
    subset_dir = specs['chips_combined'] + subset
    os.makedirs(subset_dir, exist_ok=True)

    for class_id, members in enumerate(group_mapping):
        class_dir = subset_dir + "/" + str(class_id)
        os.makedirs(class_dir, exist_ok=True)

        for member in members:
            # Only GeoTIFF chips of this source class and subset are picked up.
            for chip in glob.glob(f"{specs['chips']}{subset}/{member}/*.tif"):
                chip = Path(chip)
                # Files keep their name; a same-named chip from another
                # member of the group would overwrite the earlier copy.
                shutil.copy(chip, class_dir + "/" + chip.name)

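## Balance groups

Copy a random sample of at most 35,000 chips per class for the training subset, and at most 5,000 per class for the validation and secret subsets, so that large classes do not dominate.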

In [5]:
# Draw a capped random sample of chips per class and copy it to the balanced
# folder: <chips_balanced>/<subset>/<class id>/<original file name>.
#
# The sample is made repeatable in two ways:
#   * directory listings are sorted, because glob returns entries in an order
#     that depends on the file system;
#   * each class uses its own generator seeded from the subset and class name,
#     so the draw does not depend on the global RNG state, on how many times
#     this cell has been run, or on the contents of other classes.
# Re-running the cell therefore copies the same files again instead of adding
# a different sample on top of the previous one.
for subset in ["train", "validation", "secret"]:
    # Per-class cap: larger for training than for the two evaluation subsets.
    total = 35000 if subset == "train" else 5000
    os.makedirs(specs['chips_balanced'] + subset, exist_ok=True)

    for folder in sorted(glob.glob(specs["chips_combined"] + f"{subset}/*")):
        class_name = Path(folder).name
        target = specs['chips_balanced'] + subset + "/" + class_name
        os.makedirs(target, exist_ok=True)

        files = sorted(glob.glob(folder + "/*"))
        # String seeds are hashed deterministically by random.Random.
        rng = random.Random(f"42/{subset}/{class_name}")
        rng.shuffle(files)

        # Classes with fewer than `total` chips are copied in full.
        for f in files[:total]:
            f = Path(f)
            shutil.copy(f, target + "/" + f.name)

In [6]:
# Metadata handed to the training helper together with the model.
# NOTE(review): the names are listed in the same order as the entries of
# `group_mapping` (one name per merged class) — confirm that the consumer
# pairs them by position.
model_specs = dict(
    # Source-class grouping that produced the 12 merged classes.
    meta_class_map=group_mapping,
    # One human-readable name per merged class.
    meta_class_names=[
        "Urbanity",
        "Dense residential neighbourhoods",
        "Connected residential neighbourhoods",
        "Dense urban neighbourhoods",
        "Accessible suburbia",
        "Open sprawl",
        "Warehouse/Park land",
        "Gridded residential quarters",
        "Disconnected suburbia",
        "Countryside agriculture",
        "Wild countryside",
        "Urban buffer",
    ],
    # NOTE(review): presumably the chip edge length in pixels — confirm.
    meta_chip_size=32,
)


In [7]:
# Build the classifier. The number of output labels is taken from
# `group_mapping` rather than hard-coded, so the model stays in step with the
# class definition if groups are added or merged (currently 12).
model = model_builder(
    model_name="efficientnet",
    bridge="pooling",
    top_layer_neurons=256,
    n_labels=len(group_mapping),
    # NOTE(review): the chips are described as size 32 in `model_specs`, while
    # the model input is 224x224 — confirm where the resizing happens.
    input_shape=(224, 224, 3),
)

2022-02-21 19:07:01.549167: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6517 MB memory:  -> device: 0, name: Quadro RTX 4000, pci bus id: 0000:21:00.0, compute capability: 7.5


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb4_notop.h5


In [None]:
# Train the model on the balanced chips and store every artefact under the
# shared output root.
chips_root = specs['chips_balanced']
results_root = specs["folder"]

h = tools_keras.fit_phase(
    model,
    chips_root + 'train',
    chips_root + 'validation',
    chips_root + 'secret',
    log_folder=results_root + "logs",
    pred_folder=results_root + "pred",
    model_folder=results_root + "model",
    json_folder=results_root + "json",
    specs=model_specs,
    epochs=200,
    # NOTE(review): presumably an early-stopping patience in epochs — confirm.
    patience=5,
    batch_size=32,
    # NOTE(review): in the recorded run this setting produced enough progress
    # output to trigger the notebook's "IOPub message rate exceeded" warning;
    # consider a quieter setting if the helper supports one.
    verbose=True,
)

Model: "efficientnet_pooling_256_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
resizing (Resizing)          (None, 224, 224, 3)       0         
_________________________________________________________________
efficientnetb4 (Functional)  (None, 7, 7, 1792)        17673823  
_________________________________________________________________
global_average_pooling2d (Gl (None, 1792)              0         
_________________________________________________________________
dense (Dense)                (None, 256)               459008    
_________________________________________________________________
dense_1 (Dense)              (None, 12)                3084      
Total params: 18,135,915
Trainable params: 462,092
Non-trainable params: 17,673,823
_____________________

2022-02-21 19:07:06.202169: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2022-02-21 19:07:06.202199: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2022-02-21 19:07:06.202715: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1614] Profiler found 1 GPUs
2022-02-21 19:07:06.340010: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2022-02-21 19:07:06.340179: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1749] CUPTI activity buffer flushed


Found 420000 images belonging to 12 classes.
Found 58134 images belonging to 12 classes.
training...


2022-02-21 19:07:35.804154: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/200


2022-02-21 19:07:44.778001: I tensorflow/stream_executor/cuda/cuda_dnn.cc:381] Loaded cuDNN version 8300


    1/13125 [..............................] - ETA: 45:02:00 - loss: 2.5577 - accuracy: 0.0312

2022-02-21 19:07:48.484811: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2022-02-21 19:07:48.484849: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.


    2/13125 [..............................] - ETA: 4:39:00 - loss: 2.5870 - accuracy: 0.0625 

2022-02-21 19:07:49.519324: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2022-02-21 19:07:49.520112: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1749] CUPTI activity buffer flushed
2022-02-21 19:07:49.582884: I tensorflow/core/profiler/internal/gpu/cupti_collector.cc:673]  GpuTracer has collected 9459 callback api events and 7601 activity events. 
2022-02-21 19:07:49.670164: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2022-02-21 19:07:49.803630: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: ../urbangrammar_samba/spatial_signatures/ai/gb_32_temporal/logs/efficientnet_pooling_256_12/train/plugins/profile/2022_02_21_19_07_49

2022-02-21 19:07:49.885436: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to ../urbangrammar_samba/spatial_signatures/ai/gb_32_temporal/logs/efficientnet_pooling_256_12/train/plugins/p

    3/13125 [..............................] - ETA: 3:29:01 - loss: 2.5480 - accuracy: 0.0729

2022-02-21 19:07:50.031675: I tensorflow/core/profiler/rpc/client/capture_profile.cc:251] Creating directory: ../urbangrammar_samba/spatial_signatures/ai/gb_32_temporal/logs/efficientnet_pooling_256_12/train/plugins/profile/2022_02_21_19_07_49
Dumped tool data for xplane.pb to ../urbangrammar_samba/spatial_signatures/ai/gb_32_temporal/logs/efficientnet_pooling_256_12/train/plugins/profile/2022_02_21_19_07_49/b0aa527ea1da.xplane.pb
Dumped tool data for overview_page.pb to ../urbangrammar_samba/spatial_signatures/ai/gb_32_temporal/logs/efficientnet_pooling_256_12/train/plugins/profile/2022_02_21_19_07_49/b0aa527ea1da.overview_page.pb
Dumped tool data for input_pipeline.pb to ../urbangrammar_samba/spatial_signatures/ai/gb_32_temporal/logs/efficientnet_pooling_256_12/train/plugins/profile/2022_02_21_19_07_49/b0aa527ea1da.input_pipeline.pb
Dumped tool data for tensorflow_stats.pb to ../urbangrammar_samba/spatial_signatures/ai/gb_32_temporal/logs/efficientnet_pooling_256_12/train/plugins/pro

Epoch 2/200
Epoch 3/200

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 4/200
Epoch 5/200