# Train Model

### Imports

In [1]:
import sys
import os
from config import Config
sys.path.append(Config.root_path)

import importlib
import json
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Load Model
from Python.model.segmentation_model import deeplabv3plus
# Load Dataset & Preprocessing
from Python.data_processing.utils import get_png_paths_from_dir
from Python.data_processing.utils import load_weight_map
from Python.data_processing.utils import split_dataset_paths
from Python.data_processing.data_generator import generate_image_dataset_from_files
from Python.data_processing.data_generator import augment_dataset
# Print Model Prediction
from Python.data_processing.compare_predictions import show_predictions
# Save Model
from Python.data_processing.save_model import write_model_to_disk

print(tf.config.list_physical_devices('GPU'))

2023-04-07 13:56:10.707953: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-07 13:56:12.303411: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/lib/:/home/vidarmarsh/miniconda3/envs/tf/lib/
2023-04-07 13:56:12.303544: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/lib/:/home/vidarmarsh/miniconda3/envs/tf/lib/


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


### Prepare dataset

In [6]:
with open(Config.partition_path) as file:
    filepath_partitions = json.load(file)
for key in filepath_partitions.keys():
    partition = filepath_partitions.get(key)
    partition[0] = [os.path.join(Config.image_path, path) for path in partition[0]]
    partition[1] = [os.path.join(Config.segmentation_path, path) for path in partition[1]]
    filepath_partitions[key] = partition
train_files = filepath_partitions.get("train")
val_files = filepath_partitions.get("validation")
test_files = filepath_partitions.get("test")
weight_map = load_weight_map(Config.weight_map_path)
train_weights = np.zeros(Config.output_channels, dtype=float)
for key in weight_map.keys():
    train_weights[int(key)] = weight_map.get(key)
val_test_weights = np.ones(Config.output_channels, dtype=np.float32)
val_test_weights[Config.background_label] = 0
train_dataset = generate_image_dataset_from_files(
    train_files[0], 
    train_files[1],
    Config.batch_size,
    tf.data.AUTOTUNE, 
    Config.shuffle_size, 
    train_weights
)
val_dataset = generate_image_dataset_from_files(
    val_files[0], 
    val_files[1],
    Config.batch_size,
    tf.data.AUTOTUNE, 
    Config.shuffle_size, 
    val_test_weights
)
test_dataset = generate_image_dataset_from_files(
    test_files[0], 
    test_files[1],
    Config.batch_size,
    tf.data.AUTOTUNE, 
    Config.shuffle_size, 
    val_test_weights
)
train_dataset = augment_dataset(train_dataset)
print(f"Datasets complete\nTrain dataset: {len(train_dataset)} batches")
print(f"Validation dataset: {len(val_dataset)} batches")
print(f"Test dataset: {len(test_dataset)} batches")

2023-04-07 13:58:25.387556: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at whole_file_read_ops.cc:114 : FAILED_PRECONDITION: /home/vidarmarsh/CEZ_Mapping/Data/Priddy_processed/SegmentationClass_categorical_512; Is a directory
2023-04-07 13:58:25.387629: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at whole_file_read_ops.cc:114 : FAILED_PRECONDITION: /home/vidarmarsh/CEZ_Mapping/Data/Priddy_processed/SegmentationClass_categorical_512; Is a directory
2023-04-07 13:58:25.387652: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at whole_file_read_ops.cc:114 : FAILED_PRECONDITION: /home/vidarmarsh/CEZ_Mapping/Data/Priddy_processed/SegmentationClass_categorical_512; Is a directory
2023-04-07 13:58:25.387678: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at whole_file_read_ops.cc:114 : FAILED_PRECONDITION: /home/vidarmarsh/CEZ_Mapping/Data/Priddy_processed/JPEGImages_512; Is a directory
2023-04-07 13:58:25.387697:

Datasets complete
Train dataset: 88 batches
Validation dataset: 19 batches
Test dataset: 19 batches


### Load Model

In [3]:
model = deeplabv3plus(
    Config.input_shape,
    Config.batch_size,
    Config.output_channels,
    Config.channels_low,
    Config.channels_high,
    Config.middle_repeat
)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    Config.initial_learning_rate,
    decay_steps=Config.decay_steps,
    decay_rate=Config.decay_rate,
    staircase=Config.decay_discrete) # not sure about this implementation currently


trainable_count = np.sum(
    [tf.keras.backend.count_params(w) for w in model.trainable_weights]
)
print(f"Model: {model.name}\nTrainable parameters: {trainable_count}")
model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate= lr_schedule),
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics= ["accuracy"],
    weighted_metrics = ["accuracy"]
)

Model: model
Trainable parameters: 27836616


### Compare Image, Segmentation Mask and Prediction

In [5]:
show_predictions(model, train_dataset)

2023-04-07 13:57:25.086931: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at whole_file_read_ops.cc:114 : FAILED_PRECONDITION: /home/vidarmarsh/CEZ_Mapping/Data/Priddy_processed/SegmentationClass_categorical_512; Is a directory
2023-04-07 13:57:25.087638: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at whole_file_read_ops.cc:114 : FAILED_PRECONDITION: /home/vidarmarsh/CEZ_Mapping/Data/Priddy_processed/JPEGImages_512; Is a directory
2023-04-07 13:57:25.087687: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at whole_file_read_ops.cc:114 : FAILED_PRECONDITION: /home/vidarmarsh/CEZ_Mapping/Data/Priddy_processed/SegmentationClass_categorical_512; Is a directory
2023-04-07 13:57:25.087710: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at whole_file_read_ops.cc:114 : FAILED_PRECONDITION: /home/vidarmarsh/CEZ_Mapping/Data/Priddy_processed/SegmentationClass_categorical_512; Is a directory
2023-04-07 13:57:25.087763:

FailedPreconditionError: {{function_node __wrapped__IteratorGetNext_output_types_3_device_/job:localhost/replica:0/task:0/device:CPU:0}} /home/vidarmarsh/CEZ_Mapping/Data/Priddy_processed/JPEGImages_512; Is a directory
	 [[{{node ReadFile}}]] [Op:IteratorGetNext]

### Model Training

Train Model

In [None]:
# Update any changes to Config
importlib.reload(sys.modules["config"])
from config import Config
# Train model
model_history = model.fit(
    x=train_dataset,
    epochs=Config.epochs,
    validation_data=val_dataset,
    shuffle=True
) 

Show Model Performance

In [None]:
loss = model_history.history['loss']
val_loss = model_history.history['val_loss']

plt.figure()
plt.plot(model_history.epoch, loss, 'r', label='Training accuracy')
plt.plot(model_history.epoch, val_loss, 'bo', label='Validation accuracy')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss Value')
plt.ylim([0, 1])
plt.legend()
plt.show()

### Save Model

In [None]:
train_names = [os.path.split(img_path)[1] for img_path in train_files[0]]
val_names = [os.path.split(img_path)[1] for img_path in val_files[0]]
test_names = [os.path.split(img_path)[1] for img_path in test_files[0]]
file_partitions = dict(
    "train", train_files, "validation", val_files, "test", test_files
)
write_model_to_disk(
    model, model_history, file_partitions, Config.model_dir_path, Config
)