# Notebook to run a VGG16 model on the Labelled After Fire GeoTIFF Images
## This notebook adds techniques intended to improve on the results seen in VGG16.ipynb
### Class imbalance is addressed with class weights; L2 regularization and dropout are implemented to reduce overfitting.
### Added callbacks for early stopping, model checkpointing, and logging

#### Import necessary packages and libraries

In [1]:
import os
import numpy as np
import rasterio
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle, class_weight
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger
import tensorflow_addons as tfa
from tensorflow.keras.regularizers import l2

2024-02-10 08:37:10.216088: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



#### Define a custom image generator for GeoTIFF files using rasterio to use on the VGG16 model

In [2]:
# Define the base paths for training and testing.
# The dataset root can be overridden with the FIRE_BINARY_DATA_DIR environment
# variable so the notebook also runs where the data lives somewhere else; when
# the variable is unset the original location is used unchanged.
data_root = os.environ.get(
    'FIRE_BINARY_DATA_DIR',
    '/Volumes/HD710PRO/Fire_and_Hurricane_Images/Fire/BinaryData',
).rstrip('/')
# Joined with a literal '/' (rather than os.path.join) because labels are
# derived elsewhere in this notebook from the '/Damaged/' path component.
base_training_path = f'{data_root}/Training'
base_testing_path = f'{data_root}/Test'

# Custom image generator
def custom_image_generator(file_paths, batch_size):
    """Return an endless generator of ``(images, labels)`` batches read from GeoTIFF files.

    Arguments are validated immediately (not on the first ``next()``), so a
    misconfigured call fails here instead of hanging inside ``fit``/``evaluate``.

    Args:
        file_paths: Iterable of paths to raster files readable by ``rasterio``.
            A path containing a ``Damaged`` directory component is labelled 1,
            every other path is labelled 0.
        batch_size: Maximum number of files per batch; must be >= 1. The last
            batch of each pass may be smaller.

    Returns:
        A generator yielding ``(np.ndarray of images, np.ndarray of labels)``.
        Images are the arrays returned by ``src.read()`` with the first axis
        moved to the end. The file list is reshuffled before every pass and the
        generator never terminates on its own.

    Raises:
        ValueError: If ``file_paths`` is empty or ``batch_size`` is less than 1.
            Either case would otherwise loop forever without yielding.
    """
    file_paths = list(file_paths)
    if not file_paths:
        raise ValueError("custom_image_generator received no file paths")
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    return _iter_geotiff_batches(file_paths, batch_size)


def _iter_geotiff_batches(file_paths, batch_size):
    """Yield shuffled ``(images, labels)`` batches forever; inputs are pre-validated."""
    while True:
        file_paths = shuffle(file_paths)
        for start in range(0, len(file_paths), batch_size):
            images, labels = [], []
            for file in file_paths[start:start + batch_size]:
                with rasterio.open(file) as src:
                    # Move the first axis returned by read() to the end (channels last).
                    image = np.moveaxis(src.read(), 0, -1)
                # Normalise separators so the check also works with OS-native paths.
                is_damaged = '/Damaged/' in str(file).replace(os.sep, '/')
                images.append(image)
                labels.append(1 if is_damaged else 0)
            # NOTE(review): assumes every raster in a batch has the same shape so
            # np.array can stack them — confirm against the dataset.
            yield np.array(images), np.array(labels)


#### Compute the file paths and split the data into training, test and validation sets

In [6]:
# Function to get file paths
def get_file_paths(base_path, extensions=('.tif', '.tiff')):
    """Recursively collect raster file paths under ``base_path`` in sorted order.

    Args:
        base_path: Directory to walk recursively.
        extensions: File-name suffixes to accept, matched case-insensitively.

    Returns:
        A sorted list of ``os.path.join(root, file)`` paths. Sorting makes the
        result independent of the filesystem's listing order, so a seeded split
        of this list is reproducible. The list is empty when nothing matches or
        ``base_path`` does not exist.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    file_paths = []
    for root, _dirs, files in os.walk(base_path):
        for file in files:
            # Skip AppleDouble sidecars ("._name.tif") that macOS writes on
            # non-native volumes; they carry the extension but are not rasters.
            if file.startswith('._'):
                continue
            if file.lower().endswith(suffixes):
                file_paths.append(os.path.join(root, file))
    file_paths.sort()
    return file_paths

# Get file paths
train_files = get_file_paths(base_training_path)
test_files = get_file_paths(base_testing_path)

# Fail early with a readable message instead of an obscure error further down
# (e.g. when the data volume is not mounted).
if not train_files or not test_files:
    raise FileNotFoundError(
        f"No GeoTIFF files found: {len(train_files)} under {base_training_path!r}, "
        f"{len(test_files)} under {base_testing_path!r}"
    )

# Split training data for validation.
# The split is stratified on the Damaged/other label so the validation set keeps
# the same class ratio as the training set, which matters for imbalanced data.
split_labels = [1 if '/Damaged/' in path else 0 for path in train_files]
train_files, val_files = train_test_split(
    train_files,
    test_size=0.1,
    random_state=42,
    stratify=split_labels,
)


#### Handle class imbalance with the weights, create data generators and define the model architecture with L2 Regularization and Dropout

In [7]:
# Compute class weights
labels = [1 if '/Damaged/' in file else 0 for file in train_files]
present_classes = np.unique(labels)
class_weights = class_weight.compute_class_weight('balanced', classes=present_classes, y=labels)
# Key each weight by its actual class label. Enumerating the weights would key
# them by position, which is wrong whenever the present classes are not exactly
# [0, 1] (for example when only class 1 occurs in train_files).
class_weights_dict = {
    int(cls): float(weight) for cls, weight in zip(present_classes, class_weights)
}

# One endless, reshuffling batch generator per data split
batch_size = 32
train_generator = custom_image_generator(train_files, batch_size)
val_generator = custom_image_generator(val_files, batch_size)
test_generator = custom_image_generator(test_files, batch_size)

# Model architecture with Dropout and L2 regularization
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(256, 256, 3))
# Freeze the convolutional base so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False
x = Flatten()(base_model.output)
x = Dense(1024, activation='relu', kernel_regularizer=l2(0.001))(x)  # L2 regularization
x = Dropout(0.5)(x)  # Dropout
output = Dense(1, activation='sigmoid', kernel_regularizer=l2(0.001))(x)  # L2 regularization on output layer as well
model = Model(inputs=base_model.input, outputs=output)


#### Compile, fit, save and evaluate the model

In [8]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy', Precision(), Recall(), tfa.metrics.F1Score(num_classes=1, threshold=0.5)])

# Training budget. Patience must be smaller than the number of epochs, otherwise
# the early-stopping callback can never trigger within the run.
epochs = 10
early_stopping_patience = 3

# Steps per pass use ceiling division because the generators also yield the final
# partial batch. Floor division would skip up to batch_size - 1 samples per pass
# and would give 0 steps for a split smaller than one batch.
train_steps = (len(train_files) + batch_size - 1) // batch_size
val_steps = (len(val_files) + batch_size - 1) // batch_size
test_steps = (len(test_files) + batch_size - 1) // batch_size

# Callbacks for early stopping, model checkpointing, and logging
checkpoint_filepath = '/tmp/checkpoint'
model_checkpoint_callback = ModelCheckpoint(filepath=checkpoint_filepath, save_weights_only=True, monitor='val_accuracy', mode='max', save_best_only=True)
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=early_stopping_patience, restore_best_weights=True)
# append=True: re-running this cell adds rows to the existing log file
csv_logger = CSVLogger('training_log.csv', append=True)

# Fit the model with callbacks
history = model.fit(train_generator,
                    steps_per_epoch=train_steps,
                    validation_data=val_generator,
                    validation_steps=val_steps,
                    epochs=epochs,
                    class_weight=class_weights_dict,
                    callbacks=[model_checkpoint_callback, early_stopping_callback, csv_logger])

# Save the model
model.save('wildfiredetectionafterVGG16.keras')

# Evaluate the model on exactly one full pass over the test files.
# Values are unpacked in the order loss, then the metrics as passed to compile().
test_loss, test_accuracy, test_precision, test_recall, test_f1_score = model.evaluate(test_generator, steps=test_steps)
print(f"Test Accuracy: {test_accuracy}")
print(f"Test Precision: {test_precision}")
print(f"Test Recall: {test_recall}")
print(f"Test F1 Score: {test_f1_score}")

Epoch 1/10


KeyboardInterrupt: 