# 1. **Dataloader**

In [None]:
# import necessary libraries

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import load_model


import keras
import keras.utils
from keras.utils import to_categorical

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import os
from PIL import Image
import time

In [None]:
# Size every image is resized to, and the number of images per batch
img_size = (224, 224)
batch_size = 32

# Augmenting generator: rescales pixel values by 1/255 and applies random
# shear, zoom and horizontal flips
datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    rescale=1. / 255,
)

# Image folders (mixed, city A, city B) and the folder that holds the label
# file. `label_file` is a directory: it is joined with 'imageData.txt' when
# the label file is opened.
image_dir = 'C:/Users/maria/Documents/GitHub/CSCU9M6-2929300/MixDataset/train'
imageA_dir = 'C:/Users/maria/Documents/GitHub/CSCU9M6-2929300/Dataset/Atrain'
imageB_dir = 'C:/Users/maria/Documents/GitHub/CSCU9M6-2929300/Dataset/Btrain'
label_file = 'C:/Users/maria/Documents/GitHub/CSCU9M6-2929300/'


# Class codes; a code's position in this list is used as its numeric label
labels = [
    'ara', 'ban', 'che', 'pal', 'pla',
    'art', 'ced', 'ash', 'syc', 'oak',
    'app', 'pin', 'bee', 'bct', 'not',
]

def load_and_resize(filename, label):
    """Read one JPEG file and return it resized and rescaled.

    Args:
        filename: Path of the image file to read.
        label: Label belonging to the image; returned unchanged.

    Returns:
        A ``(image, label)`` tuple. The image has three channels, the
        height/width given by the module-level ``img_size``, and its pixel
        values divided by 255.
    """
    target_size = [img_size[0], img_size[1]]
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(pixels, target_size)
    return resized / 255.0, label

def create_dataset(filenames, labels, batch_size):
    """Build a shuffled, batched ``tf.data.Dataset`` of (image, label) pairs.

    Args:
        filenames: Sequence of image file paths.
        labels: Sequence of labels, one per entry of ``filenames``.
        batch_size: Number of elements per batch.

    Returns:
        A dataset in which every file has been passed through
        ``load_and_resize``, grouped into batches and prefetched.

    Raises:
        ValueError: If ``filenames`` is empty, or if ``filenames`` and
            ``labels`` differ in length.
    """
    # Validate up front: both problems otherwise surface as hard-to-read
    # errors from inside TensorFlow. len() is used instead of truthiness so
    # that array-like inputs are handled too.
    num_files = len(filenames)
    if num_files == 0:
        raise ValueError('create_dataset() needs at least one filename')
    if len(labels) != num_files:
        raise ValueError(
            f'filenames and labels differ in length: '
            f'{num_files} != {len(labels)}')

    # Create TensorFlow dataset
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
    # Shuffle across the whole dataset; reshuffle_each_iteration=False keeps
    # the same shuffled order on every pass over the dataset
    dataset = dataset.shuffle(buffer_size=num_files, reshuffle_each_iteration=False)
    # Load and preprocess the images and labels in parallel
    dataset = dataset.map(
        map_func=load_and_resize,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    # Batch the dataset
    dataset = dataset.batch(batch_size)
    # Prefetch so loading overlaps with consumption of earlier batches
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    return dataset

def load_and_preprocess_image(filename, label):
    """Read and decode one JPEG file, then hand it to ``preprocess_image``.

    Args:
        filename: Path of the image file to read.
        label: Label belonging to the image.

    Returns:
        The ``(image, label)`` pair produced by ``preprocess_image``.
    """
    # NOTE(review): preprocess_image is not defined in the visible part of
    # this file; confirm it exists before calling this function.
    raw_bytes = tf.io.read_file(filename)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    image, label = preprocess_image(decoded, label)
    return image, label

# Load the labels from the file.
# Each record is split on its first two dots into name, class code and
# remainder, so the remainder may itself contain dots.
# `labels` here is the list of class codes; it is rebound to a per-file list
# further down, so this lookup has to run before that happens.
with open(os.path.join(label_file, 'imageData.txt')) as f:
    lines = f.readlines()

label_dict = {}
for line_number, line in enumerate(lines, start=1):
    line = line.strip()
    if not line:
        # Blank lines (e.g. a trailing newline) carry no record
        continue
    parts = line.split('.', maxsplit=2)
    if len(parts) != 3:
        # Same exception type as the failed unpacking it replaces, but with
        # the offending line spelled out
        raise ValueError(
            f'imageData.txt line {line_number}: expected two dots '
            f'separating name, label and remainder, got {line!r}')
    filename, label, _ = parts
    if label in labels:
        # Key: path of the image under image_dir; value: index of its code
        label_dict[os.path.join(image_dir, filename)] = labels.index(label)
        
# Get the filenames and labels by walking one sub-folder per class.
# NOTE: from here on `labels` holds one class-folder name per file and no
# longer the list of class codes.
filenames = []
labels = []

# sorted() makes the order independent of the OS / filesystem, so a seeded
# split of these lists is reproducible across machines.
for class_name in sorted(os.listdir(image_dir)):
    class_dir = os.path.join(image_dir, class_name)
    if not os.path.isdir(class_dir):
        # A stray file next to the class folders would make listdir() raise
        continue
    for filename in sorted(os.listdir(class_dir)):
        filepath = os.path.join(class_dir, filename)
        if not os.path.isfile(filepath):
            # Nested folders are not images
            continue
        filenames.append(filepath)
        labels.append(class_name)
        
# Hold out 20% of the collected files as a test subset (fixed seed).
# NOTE(review): all three splits are drawn from the same `filenames` /
# `labels` with the same seed, so the A* and B* variables hold the same
# split as the mixed one. Confirm whether per-city file lists were intended.
split_options = {'test_size': 0.2, 'random_state': 42}

(train_filenames, test_filenames,
 train_labels, test_labels) = train_test_split(
    filenames, labels, **split_options)

(Atrain_filenames, Atest_filenames,
 Atrain_labels, Atest_labels) = train_test_split(
    filenames, labels, **split_options)

(Btrain_filenames, Btest_filenames,
 Btrain_labels, Btest_labels) = train_test_split(
    filenames, labels, **split_options)

# Data directories, grouped by dataset

# Mixed dataset
train_dir = 'C:/Users/maria/Documents/GitHub/Tree-Species-Classification-with-Deep-Learning/MixDataset/train'
val_dir = 'C:/Users/maria/Documents/GitHub/Tree-Species-Classification-with-Deep-Learning/MixDataset/val'
test_dir = 'C:/Users/maria/Documents/GitHub/Tree-Species-Classification-with-Deep-Learning/MixDataset/test'

# City A
Atrain_dir = 'C:/Users/maria/Documents/GitHub/Tree-Species-Classification-with-Deep-Learning/Dataset/Atrain'
Aval_dir = 'C:/Users/maria/Documents/GitHub/Tree-Species-Classification-with-Deep-Learning/Dataset/Aval'
Atest_dir = 'C:/Users/maria/Documents/GitHub/Tree-Species-Classification-with-Deep-Learning/Dataset/Atest'

# City B
Btrain_dir = 'C:/Users/maria/Documents/GitHub/Tree-Species-Classification-with-Deep-Learning/Dataset/Btrain'
Bval_dir = 'C:/Users/maria/Documents/GitHub/Tree-Species-Classification-with-Deep-Learning/Dataset/Bval'
Btest_dir = 'C:/Users/maria/Documents/GitHub/Tree-Species-Classification-with-Deep-Learning/Dataset/Btest'

# Build one tf.data pipeline over every collected file
dataset = create_dataset(filenames, labels, batch_size)

# Show the structure (shapes and dtypes) of one dataset element
print(dataset.element_spec)

---

# 2. **Proposed solution** 

In [None]:
# Image size shared by the tf.data pipelines and the directory generators
img_size = (224, 224)

# Load and preprocess the training and testing datasets
train_dataset = create_dataset(train_filenames, train_labels, batch_size)
test_dataset = create_dataset(test_filenames, test_labels, batch_size)

# Augmenting generator for training images
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# Test and validation images are only rescaled, never augmented
test_datagen = ImageDataGenerator(rescale=1./255)

val_datagen = ImageDataGenerator(rescale=1./255)

# Every generator resizes to img_size so that all splits produce images of
# the same size (the validation generator previously used 244x244).
train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical')

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical')

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical')

# create model using VGG16 architecture
def create_model():
    """Build a VGG16-based classifier with a frozen convolutional base.

    The base is VGG16 without its top, loaded with ImageNet weights and sized
    from the module-level ``img_size``. On top of it sit a Flatten layer, a
    256-unit ReLU layer, dropout of 0.5 and a 15-way softmax output.

    Returns:
        The uncompiled ``tf.keras`` Sequential model.
    """
    vgg16_model = tf.keras.applications.VGG16(
        input_shape=(img_size[0], img_size[1], 3),
        include_top=False,
        weights='imagenet')
    # Freeze the pretrained base so only the new head is trained
    for layer in vgg16_model.layers:
        layer.trainable = False

    # Stack the head on the frozen base itself, and stay within tf.keras
    # rather than mixing in the standalone keras package
    model = tf.keras.models.Sequential([
        vgg16_model,
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(15, activation='softmax')])
    return model

# Define the model: VGG16 base (no top, ImageNet weights) followed by a
# small dense head with a 15-way softmax output
input_shape = (*img_size, 3)

model = keras.models.Sequential()
model.add(keras.applications.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape,
))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(256, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(15, activation='softmax'))

---

# 3. **Experimental tests and evaluations** 

In [None]:
# Training model for City A

# Get the list of entries in each City A directory
Atrain_dir_filenames = os.listdir(Atrain_dir)
Atest_dir_filenames = os.listdir(Atest_dir)
Aval_dir_filenames = os.listdir(Aval_dir)

# Load the data straight from the directories with the generators defined
# for the mixed dataset, so images and labels always come from the same place.
# NOTE(review): assumes Atrain/Aval/Atest each hold one sub-folder per class,
# which is the layout flow_from_directory expects. Confirm.
Atrain_data = datagen.flow_from_directory(
    Atrain_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical')

# Reuse the training class order so every split maps a class to the same index
A_classes = sorted(Atrain_data.class_indices, key=Atrain_data.class_indices.get)

Aval_data = val_datagen.flow_from_directory(
    Aval_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    classes=A_classes)

Atest_dataset = test_datagen.flow_from_directory(
    Atest_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    classes=A_classes,
    shuffle=False)

# Frozen, ImageNet-pretrained VGG16 base
vgg16_model = tf.keras.applications.VGG16(
    include_top=False, weights='imagenet', input_shape=img_size + (3,))
vgg16_model.trainable = False

# Build model: one softmax output per class found in the training folder
model = tf.keras.models.Sequential([
        vgg16_model,
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(Atrain_data.num_classes, activation='softmax')
    ])

# Compile the model; categorical cross-entropy matches the one-hot labels
# produced by class_mode='categorical'
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(Atrain_data, validation_data=Aval_data, epochs=10)

# Save the model
# NOTE(review): Keras 3 requires a '.keras' or '.h5' suffix here; the name is
# kept because other cells load 'modelA'.
model.save('modelA')


In [None]:
# Training model for City B

# Get the list of entries in each City B directory
Btrain_dir_filenames = os.listdir(Btrain_dir)
Btest_dir_filenames = os.listdir(Btest_dir)
Bval_dir_filenames = os.listdir(Bval_dir)

# Load the data straight from the directories with the generators defined
# for the mixed dataset, so images and labels always come from the same place.
# NOTE(review): assumes Btrain/Bval/Btest each hold one sub-folder per class,
# which is the layout flow_from_directory expects. Confirm.
Btrain_dataset = datagen.flow_from_directory(
    Btrain_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical')

# Reuse the training class order so every split maps a class to the same index
B_classes = sorted(Btrain_dataset.class_indices, key=Btrain_dataset.class_indices.get)

Bval_data = val_datagen.flow_from_directory(
    Bval_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    classes=B_classes)

Btest_dataset = test_datagen.flow_from_directory(
    Btest_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    classes=B_classes,
    shuffle=False)

# Build model: same architecture as for City A (frozen VGG16 base plus a
# dense head) instead of a 28x28 / 10-class network that cannot accept
# these images
vgg16_model = tf.keras.applications.VGG16(
    include_top=False, weights='imagenet', input_shape=img_size + (3,))
vgg16_model.trainable = False

model = tf.keras.Sequential([
  vgg16_model,
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(256, activation='relu'),
  tf.keras.layers.Dropout(0.5),
  tf.keras.layers.Dense(Btrain_dataset.num_classes, activation='softmax')
])

# Compile model; categorical cross-entropy matches the one-hot labels
# produced by class_mode='categorical'
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train model
model.fit(Btrain_dataset, validation_data=Bval_data, epochs=10)

# Save the model
# NOTE(review): Keras 3 requires a '.keras' or '.h5' suffix here; the name is
# kept because other cells load 'modelB'.
model.save('modelB')

In [None]:
# Testing for City A

# Load the saved City A model: the global `model` is whichever model was
# trained last, which is the City B model when the cells run top to bottom
model_A = load_model('modelA')

# Evaluate the model
test_loss, test_acc = model_A.evaluate(Atest_dataset)
print('ATest accuracy:', test_acc)


In [None]:
# Testing for City B

# Load the saved City B model so the result does not depend on which model
# the global `model` currently refers to
model_B = load_model('modelB')

# Evaluate model. The dataset already yields (images, labels) batches, so no
# separate label argument is passed: Keras rejects `y` for dataset inputs.
test_loss, test_acc = model_B.evaluate(Btest_dataset, verbose=2)
print('BTest accuracy:', test_acc)

In [None]:
# Model B for City A dataset

# Load the saved model for City B
model_B = load_model('modelB')

# Evaluate model on the test data for City A.
# The City A test dataset built in the City A training cell is reused rather
# than rebuilt from os.listdir(Atest_dir): bare directory entries carry
# neither a full path nor a label.
# NOTE(review): the result is only meaningful if both cities map each class
# to the same label index. Confirm.
loss, accuracy = model_B.evaluate(Atest_dataset)

# Print the test accuracy for City A
print("Test accuracy for City A:", accuracy)

In [None]:
# Model A for City B dataset

# Load the saved model for City A
model_A = tf.keras.models.load_model('modelA')

# Evaluate the model on the test data for City B.
# The City B test dataset built in the City B training cell is reused rather
# than rebuilt from os.listdir(Btest_dir): bare directory entries carry
# neither a full path nor a label.
# NOTE(review): the result is only meaningful if both cities map each class
# to the same label index. Confirm.
loss, accuracy = model_A.evaluate(Btest_dataset)

# Print the test accuracy for City B
print("Test accuracy for City B:", accuracy)