<a href="https://colab.research.google.com/github/vtaing10/FlyHigh/blob/main/FlyHigh.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
from google.colab import drive
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,Input,GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from collections import Counter

import time
import numpy as np
import tensorflow as tf
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt

In [7]:
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
dataset_fileVT = '/content/drive/MyDrive/data2/train'

# print(os.listdir('/content/drive/MyDrive/data2/train'))

In [10]:
# Load dataset (Adjust batch size and image size as needed)
batch_size = 128
img_size = (224, 224)

# Training and Validation Split (80% train, 20% validation)
train_dataset = image_dataset_from_directory(
    dataset_fileVT,
    validation_split=0.35,
    subset="training",
    seed=123,
    image_size=img_size,
    batch_size=batch_size,
    label_mode="categorical"
)

val_dataset = image_dataset_from_directory(
    dataset_fileVT,
    validation_split=0.35,
    subset="validation",
    seed=123,
    image_size=img_size,
    batch_size=batch_size,
    label_mode="categorical"
)

# Normalize images
train_dataset = train_dataset.map(lambda x, y: (tf.cast(x, tf.float32) / 255.0, y))
val_dataset = val_dataset.map(lambda x, y: (tf.cast(x, tf.float32) / 255.0, y))

# # Data augmentation using tf.keras.Sequential

data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.2),
    layers.Resizing(224, 224)
])



# Apply augmentation to training data
train_dataset = train_dataset.map(lambda x, y: (data_augmentation(x, training=True), y))


all_labels = []
for _, labels in train_dataset.unbatch():
    all_labels.append(np.argmax(labels.numpy()))

counter = Counter(all_labels)
max_count = max(counter.values())

# Oversample dataset to balance classes
def balance_dataset(dataset, max_count):
    def generator():
        for images, labels in dataset:
            label = np.argmax(labels.numpy())
            repeat_factor = max_count // counter[label]
            for _ in range(repeat_factor):
                yield images, labels

    return tf.data.Dataset.from_generator(generator, output_signature=(
        tf.TensorSpec(shape=(224, 224, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(len(counter),), dtype=tf.float32),
    ))

# Inspect the dataset
for images, labels in train_dataset.take(1):
    print("Image batch shape:", images.shape)
    print("Label batch shape:", labels.shape)

# Prefetch for better performance
AUTOTUNE = tf.data.AUTOTUNE
train_dataset = train_dataset.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_dataset = val_dataset.cache().prefetch(buffer_size=AUTOTUNE)


Found 7684 files belonging to 11 classes.
Using 4995 files for training.
Found 7684 files belonging to 11 classes.
Using 2689 files for validation.
Image batch shape: (128, 224, 224, 3)
Label batch shape: (128, 11)


In [11]:
#Base model
base_model = MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
# base_model = load_model("/content/drive/My Drive/modelPoly/cloud_classifier4.keras")
base_model.trainable = True
for layer in base_model.layers[:100]:
    layer.trainable = False

# base_model.summary()

# Define the model with an Input layer

model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.005)),
    Dropout(0.4),
    Dense(len(os.listdir(dataset_fileVT)), activation='softmax')
])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [12]:

#Compile model
model.compile(
    optimizer=Adam(learning_rate=0.000005),
)
model.summary()


In [13]:
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)

lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-4
)

#Time batching loading
start_time = time.time()
for images, labels in train_dataset.take(1):
    print("Batch loaded successfully!")
end_time = time.time()
print(f"Time taken to load one batch: {end_time - start_time} seconds")

# Check Class Distribution and Compute Class Weights
import numpy as np
from sklearn.utils.class_weight import compute_class_weight


 #Compute Class Weights
all_labels = []
for _, labels in train_dataset.unbatch():
    all_labels.append(np.argmax(labels.numpy()))

class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(all_labels),
    y=all_labels
)

# Convert to dictionary
class_weights_dict = {i: weight for i, weight in enumerate(class_weights)}
print("Computed class weights:", class_weights_dict)

# Train the Model with Class Weights
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=50,
    class_weight=class_weights_dict,
    callbacks=[early_stopping, lr_scheduler]
)

Batch loaded successfully!
Time taken to load one batch: 68.77587652206421 seconds
Computed class weights: {0: 0.7359658170030942, 1: 2.1318822023047375, 2: 0.7555589169565875, 3: 1.8091271278522274, 4: 0.8535543403964456, 5: 1.7532467532467533, 6: 3.220502901353965, 7: 0.6128082443871917, 8: 1.000200240288346, 9: 0.5090705258866693, 10: 1.544526901669759}
Epoch 1/50




[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 2s/step - loss: 1.1587 - val_loss: 1.1507 - learning_rate: 5.0000e-06
Epoch 2/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 111ms/step - loss: 1.1481 - val_loss: 1.1401 - learning_rate: 5.0000e-06
Epoch 3/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 111ms/step - loss: 1.1375 - val_loss: 1.1297 - learning_rate: 5.0000e-06
Epoch 4/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 112ms/step - loss: 1.1271 - val_loss: 1.1193 - learning_rate: 5.0000e-06
Epoch 5/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 111ms/step - loss: 1.1168 - val_loss: 1.1091 - learning_rate: 5.0000e-06
Epoch 6/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 115ms/step - loss: 1.1064 - val_loss: 1.0989 - learning_rate: 5.0000e-06
Epoch 7/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 113ms/step - loss: 1.0964 - val_loss: 1.0888 - lea

In [152]:
#Define the model directory and ensure it exists
model_dir = "/content/drive/My Drive/modelPoly"

if not os.path.exists(model_dir):
    os.makedirs(model_dir)
    print(f"Created directory: {model_dir}")
else:
    print(f"Directory already exists: {model_dir}")

model_path = os.path.join(model_dir, "cloud_classifier.keras")

model_path = "/content/drive/MyDrive/modelPoly/cloud_classifier.keras"
# #Save the model
model.save(model_path)
# # print(f"Model saved to: {model_path}")


#Load the saved model
loaded_model = load_model(model_path)
print("Model successfully loaded!")

#Define the test dataset directory
testdataset_path = "/content/drive/MyDrive/data/test"

#Get all .jpg image file names
image_files = [f for f in os.listdir(testdataset_path) if f.endswith(".jpg")]

class_names = os.listdir("/content/drive/MyDrive/data/train")
total_confidence = 0
num_images = len(image_files)

#Loop through each image and evaluate
for img_file in image_files:
    img_path = os.path.join(testdataset_path, img_file)

    img = cv2.imread(img_path)
    img = cv2.resize(img, (224, 224))
    img = img / 255.0
    img = np.expand_dims(img, axis=0)

#Make a prediction
    prediction = loaded_model.predict(img)

    predicted_index = np.argmax(prediction)
    predicted_class = class_names[predicted_index]
    confidence = np.max(prediction)
    total_confidence += confidence

#print
    print(f"Image: {img_file} → Predicted Cloud Type: {predicted_class} (Confidence: {confidence * 100:.2f}%)")
if num_images > 0:
    average_confidence = (total_confidence / num_images) * 100
    print(f"\n📌 Average Confidence Accuracy: {average_confidence:.2f}%")
else:
    print("\n⚠ No test images found! Please check your test directory.")

Directory already exists: /content/drive/My Drive/modelPoly
