In [None]:
# Install Kaggle (if not installed)
!pip install -q kaggle

# Authenticate Kaggle API and download dataset
import os
from google.colab import files

# Upload kaggle.json only if it's not already uploaded
if not os.path.exists("/root/.kaggle/kaggle.json"):
    files.upload()  # Select kaggle.json when prompted

# Ensure the Kaggle directory exists
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Force download dataset to avoid version issues
!kaggle datasets download -d humansintheloop/semantic-segmentation-of-aerial-imagery --force

# ✅ Check if dataset is already extracted before unzipping
dataset_path = "/content/dataset"

if not os.path.exists(dataset_path):
    print("Extracting dataset...")
    !unzip -oq semantic-segmentation-of-aerial-imagery.zip -d /content/dataset
else:
    print("Dataset already exists, skipping extraction.")


Saving kaggle(1).json to kaggle(1) (1).json
mv: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory
Dataset URL: https://www.kaggle.com/datasets/humansintheloop/semantic-segmentation-of-aerial-imagery
License(s): CC0-1.0
Downloading semantic-segmentation-of-aerial-imagery.zip to /content
 61% 18.0M/29.6M [00:00<00:00, 187MB/s]
100% 29.6M/29.6M [00:00<00:00, 195MB/s]
Dataset already exists, skipping extraction.


In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Rescaling


def Unet_model(input_shape=(160,160,3),num_classes=6):
    """Build a U-Net for per-pixel multi-class segmentation.

    The network rescales its input by 1/255 itself, so it should be fed raw
    0-255 pixel values. It has four encoder stages (64, 128, 256, 512
    filters), a 1024-filter bottleneck and four mirrored decoder stages with
    skip connections, followed by a 1x1 softmax convolution.

    Args:
        input_shape: (height, width, channels) of the input image. Height and
            width should be divisible by 16 (four 2x2 poolings), otherwise
            the skip concatenations will not line up.
        num_classes: Number of output classes; the output has one softmax
            probability per class for every pixel.

    Returns:
        An uncompiled ``tf.keras.Model`` named ``'UNet_160by160'``.
    """

    def double_conv(tensor, filters):
        """Apply two 3x3, same-padded, ReLU-activated convolutions."""
        for _ in range(2):
            tensor = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    def decoder_stage(tensor, skip, filters):
        """Upsample 2x, convolve, merge with the encoder skip, then refine."""
        upsampled = layers.UpSampling2D((2, 2))(tensor)
        # UpSampling2D only enlarges the grid; this convolution lets the
        # network learn how to fill in the enlarged feature map.
        upsampled = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(upsampled)
        # Skip connection: reuse the same-resolution encoder features so
        # fine spatial detail lost to pooling is available again.
        merged = layers.concatenate([upsampled, skip])
        return double_conv(merged, filters)

    inputs = tf.keras.Input(input_shape)
    # Map raw pixel values from [0, 255] to [0, 1] inside the model.
    features = layers.Rescaling(scale=1./255)(inputs)

    # Encoder: each stage doubles the filter count and halves the resolution
    # (160 -> 80 -> 40 -> 20 -> 10 for the default input size).
    skips = []
    for filters in (64, 128, 256, 512):
        features = double_conv(features, filters)
        skips.append(features)
        features = layers.MaxPooling2D((2, 2))(features)

    # Bottleneck at the lowest resolution (10x10 for the default input).
    features = double_conv(features, 1024)

    # Decoder: mirror the encoder, pairing each stage with the encoder
    # output of matching resolution (deepest skip first).
    for filters, skip in zip((512, 256, 128, 64), reversed(skips)):
        features = decoder_stage(features, skip, filters)

    # A 1x1 convolution maps the 64 feature channels to per-pixel class
    # probabilities; the predicted class is the one with the highest score.
    outputs = layers.Conv2D(num_classes, (1, 1), activation='softmax')(features)

    return tf.keras.Model(inputs, outputs, name='UNet_160by160')

# Build the network with its default configuration (160x160 RGB input,
# 6 classes) and print the layer-by-layer summary as a sanity check.
model = Unet_model(input_shape=(160, 160, 3), num_classes=6)
model.summary()


In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
import albumentations as A  # For advanced augmentations
from tensorflow.data import Dataset

# -------------------------------
# ✅ Auto-Detect Dataset Path
# -------------------------------
# Root folder that holds the tile sub-directories after extraction.
DATASET_DIR = "/content/dataset/Semantic segmentation dataset"

# Fail early, with the offending path in the message, when the dataset is missing.
dataset_dir_present = os.path.exists(DATASET_DIR)
if not dataset_dir_present:
    raise FileNotFoundError(f"Dataset directory not found: {DATASET_DIR}")

# -------------------------------
# ✅ Define Class Mapping (Based on Provided RGB Values)
# -------------------------------
# Maps each mask colour (R, G, B) to its integer class id.
CLASS_MAPPING = {
    (155, 155, 155): 0,  # Unlabeled
    (132, 41, 246): 1,   # Land
    (110, 193, 228): 2,  # Road
    (254, 221, 58): 3,   # Vegetation
    (226, 169, 41): 4,   # Water
    (60, 16, 152): 5     # Building
}

# -------------------------------
# ✅ Function to Convert RGB Mask to Class Labels
# -------------------------------
def rgb_to_class(mask):
    """Turn an RGB colour-coded mask into a 2-D array of class ids.

    Args:
        mask: Array of shape (height, width, 3) in RGB channel order.

    Returns:
        A ``uint8`` array of shape (height, width). A pixel gets the id of
        the ``CLASS_MAPPING`` colour it matches to within 2 per channel;
        pixels matching no colour stay 0.
    """
    height, width, _channels = mask.shape
    label_mask = np.zeros((height, width), dtype=np.uint8)

    for color, class_id in CLASS_MAPPING.items():
        # Tolerate small per-channel deviations from the nominal colour.
        within_tolerance = np.isclose(mask, np.array(color), atol=2)
        label_mask[np.all(within_tolerance, axis=-1)] = class_id

    return label_mask

# -------------------------------
# ✅ Function to Extract Patches
# -------------------------------
def extract_patches(image, mask, patch_size=160, stride=160):
    """Cut an image and its mask into aligned square patches.

    Windows of ``patch_size`` x ``patch_size`` are taken every ``stride``
    pixels, row by row. Only windows that fit entirely inside the image are
    kept, so leftover pixels at the right and bottom edges are dropped. The
    patches do not overlap when ``stride`` equals ``patch_size`` (the default).

    Args:
        image: Array whose first two dimensions are (height, width).
        mask: Array indexed with the same row/column windows as ``image``.
        patch_size: Side length of each patch in pixels.
        stride: Step between successive window origins.

    Returns:
        A tuple ``(patches_img, patches_mask)`` of equally long lists.
    """
    height, width = image.shape[:2]

    # Top-left corners of every window that fits fully inside the image.
    origins = [
        (top, left)
        for top in range(0, height - patch_size + 1, stride)
        for left in range(0, width - patch_size + 1, stride)
    ]

    patches_img = [image[top:top + patch_size, left:left + patch_size] for top, left in origins]
    patches_mask = [mask[top:top + patch_size, left:left + patch_size] for top, left in origins]
    return patches_img, patches_mask

# -------------------------------
# ✅ Function for Data Augmentation
# -------------------------------
# Random augmentations, each applied independently with probability p.
_AUGMENTATION_STEPS = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.Rotate(limit=15, p=0.3),  # rotation limit of 15 (degrees, per albumentations)
]
AUGMENTATION = A.Compose(_AUGMENTATION_STEPS)

def apply_augmentation(image, mask):
    """Run the shared ``AUGMENTATION`` pipeline on one image/mask pair.

    Both arrays go through a single pipeline call so that the image and its
    mask receive the same random transform.

    Returns:
        The augmented ``(image, mask)`` tuple.
    """
    result = AUGMENTATION(image=image, mask=mask)
    augmented_image = result["image"]
    augmented_mask = result["mask"]
    return augmented_image, augmented_mask

# -------------------------------
# ✅ Function to Normalize Image
# -------------------------------
def normalize_image(image):
    """Return a float32 copy of ``image`` with every value divided by 255.

    For 8-bit input this maps pixel values from [0, 255] to [0, 1]. The input
    array is not modified.
    """
    as_float = image.astype(np.float32)
    return np.true_divide(as_float, 255.0)

# -------------------------------
# ✅ Function to Load and Preprocess Image and Mask
# -------------------------------
def preprocess_image_mask(image_path, mask_path, patch_size=160, stride=160):
    """Load an image/mask pair, cut it into patches and augment each patch.

    Image patches are returned as raw 0-255 pixel values. They are
    deliberately NOT divided by 255 here: ``Unet_model`` starts with a
    ``Rescaling(1/255)`` layer, so normalizing in this function as well would
    scale the inputs twice and squash them into [0, 0.0039].

    Args:
        image_path: Path to the RGB aerial image.
        mask_path: Path to the colour-coded segmentation mask.
        patch_size: Side length of the square patches.
        stride: Step between successive patch origins.

    Returns:
        A tuple ``(processed_images, processed_masks)`` of equally long lists:
        augmented image patches and matching class-id mask patches.

    Raises:
        FileNotFoundError: If either file cannot be read by OpenCV.
    """
    # cv2.imread returns None instead of raising when a file is missing or
    # unreadable; fail here with the offending path rather than with an
    # obscure error inside cvtColor.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
    if mask is None:
        raise FileNotFoundError(f"Could not read mask: {mask_path}")

    # OpenCV loads channels as BGR; CLASS_MAPPING colours are given as RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    mask = cv2.cvtColor(mask, cv2.COLOR_BGR2RGB)

    # Convert mask to class labels
    mask = rgb_to_class(mask)

    # Extract patches
    img_patches, mask_patches = extract_patches(image, mask, patch_size, stride)

    # Augment every patch together with its mask.
    processed_images, processed_masks = [], []
    for img_patch, mask_patch in zip(img_patches, mask_patches):
        img_patch, mask_patch = apply_augmentation(img_patch, mask_patch)
        processed_images.append(img_patch)
        processed_masks.append(mask_patch)

    return processed_images, processed_masks

# -------------------------------
# ✅ Function to Load Dataset
# -------------------------------
def load_dataset(dataset_dir, patch_size=160, stride=160):
    """Load every tile under ``dataset_dir`` and return all preprocessed patches.

    Each sub-directory of ``dataset_dir`` that contains both an ``images``
    and a ``masks`` folder is treated as a tile. Within a tile, images and
    masks are paired by position after sorting their file names.

    Args:
        dataset_dir: Root directory containing the tile sub-directories.
        patch_size: Side length of the square patches.
        stride: Step between successive patch origins.

    Returns:
        A tuple ``(images, masks)`` of NumPy arrays stacking every patch
        produced by ``preprocess_image_mask``.
    """
    image_paths, mask_paths = [], []

    for tile in sorted(os.listdir(dataset_dir)):  # Iterate through dataset contents
        tile_path = os.path.join(dataset_dir, tile)

        # ✅ Skip if it's not a directory (Ignore 'classes.json')
        if not os.path.isdir(tile_path):
            continue

        img_dir = os.path.join(tile_path, "images")
        mask_dir = os.path.join(tile_path, "masks")

        # ✅ Check if images and masks exist before adding
        if not os.path.exists(img_dir) or not os.path.exists(mask_dir):
            print(f"⚠️ Skipping {tile} (Missing images or masks)")
            continue

        images = sorted(os.listdir(img_dir))
        masks = sorted(os.listdir(mask_dir))

        # ✅ Log each tile for debugging
        print(f"📁 {tile}: Found {len(images)} images, {len(masks)} masks.")

        # zip() stops at the shorter list, so a count mismatch would silently
        # drop files and may pair an image with the wrong mask. Surface it.
        if len(images) != len(masks):
            print(
                f"⚠️ {tile}: image/mask count mismatch; "
                f"pairing only the first {min(len(images), len(masks))} files by sorted name"
            )

        for img_name, mask_name in zip(images, masks):
            image_paths.append(os.path.join(img_dir, img_name))
            mask_paths.append(os.path.join(mask_dir, mask_name))

    all_images, all_masks = [], []
    for img_path, mask_path in zip(image_paths, mask_paths):
        img_patches, mask_patches = preprocess_image_mask(img_path, mask_path, patch_size, stride)
        all_images.extend(img_patches)
        all_masks.extend(mask_patches)

    return np.array(all_images), np.array(all_masks)

# -------------------------------
# ✅ Load and Preprocess the Entire Dataset
# -------------------------------
print("🔄 Loading dataset...")
# X holds the image patches, Y the matching class-id masks.
X, Y = load_dataset(DATASET_DIR, 160, 160)
patch_count = X.shape[0]
print(f"✅ Dataset Loaded: {patch_count} patches.")

# -------------------------------
# ✅ Create TensorFlow Dataset
# -------------------------------
def create_tf_dataset(X, Y, batch_size=4):
    """Wrap image and mask arrays in a shuffled, batched ``tf.data.Dataset``.

    Args:
        X: Array of image patches.
        Y: Array of masks. A trailing channel axis of size 1 is removed, so
            both (N, H, W) and (N, H, W, 1) masks yield (batch, H, W) labels.
        batch_size: Number of samples per batch.

    Returns:
        A dataset of ``(images, masks)`` batches, shuffled with a buffer
        covering all ``len(X)`` samples and prefetched with ``AUTOTUNE``.
    """
    has_channel_axis = Y.ndim == 4 and Y.shape[-1] == 1
    labels = np.squeeze(Y, axis=-1) if has_channel_axis else Y

    pairs = tf.data.Dataset.from_tensor_slices((X, labels))
    shuffled = pairs.shuffle(len(X))
    batched = shuffled.batch(batch_size)
    return batched.prefetch(tf.data.AUTOTUNE)

# ✅ Build a tf.data pipeline over the full set of patches (default batch size).
dataset = create_tf_dataset(X, Y)
sample_count = len(X)
print(f"✅ TensorFlow Dataset Created with {sample_count} samples.")


🔄 Loading dataset...
📁 Tile 1: Found 9 images, 9 masks.
📁 Tile 2: Found 9 images, 9 masks.
📁 Tile 3: Found 9 images, 9 masks.
📁 Tile 4: Found 9 images, 9 masks.
📁 Tile 5: Found 9 images, 9 masks.
📁 Tile 6: Found 9 images, 9 masks.
📁 Tile 7: Found 9 images, 9 masks.
📁 Tile 8: Found 9 images, 9 masks.
✅ Dataset Loaded: 3483 patches.
✅ TensorFlow Dataset Created with 3483 samples.


In [None]:
import tensorflow as tf

def mean_iou(y_true, y_pred, num_classes=6):
    """Mean intersection-over-union of the classes present in a batch.

    IoU is computed per class over the whole batch and averaged over the
    classes that occur in the ground truth or the prediction. Classes that
    are absent from both are left out of the average; counting them as
    IoU = 0 would pull the score down for every batch that does not happen
    to contain all classes.

    Args:
        y_true: Integer class labels of shape (batch, H, W) or
            (batch, H, W, 1).
        y_pred: Per-class scores of shape (batch, H, W, num_classes).
        num_classes: Number of classes to evaluate.

    Returns:
        A scalar float32 tensor. It is 0 when no class is present at all.
    """
    y_pred = tf.argmax(y_pred, axis=-1)  # Convert predicted probabilities to class indices

    # ✅ Ensure y_true has the same shape as y_pred
    if len(y_true.shape) == 4:  # Check if y_true has a channel dimension
        y_true = tf.squeeze(y_true, axis=-1)  # Remove extra channel if present

    # Labels may arrive as floats or uint8 depending on the caller; compare
    # them in the same integer dtype as the argmax output.
    y_true = tf.cast(y_true, y_pred.dtype)

    iou_list = []
    present_list = []
    for i in range(num_classes):
        true_i = tf.equal(y_true, i)
        pred_i = tf.equal(y_pred, i)
        intersection = tf.reduce_sum(tf.cast(tf.logical_and(true_i, pred_i), tf.float32))
        union = tf.reduce_sum(tf.cast(tf.logical_or(true_i, pred_i), tf.float32))
        # divide_no_nan yields 0 for an empty union instead of NaN.
        iou_list.append(tf.math.divide_no_nan(intersection, union))
        present_list.append(tf.cast(union > 0, tf.float32))

    # Absent classes contribute 0 to the numerator and are not counted in
    # the denominator, so the result is the mean over present classes only.
    return tf.math.divide_no_nan(
        tf.reduce_sum(tf.stack(iou_list)),
        tf.reduce_sum(tf.stack(present_list)),
    )

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the patches for testing; the fixed seed keeps the split reproducible.
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.1,random_state=42)
# Give the masks an explicit channel axis: (N, H, W) -> (N, H, W, 1).
Y_train=np.expand_dims(Y_train,axis=-1)
Y_test=np.expand_dims(Y_test,axis=-1)

print("Training set:", X_train.shape, Y_train.shape)
print("Testing set:", X_test.shape, Y_test.shape)

# Define the batch size once, before it is needed, and use it for both datasets.
batch_size=4
train_ds=create_tf_dataset(X_train,Y_train,batch_size=batch_size)
test_ds=create_tf_dataset(X_test,Y_test,batch_size=batch_size)

# Sanity-check one batch before training.
for images, masks in train_ds.take(1):
    print(f"Image batch shape: {images.shape}")  # Expected: (batch, 160, 160, 3)
    print(f"Mask batch shape: {masks.shape}")  # Expected: (batch, 160, 160); create_tf_dataset drops the channel axis

model=Unet_model(input_shape=(160,160,3),num_classes=6)
# Sparse categorical cross-entropy takes integer class ids as targets (no one-hot masks needed).
model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy',mean_iou])
# Keep the History object for later inspection instead of echoing its repr as cell output.
history=model.fit(train_ds,epochs=20,verbose=1)

Training set: (3134, 160, 160, 3) (3134, 160, 160, 1)
Testing set: (349, 160, 160, 3) (349, 160, 160, 1)
Image batch shape: (4, 160, 160, 3)
Mask batch shape: (4, 160, 160)
Epoch 1/20
[1m784/784[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 169ms/step - accuracy: 0.5321 - loss: 1.3980 - mean_iou: 0.1122
Epoch 2/20
[1m784/784[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 161ms/step - accuracy: 0.6574 - loss: 0.9725 - mean_iou: 0.2415
Epoch 3/20
[1m784/784[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 161ms/step - accuracy: 0.6783 - loss: 0.8760 - mean_iou: 0.3185
Epoch 4/20
[1m784/784[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 161ms/step - accuracy: 0.7047 - loss: 0.8112 - mean_iou: 0.3637
Epoch 5/20
[1m784/784[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 162ms/step - accuracy: 0.7296 - loss: 0.7609 - mean_iou: 0.4017
Epoch 6/20
[1m784/784[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 161ms/step - accuracy: 0.7418 - l

<keras.src.callbacks.history.History at 0x7cb376db5c50>

In [None]:
# model.evaluate returns the loss followed by the compiled metrics, so index 1
# is the accuracy (the first metric passed to model.compile).
accuracy = model.evaluate(X_test, Y_test, verbose=1)
test_accuracy_percent = accuracy[1] * 100
print('Accuracy: %.2f' % test_accuracy_percent)

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 428ms/step - accuracy: 0.7420 - loss: 0.7105 - mean_iou: 0.5232
Accuracy: 74.35
