<a href="https://colab.research.google.com/github/zztanmayzz/zigzaggerz/blob/main/feature_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
!pip install tensorflow>=2.8.0 opencv-python numpy matplotlib scikit-learn rasterio geopandas

In [14]:
import os
import cv2
import numpy as np
import tensorflow as tf
import rasterio
from tensorflow import keras
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt

In [15]:
# =============================================================================
# 1. Model Definition: Multi-Task U-Net
# =============================================================================

class MultiFeatureExtractor:
    """Multi-task U-Net: one shared encoder feeding four independent decoder heads.

    Output heads (Keras output names):
      * ``roads``     - 1-channel sigmoid map
      * ``water``     - 1-channel sigmoid map
      * ``elevation`` - 1-channel linear (regression) map
      * ``terrain``   - ``num_classes_terrain``-channel softmax map

    The Keras model is built and compiled in ``__init__`` and exposed as ``self.model``.
    """

    # Filter counts of the four encoder stages, shallow -> deep. The decoder walks
    # the same tuple in reverse.
    ENCODER_FILTERS = (64, 128, 256, 512)
    # Filter count of the bottleneck between encoder and decoders.
    BRIDGE_FILTERS = 1024

    def __init__(self, input_shape=(256,256,3), num_classes_terrain=2):
        """Build and compile the model.

        Args:
            input_shape: shape of one input image, passed to ``layers.Input``.
            num_classes_terrain: number of channels of the ``terrain`` softmax head.
        """
        self.input_shape = input_shape
        self.num_classes_terrain = num_classes_terrain
        self.model = self.build_model()
        self.compile_model()

    def build_unet_backbone(self, inputs):
        """Build the shared encoder.

        Each stage is two 3x3 ReLU convolutions followed by max pooling; the
        pre-pooling activations are kept as skip connections.

        Returns:
            ``(skips, bridge)`` where ``skips`` is the list of the four stage
            outputs (shallow -> deep) and ``bridge`` is the bottleneck tensor.
        """
        skips = []
        x = inputs
        for filters in self.ENCODER_FILTERS:
            x = layers.Conv2D(filters, 3, activation='relu', padding='same')(x)
            x = layers.Conv2D(filters, 3, activation='relu', padding='same')(x)
            skips.append(x)
            x = layers.MaxPooling2D()(x)
        bridge = layers.Conv2D(self.BRIDGE_FILTERS, 3, activation='relu', padding='same')(x)
        bridge = layers.Conv2D(self.BRIDGE_FILTERS, 3, activation='relu', padding='same')(bridge)
        return skips, bridge

    def build_decoder_branch(self, x, skips, out_ch, activation, name):
        """Build one decoder head on top of the shared encoder.

        Args:
            x: bottleneck tensor returned by ``build_unet_backbone``.
            skips: the four encoder skip tensors, shallow -> deep.
            out_ch: number of output channels of the final 1x1 convolution.
            activation: activation of the final 1x1 convolution.
            name: name of the final layer; ``compile_model`` keys its loss and
                metric dictionaries by these names.

        Returns:
            The output tensor of the head.

        Raises:
            ValueError: if ``skips`` does not hold one tensor per encoder stage.
        """
        skips = list(skips)
        if len(skips) != len(self.ENCODER_FILTERS):
            raise ValueError(
                f"Expected {len(self.ENCODER_FILTERS)} skip tensors, got {len(skips)}.")
        # Deepest skip first: each step upsamples, fuses the matching encoder
        # feature map and refines with two 3x3 convolutions.
        for filters, skip in zip(reversed(self.ENCODER_FILTERS), reversed(skips)):
            x = layers.UpSampling2D()(x)
            x = layers.Conv2D(filters, 2, activation='relu', padding='same')(x)
            x = layers.concatenate([skip, x])
            x = layers.Conv2D(filters, 3, activation='relu', padding='same')(x)
            x = layers.Conv2D(filters, 3, activation='relu', padding='same')(x)
        return layers.Conv2D(out_ch, 1, activation=activation, name=name)(x)

    def build_model(self):
        """Wire the shared encoder to the four heads and return the Keras model.

        The model outputs are ordered ``[roads, water, elevation, terrain]``.
        """
        inp = layers.Input(self.input_shape)
        skips, bridge = self.build_unet_backbone(inp)
        roads   = self.build_decoder_branch(bridge, skips, 1, 'sigmoid', 'roads')
        water   = self.build_decoder_branch(bridge, skips, 1, 'sigmoid', 'water')
        elev    = self.build_decoder_branch(bridge, skips, 1, 'linear',  'elevation')
        terrain = self.build_decoder_branch(bridge, skips, self.num_classes_terrain, 'softmax', 'terrain')
        return models.Model(inp, [roads, water, elev, terrain])

    def compile_model(self):
        """Compile ``self.model`` with one loss, loss weight and metric list per head."""
        losses = {
            'roads':     'binary_crossentropy',
            'water':     'binary_crossentropy',
            'elevation': 'mse',
            'terrain':   'categorical_crossentropy'
        }
        loss_weights = {'roads':1.0,'water':1.0,'elevation':0.5,'terrain':1.0}
        # MeanIoU expects integer class ids; fed raw sigmoid probabilities it
        # truncates them to 0, so the score says nothing about the prediction.
        # BinaryIoU (TF >= 2.9) thresholds the probabilities first and, with both
        # class ids listed, reports the mean IoU of background and foreground.
        metrics = {
            'roads':     ['accuracy',
                          tf.keras.metrics.BinaryIoU(target_class_ids=[0, 1], threshold=0.5, name='mean_iou')],
            'water':     ['accuracy',
                          tf.keras.metrics.BinaryIoU(target_class_ids=[0, 1], threshold=0.5, name='mean_iou')],
            'elevation': ['mae'],
            'terrain':   ['accuracy']
        }
        self.model.compile(optimizer='adam',
                           loss=losses,
                           loss_weights=loss_weights,
                           metrics=metrics)

In [16]:
#For uploading Kaggle API Token
#from google.colab import files
#files.upload()

In [17]:
#Moving the API key
#!mkdir -p ~/.kaggle && mv "kaggle(1).json" ~/.kaggle/kaggle.json && chmod 600 ~/.kaggle/kaggle.json

In [28]:
# Gather dataset: create local folders for a SpaceNet image tile and its mask.
!mkdir -p spacenet/images spacenet/masks

# Example file URLs (replace with actual file names). As written, both requests
# return HTTP 404, so nothing is downloaded.
# NOTE(review): later cells read from images/train/ and masks/train/, not from
# spacenet/ - confirm how these downloads are meant to feed training.
!wget https://spacenet-dataset.s3.amazonaws.com/SpaceNet_AOI_1_Rio_Raw/RGB-PanSharpen/AOI_1_Rio_000000_0_0_RGB.tif -P spacenet/images/
!wget https://spacenet-dataset.s3.amazonaws.com/SpaceNet_AOI_1_Rio_Buildings/AOI_1_Rio_000000_0_0_buildings.png -P spacenet/masks/


--2025-09-21 10:50:46--  https://spacenet-dataset.s3.amazonaws.com/SpaceNet_AOI_1_Rio_Raw/RGB-PanSharpen/AOI_1_Rio_000000_0_0_RGB.tif
Resolving spacenet-dataset.s3.amazonaws.com (spacenet-dataset.s3.amazonaws.com)... 52.216.248.212, 52.217.233.209, 54.231.236.129, ...
Connecting to spacenet-dataset.s3.amazonaws.com (spacenet-dataset.s3.amazonaws.com)|52.216.248.212|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2025-09-21 10:50:46 ERROR 404: Not Found.

--2025-09-21 10:50:46--  https://spacenet-dataset.s3.amazonaws.com/SpaceNet_AOI_1_Rio_Buildings/AOI_1_Rio_000000_0_0_buildings.png
Resolving spacenet-dataset.s3.amazonaws.com (spacenet-dataset.s3.amazonaws.com)... 52.216.248.212, 52.217.233.209, 54.231.236.129, ...
Connecting to spacenet-dataset.s3.amazonaws.com (spacenet-dataset.s3.amazonaws.com)|52.216.248.212|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2025-09-21 10:50:46 ERROR 404: Not Found.



In [19]:
# =============================================================================
# 2. Data Pipeline (with Rasterio for elevation)
# =============================================================================

# Spatial size every image, mask and elevation raster is resized to; also used
# as the height/width of the model input.
TARGET_SIZE = (256,256)

def load_elevation_with_rasterio(path, target_size=TARGET_SIZE):
    """Read band 1 of a raster, min-max scale it and resize it.

    Args:
        path: path of a raster that ``rasterio.open`` can read.
        target_size: size passed to ``tf.image.resize``.

    Returns:
        The scaled band with a trailing channel axis added, resized to
        ``target_size``.
    """
    with rasterio.open(path) as dataset:
        band = dataset.read(1).astype('float32')
    # NOTE(review): min/max are taken over every pixel of the band; nodata
    # values, if present, are not masked out first - confirm the rasters have none.
    lowest = band.min()
    # The small epsilon keeps the division finite for a constant raster.
    value_range = band.max() - lowest + 1e-8
    scaled = (band - lowest) / value_range
    return tf.image.resize(scaled[..., np.newaxis], target_size)

def parse_image_and_masks(img_path, roads_path, water_path, elev_path, terrain_path):
    """Load one training sample: the RGB image and its four targets.

    Args:
        img_path: path of the 3-channel PNG image.
        roads_path: path of the 1-channel PNG roads mask.
        water_path: path of the 1-channel PNG water mask.
        elev_path: path of the elevation raster (read through rasterio).
        terrain_path: path of the 1-channel PNG terrain mask.

    Returns:
        ``(img, targets)`` where ``img`` is the image scaled by 1/255 and
        ``targets`` is a dict keyed by the model's output names
        (``roads``, ``water``, ``elevation``, ``terrain``).
    """
    def _read_png(path, channels, method):
        # Read, decode and resize a PNG, then scale 8-bit values by 1/255.
        decoded = tf.image.decode_png(tf.io.read_file(path), channels=channels)
        resized = tf.image.resize(decoded, TARGET_SIZE, method=method)
        # Nearest-neighbour resize keeps the uint8 input dtype, hence the cast.
        return tf.cast(resized, tf.float32) / 255.0

    img = _read_png(img_path, 3, 'bilinear')

    # Label masks are resized with nearest-neighbour: bilinear interpolation
    # blends neighbouring labels into fractional values along class borders.
    r = _read_png(roads_path, 1, 'nearest')
    w = _read_png(water_path, 1, 'nearest')

    def _load_elev(p):
        return load_elevation_with_rasterio(p.numpy().decode(), TARGET_SIZE)
    # rasterio is plain Python, so it has to run through py_function; the static
    # shape is lost there and must be restored for batching.
    e = tf.py_function(_load_elev, [elev_path], tf.float32)
    e.set_shape([*TARGET_SIZE,1])

    t = _read_png(terrain_path, 1, 'nearest')
    # Binarise at 0.5 and one-hot encode into two classes.
    t = tf.one_hot(tf.squeeze(tf.cast(t>0.5, tf.int32), axis=-1), depth=2)

    return img, {'roads':r, 'water':w, 'elevation':e, 'terrain':t}

def create_dataset(img_paths, roads_paths, water_paths, elev_paths, terrain_paths,
                   batch=8, shuffle=True):
    """Build a batched, prefetched ``tf.data`` pipeline from parallel path lists.

    Args:
        img_paths, roads_paths, water_paths, elev_paths, terrain_paths:
            equally long sequences of file paths; entry ``i`` of every list
            belongs to the same sample.
        batch: batch size.
        shuffle: shuffle the samples (buffer covers the whole list) before loading.

    Returns:
        A dataset yielding the ``(image, targets)`` pairs produced by
        ``parse_image_and_masks``, batched.

    Raises:
        ValueError: if the lists are empty or differ in length.
    """
    lengths = {
        'img_paths': len(img_paths),
        'roads_paths': len(roads_paths),
        'water_paths': len(water_paths),
        'elev_paths': len(elev_paths),
        'terrain_paths': len(terrain_paths),
    }
    # Fail early with a readable message instead of an opaque TensorFlow error:
    # misaligned lists would pair images with the wrong masks.
    if len(set(lengths.values())) != 1:
        raise ValueError(f"All path lists must have the same length, got {lengths}.")
    n_samples = lengths['img_paths']
    if n_samples == 0:
        raise ValueError("Cannot build a dataset from empty path lists.")

    ds = tf.data.Dataset.from_tensor_slices(
        (img_paths, roads_paths, water_paths, elev_paths, terrain_paths))
    if shuffle:
        ds = ds.shuffle(n_samples)
    ds = ds.map(parse_image_and_masks, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch).prefetch(tf.data.AUTOTUNE)
    return ds


In [20]:
# =============================================================================
# 3. Training Script
# =============================================================================

def train_pipeline(train_imgs, train_roads, train_water,
                   train_elev,  train_terrain,
                   val_imgs,   val_roads,   val_water,
                   val_elev,    val_terrain,
                   epochs=50,
                   batch_size=8,
                   checkpoint_path='best_model.h5'):
    """Build the multi-task model and train it on the given file lists.

    Args:
        train_imgs, train_roads, train_water, train_elev, train_terrain:
            parallel path lists of the training split.
        val_imgs, val_roads, val_water, val_elev, val_terrain:
            parallel path lists of the validation split.
        epochs: maximum number of epochs (early stopping may end training sooner).
        batch_size: batch size of both datasets.
        checkpoint_path: file the best model seen so far is saved to.

    Returns:
        The trained ``MultiFeatureExtractor``.
    """
    extractor = MultiFeatureExtractor(input_shape=(*TARGET_SIZE,3))
    train_ds = create_dataset(train_imgs, train_roads, train_water,
                              train_elev, train_terrain,
                              batch=batch_size, shuffle=True)
    # Validation order is kept fixed.
    val_ds   = create_dataset(val_imgs,   val_roads,   val_water,
                              val_elev,   val_terrain,
                              batch=batch_size, shuffle=False)
    callbacks = [
        keras.callbacks.ModelCheckpoint(checkpoint_path, save_best_only=True),
        keras.callbacks.ReduceLROnPlateau(patience=5),
        keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
    ]
    extractor.model.fit(train_ds,
                        validation_data=val_ds,
                        epochs=epochs,
                        callbacks=callbacks)
    return extractor


In [21]:
# =============================================================================
# 4. Inference Utility
# =============================================================================

def preprocess_raw_image(path, input_size=TARGET_SIZE):
    """Load an image from disk and turn it into a single-image model batch.

    Args:
        path: path of the image file.
        input_size: size passed to ``cv2.resize``.

    Returns:
        A float32 array with a leading batch axis of length 1, in RGB channel
        order and scaled by 1/255.

    Raises:
        FileNotFoundError: if ``path`` is not an existing file.
        ValueError: if the file exists but OpenCV cannot decode it.
    """
    img = cv2.imread(path)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an obscure cvtColor error.
    if img is None:
        if not os.path.isfile(path):
            raise FileNotFoundError(f"Image file not found: {path}")
        raise ValueError(f"OpenCV could not decode image: {path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads images as BGR
    img = cv2.resize(img, input_size) / 255.0
    return np.expand_dims(img.astype('float32'), axis=0)

def run_inference(model, raw_img_path):
    """Run the model on one image file and unpack its four outputs.

    Args:
        model: model whose ``predict`` returns the outputs in the order
            roads, water, elevation, terrain.
        raw_img_path: path of the image, loaded through ``preprocess_raw_image``.

    Returns:
        ``(roads_prob, water_prob, elev_map, terrain_cls)``: channel 0 of the
        first three outputs for the single batch item, and the per-pixel argmax
        over the last axis of the fourth output.
    """
    outputs = model.predict(preprocess_raw_image(raw_img_path))

    def _first_item_first_channel(head):
        # Drop the batch axis (one image) and the channel axis (one channel).
        return head[0, :, :, 0]

    roads_prob = _first_item_first_channel(outputs[0])
    water_prob = _first_item_first_channel(outputs[1])
    elev_map = _first_item_first_channel(outputs[2])
    terrain_scores = outputs[3][0]
    terrain_cls = np.argmax(terrain_scores, axis=-1)
    return roads_prob, water_prob, elev_map, terrain_cls

In [22]:
# =============================================================================
# 5. Visualization Example
# =============================================================================

def visualize_predictions(raw_path, roads, water, elev, terrain):
    """Show the source image next to the four predicted maps.

    Args:
        raw_path: path of the original image.
        roads, water, elev: 2-D float maps as returned by ``run_inference``.
        terrain: 2-D map of class indices as returned by ``run_inference``.

    Raises:
        FileNotFoundError: if ``raw_path`` is not an existing file.
        ValueError: if the file exists but OpenCV cannot decode it.
    """
    img = cv2.imread(raw_path)
    # cv2.imread returns None on failure instead of raising.
    if img is None:
        if not os.path.isfile(raw_path):
            raise FileNotFoundError(f"Image file not found: {raw_path}")
        raise ValueError(f"OpenCV could not decode image: {raw_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    h, w = img.shape[:2]
    size = (w, h)  # cv2.resize takes (width, height)
    roads = cv2.resize(roads, size)
    water = cv2.resize(water, size)
    elev  = cv2.resize(elev,  size)
    # Class indices must not be interpolated: the default bilinear resize would
    # invent fractional "classes" along region borders.
    terrain = cv2.resize(terrain.astype('float32'), size,
                         interpolation=cv2.INTER_NEAREST)

    panels = [
        (img,     None,      'Original'),
        (roads,   'Reds',    'Roads'),
        (water,   'Blues',   'Water'),
        (elev,    'terrain', 'Elevation'),
        (terrain, 'viridis', 'Terrain'),
    ]
    plt.figure(figsize=(12,8))
    for position, (data, cmap, title) in enumerate(panels, start=1):
        plt.subplot(2, 3, position)
        plt.imshow(data, cmap=cmap)
        plt.title(title)
        plt.axis('off')
    plt.tight_layout()
    plt.show()

In [23]:
# =============================================================================
# 6. Example Usage: Ensure Mask Files Exist
# =============================================================================

import glob


def collect_matched_samples(img_dir, mask_dir):
    """Collect images from ``img_dir`` that have all four targets in ``mask_dir``.

    An image ``img_<id>.png`` is kept only if ``roads_<id>.png``,
    ``water_<id>.png``, ``elev_<id>.tif`` and ``terrain_<id>.png`` all exist.
    The image list is filtered together with the mask lists, so index ``i`` of
    every returned list refers to the same sample.

    Returns:
        ``(imgs, roads, water, elev, terrain)``: five equally long path lists,
        ordered by image path.
    """
    imgs, roads, water, elev, terrain = [], [], [], [], []
    for img_path in sorted(glob.glob(os.path.join(img_dir, 'img_*.png'))):
        # The sample id is whatever follows the last underscore of the file stem.
        base = os.path.splitext(os.path.basename(img_path))[0].split('_')[-1]
        roads_p   = os.path.join(mask_dir, f'roads_{base}.png')
        water_p   = os.path.join(mask_dir, f'water_{base}.png')
        elev_p    = os.path.join(mask_dir, f'elev_{base}.tif')
        terrain_p = os.path.join(mask_dir, f'terrain_{base}.png')
        if all(os.path.exists(p) for p in (roads_p, water_p, elev_p, terrain_p)):
            imgs.append(img_path)
            roads.append(roads_p)
            water.append(water_p)
            elev.append(elev_p)
            terrain.append(terrain_p)
    return imgs, roads, water, elev, terrain


if __name__ == '__main__':
    IMG_TRAIN_DIR    = 'images/train/'
    MASK_TRAIN_DIR   = 'masks/train/'
    IMG_VAL_DIR      = 'images/val/'
    MASK_VAL_DIR     = 'masks/val/'

    # Images without a complete set of masks are dropped from the image lists as
    # well; otherwise images and masks would be paired up incorrectly.
    (train_imgs, train_roads, train_water,
     train_elev, train_terrain) = collect_matched_samples(IMG_TRAIN_DIR, MASK_TRAIN_DIR)
    (val_imgs, val_roads, val_water,
     val_elev, val_terrain) = collect_matched_samples(IMG_VAL_DIR, MASK_VAL_DIR)

    print(f'Training on {len(train_roads)} samples.')
    print(f'Validation on {len(val_roads)} samples.')

    if len(train_roads) == 0 or len(val_roads) == 0:
        raise RuntimeError("No matching mask files found. Please check your directory paths and mask filenames.")

    extractor = train_pipeline(
        train_imgs, train_roads, train_water, train_elev, train_terrain,
        val_imgs, val_roads, val_water, val_elev, val_terrain,
        epochs=30
    )

    # Reload the best checkpoint written during training, then run one tile
    # through the model and plot the result.
    extractor.model.load_weights('best_model.h5')
    raw_tile = 'isro_tiles/tile_000.png'
    roads_p, water_p, elev_p, terrain_c = run_inference(extractor.model, raw_tile)
    visualize_predictions(raw_tile, roads_p, water_p, elev_p, terrain_c)

Training on 0 samples.
Validation on 0 samples.


RuntimeError: No matching mask files found. Please check your directory paths and mask filenames.

In [30]:
# List the first 20 entries under each SpaceNet prefix using unsigned requests
# (presumably to find object names that exist, since the wget URLs above returned 404).
!aws s3 ls --no-sign-request s3://spacenet-dataset/SpaceNet_AOI_1_Rio_Raw/RGB-PanSharpen/ | head -20
!aws s3 ls --no-sign-request s3://spacenet-dataset/SpaceNet_AOI_1_Rio_Buildings/ | head -20
