<a href="https://colab.research.google.com/github/zztanmayzz/zigzaggerz/blob/main/feature_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [75]:
#!pip install tensorflow>=2.8.0 opencv-python numpy matplotlib scikit-learn rasterio geopandas

In [76]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import cv2
import matplotlib.pyplot as plt
import glob

In [77]:
# --- U-Net Model Definition ---
class MultiFeatureExtractor:
    def __init__(self, input_shape=(512,512,3)):
        self.input_shape = input_shape
        self.model = self.build_model()
        self.compile_model()

    def build_unet_backbone(self, inputs):
        c1 = layers.Conv2D(64, 3, activation='relu', padding='same')(inputs)
        c1 = layers.Conv2D(64, 3, activation='relu', padding='same')(c1)
        p1 = layers.MaxPooling2D()(c1)
        c2 = layers.Conv2D(128, 3, activation='relu', padding='same')(p1)
        c2 = layers.Conv2D(128, 3, activation='relu', padding='same')(c2)
        p2 = layers.MaxPooling2D()(c2)
        c3 = layers.Conv2D(256, 3, activation='relu', padding='same')(p2)
        c3 = layers.Conv2D(256, 3, activation='relu', padding='same')(c3)
        p3 = layers.MaxPooling2D()(c3)
        c4 = layers.Conv2D(512, 3, activation='relu', padding='same')(p3)
        c4 = layers.Conv2D(512, 3, activation='relu', padding='same')(c4)
        p4 = layers.MaxPooling2D()(c4)
        b = layers.Conv2D(1024, 3, activation='relu', padding='same')(p4)
        b = layers.Conv2D(1024, 3, activation='relu', padding='same')(b)
        return [c1, c2, c3, c4], b

    def build_decoder_branch(self, x, skips, out_ch, activation, name):
        c1, c2, c3, c4 = skips
        u1 = layers.UpSampling2D()(x)
        u1 = layers.Conv2D(512, 2, activation='relu', padding='same')(u1)
        m1 = layers.concatenate([c4, u1])
        c5 = layers.Conv2D(512, 3, activation='relu', padding='same')(m1)
        c5 = layers.Conv2D(512, 3, activation='relu', padding='same')(c5)
        u2 = layers.UpSampling2D()(c5)
        u2 = layers.Conv2D(256, 2, activation='relu', padding='same')(u2)
        m2 = layers.concatenate([c3, u2])
        c6 = layers.Conv2D(256, 3, activation='relu', padding='same')(m2)
        c6 = layers.Conv2D(256, 3, activation='relu', padding='same')(c6)
        u3 = layers.UpSampling2D()(c6)
        u3 = layers.Conv2D(128, 2, activation='relu', padding='same')(u3)
        m3 = layers.concatenate([c2, u3])
        c7 = layers.Conv2D(128, 3, activation='relu', padding='same')(m3)
        c7 = layers.Conv2D(128, 3, activation='relu', padding='same')(c7)
        u4 = layers.UpSampling2D()(c7)
        u4 = layers.Conv2D(64, 2, activation='relu', padding='same')(u4)
        m4 = layers.concatenate([c1, u4])
        c8 = layers.Conv2D(64, 3, activation='relu', padding='same')(m4)
        c8 = layers.Conv2D(64, 3, activation='relu', padding='same')(c8)
        return layers.Conv2D(out_ch, 1, activation=activation, name=name)(c8)

    def build_model(self):
        inp = layers.Input(self.input_shape)
        skips, bridge = self.build_unet_backbone(inp)
        roads = self.build_decoder_branch(bridge, skips, 1, 'sigmoid', 'roads')
        return models.Model(inp, roads)

    def compile_model(self):
        self.model.compile(optimizer='adam',
                           loss='binary_crossentropy',
                           metrics=['accuracy', tf.keras.metrics.MeanIoU(num_classes=2)])

In [78]:
#For uploading Kaggle API Token
#from google.colab import files
#files.upload()

In [79]:
#Moving the API key
#!mkdir -p ~/.kaggle && mv "kaggle(1).json" ~/.kaggle/kaggle.json && chmod 600 ~/.kaggle/kaggle.json

In [80]:
#Gather dataset
# Download and unzip DeepGlobe Road Extraction Dataset
#!kaggle datasets download -d balraj98/deepglobe-road-extraction-dataset
#!unzip -q deepglobe-road-extraction-dataset.zip -d deepglobe_data


In [81]:
import glob
import tensorflow as tf
import os

# Functions to load images and masks
def load_image_mask(img_path, mask_path):
    image = tf.io.read_file(img_path)
    image = tf.image.decode_png(image, channels=3)
    image = tf.image.resize(image, (512,512))
    image = image / 255.0

    mask = tf.io.read_file(mask_path)
    mask = tf.image.decode_png(mask, channels=1)
    mask = tf.image.resize(mask, (512,512))
    mask = mask / 255.0
    mask = tf.round(mask)

    return image, mask

def data_generator(img_files, mask_files, batch_size=8, repeat=False):
    dataset = tf.data.Dataset.from_tensor_slices((img_files, mask_files))
    dataset = dataset.shuffle(1000)
    dataset = dataset.map(load_image_mask, num_parallel_calls=tf.data.AUTOTUNE)
    if repeat:
        dataset = dataset.repeat()
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

# Collect image and mask paths
IMG_DIR = 'deepglobe_data/train/images'
MASK_DIR = 'deepglobe_data/train/masks'

img_files = sorted(glob.glob(os.path.join(IMG_DIR, '*.png')))
mask_files = sorted(glob.glob(os.path.join(MASK_DIR, '*.png')))

# Train-validation split
split_idx = int(0.8 * len(img_files))
train_imgs, val_imgs = img_files[:split_idx], img_files[split_idx:]
train_masks, val_masks = mask_files[:split_idx], mask_files[split_idx:]

# Convert to TensorFlow string tensors
train_imgs = tf.convert_to_tensor(train_imgs, dtype=tf.string)
train_masks = tf.convert_to_tensor(train_masks, dtype=tf.string)
val_imgs = tf.convert_to_tensor(val_imgs, dtype=tf.string)
val_masks = tf.convert_to_tensor(val_masks, dtype=tf.string)

# Set batch size
batch_size = 8

# Create TensorFlow datasets
train_ds = data_generator(train_imgs, train_masks, batch_size=batch_size, repeat=True)
val_ds = data_generator(val_imgs, val_masks, batch_size=batch_size)

# Instantiate model
extractor = MultiFeatureExtractor(input_shape=(512,512,3))


In [82]:
# Calculate steps per epoch
steps_per_epoch = max(len(train_imgs) // batch_size, 1)
validation_steps = max(len(val_imgs) // batch_size, 1)

# Train the model with explicit steps per epoch and validation steps
history = extractor.model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps
)


Epoch 1/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 113s/step
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step
Epoch 10/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Epoch 11/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Epoch 12/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/ste