In [3]:
import json
import os
import shapely.geometry
from shapely.ops import transform
import pyproj

# Make folder if it doesn't exist
folder = "aois_json"
os.makedirs(folder, exist_ok=True)

# City centers: (lon, lat)
cities = {
    "Vienna": (16.3738, 48.2082),
    "Paris": (2.3522, 48.8566),
    "London": (-0.1276, 51.5074),
    "Toronto": (-79.3832, 43.6532),
    "Vancouver": (-123.1207, 49.2827),
    "San Francisco": (-122.4194, 37.7749),
    "Lisbon": (-9.1393, 38.7223),
    "Madrid": (-3.7038, 40.4168),
    "Barcelona": (2.1734, 41.3851),
    "Berlin": (13.4050, 52.5200),
    "Amsterdam": (4.9041, 52.3676),
    "Melbourne": (144.9631, -37.8136),
    "Sydney": (151.2093, -33.8688),
    "Auckland": (174.7633, -36.8485),
    "Seattle": (-122.3321, 47.6062)
}

# AOI size in meters (~5.12 km to match 512x512 pixels at 10 m)
size_m = 512 * 10  

def create_square_aoi(center_lon, center_lat, size_m):
    """
    Create a square AOI of size_m x size_m around the city center.
    """
    utm_zone = int((center_lon + 180) / 6) + 1
    utm = pyproj.Proj(proj='utm', zone=utm_zone, ellps='WGS84')
    wgs84 = pyproj.Proj(proj='latlong', datum='WGS84')

    project_to_utm = pyproj.Transformer.from_proj(wgs84, utm, always_xy=True).transform
    project_to_wgs = pyproj.Transformer.from_proj(utm, wgs84, always_xy=True).transform

    point = shapely.geometry.Point(center_lon, center_lat)
    point_utm = transform(project_to_utm, point)

    half_size = size_m / 2
    square_utm = shapely.geometry.box(
        point_utm.x - half_size,
        point_utm.y - half_size,
        point_utm.x + half_size,
        point_utm.y + half_size
    )

    square_wgs = transform(project_to_wgs, square_utm)
    return square_wgs

# Generate one JSON per city
for city, (lon, lat) in cities.items():
    square = create_square_aoi(lon, lat, size_m)
    polygon_coords = [list(coord) for coord in square.exterior.coords]
    geojson = {
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "properties": {"city": city},
                "geometry": {"type": "Polygon", "coordinates": [polygon_coords]}
            }
        ]
    }
    filepath = os.path.join(folder, f"{city.replace(' ', '_')}.geojson")
    with open(filepath, "w") as f:
        json.dump(geojson, f, indent=2)
    print(f"Saved {filepath}")

print("All 15 city AOIs saved as separate GeoJSON files in 'aois_json' folder.")

Saved aois_json\Vienna.geojson
Saved aois_json\Paris.geojson
Saved aois_json\London.geojson
Saved aois_json\Toronto.geojson
Saved aois_json\Vancouver.geojson
Saved aois_json\San_Francisco.geojson
Saved aois_json\Lisbon.geojson
Saved aois_json\Madrid.geojson
Saved aois_json\Barcelona.geojson
Saved aois_json\Berlin.geojson
Saved aois_json\Amsterdam.geojson
Saved aois_json\Melbourne.geojson
Saved aois_json\Sydney.geojson
Saved aois_json\Auckland.geojson
Saved aois_json\Seattle.geojson
All 15 city AOIs saved as separate GeoJSON files in 'aois_json' folder.


In [1]:
import rioxarray as rxr
import geopandas as gpd
import numpy as np
import xarray as xr
import os
import glob

# ------------------------------
# Paths
# ------------------------------
aoi_file = "aois_json/Amsterdam.geojson"
sentinel_base_folder = "sentinel_data"
output_file = "sentinel_data/Amsterdam/Amsterdam_MultiMonth_stack.tif"

# Define months and folders
months = ["April", "August", "November"]
month_folders = {
    "April": "Amsterdam/Amsterdam-April-10m",
    "August": "Amsterdam/Amsterdam-August-10m",
    "November": "Amsterdam/Amsterdam-November-10m"
}

# ------------------------------
# Load AOI and extract geometries as a plain list
# ------------------------------
aoi = gpd.read_file(aoi_file)

# Merge multiple features if needed
if len(aoi) > 1:
    merged_geom = aoi.unary_union
    geometries = [merged_geom]
else:
    geometries = [aoi.geometry.iloc[0]]

# Ensure geometries are in WGS84
for i, g in enumerate(geometries):
    if aoi.crs is None:
        aoi.set_crs("EPSG:4326", inplace=True)
    if aoi.crs.to_epsg() != 4326:
        geometries[i] = g.to_crs("EPSG:4326")

# ------------------------------
# Process each month
# ------------------------------
band_substrings = ["B02", "B03", "B04", "B08"]
all_band_arrays = []
all_band_names = []

for month in months:
    folder_path = os.path.join(sentinel_base_folder, month_folders[month])
    print(f"\n=== Processing {month} ===")
    
    month_band_dict = {}
    
    # Load and clip each band for this month
    for substring in band_substrings:
        matched_files = glob.glob(os.path.join(folder_path, f"*{substring}*"))
        if not matched_files:
            raise FileNotFoundError(f"No file found for band containing '{substring}' in {folder_path}")
        
        band_path = matched_files[0]
        band = rxr.open_rasterio(band_path, masked=True).squeeze()
        
        # Clip to AOI using plain list of shapely geometries
        band_clipped = band.rio.clip(geometries, crs="EPSG:4326")
        
        # Store with month-specific name
        band_name = f"{substring}-{month}"
        all_band_arrays.append(band_clipped)
        all_band_names.append(band_name)
        month_band_dict[substring] = band_clipped
        
        print(f"Loaded and clipped {band_name} -> shape: {band_clipped.shape}")
    
    # ------------------------------
    # Calculate NDVI for this month
    # ------------------------------
    nir = month_band_dict["B08"].astype(np.float32)
    red = month_band_dict["B04"].astype(np.float32)
    blue = month_band_dict["B02"].astype(np.float32)
    
    ndvi = (nir - red) / (nir + red)
    ndvi = xr.where(np.isfinite(ndvi), ndvi, np.nan)
    ndvi_name = f"NDVI-{month}"
    all_band_arrays.append(ndvi)
    all_band_names.append(ndvi_name)
    print(f"Calculated {ndvi_name} -> range: [{float(ndvi.min()):.3f}, {float(ndvi.max()):.3f}]")
    
    # ------------------------------
    # Calculate EVI
    # ------------------------------
    evi = 2.5 * (nir - red) / (nir + 6*red - 7.5*blue + 1)
    evi = xr.where(np.isfinite(evi), evi, np.nan)
    evi_name = f"EVI-{month}"
    all_band_arrays.append(evi)
    all_band_names.append(evi_name)
    print(f"Calculated {evi_name} -> range: [{float(evi.min()):.3f}, {float(evi.max()):.3f}]")
    
    # ------------------------------
    # Calculate SAVI
    # ------------------------------
    L = 0.5
    savi = ((nir - red) * (1 + L)) / (nir + red + L)
    savi = xr.where(np.isfinite(savi), savi, np.nan)
    savi_name = f"SAVI-{month}"
    all_band_arrays.append(savi)
    all_band_names.append(savi_name)
    print(f"Calculated {savi_name} -> range: [{float(savi.min()):.3f}, {float(savi.max()):.3f}]")

# ------------------------------
# Stack all bands from all months
# ------------------------------
print(f"\n=== Creating final stack ===")
stack = xr.concat(all_band_arrays, dim="band")
stack = stack.assign_coords(band=all_band_names)

# Convert entire stack to float32
stack = stack.astype(np.float32)

print(f"Stacked all bands > shape: {stack.shape}")
print(f"Total bands: {len(all_band_names)}")

# ------------------------------
# Save as GeoTIFF
# ------------------------------
stack.rio.to_raster(output_file, dtype=np.float32)
print(f"\nSaved stacked GeoTIFF: {output_file}")
print(f"Band order: {all_band_names}")

  from pandas.core import (



=== Processing April ===
Loaded and clipped B02-April -> shape: (512, 512)
Loaded and clipped B03-April -> shape: (512, 512)
Loaded and clipped B04-April -> shape: (512, 512)
Loaded and clipped B08-April -> shape: (512, 512)
Calculated NDVI-April -> range: [-0.515, 0.688]
Calculated EVI-April -> range: [-3915.000, 4470.000]
Calculated SAVI-April -> range: [-0.772, 1.032]

=== Processing August ===
Loaded and clipped B02-August -> shape: (512, 512)
Loaded and clipped B03-August -> shape: (512, 512)
Loaded and clipped B04-August -> shape: (512, 512)
Loaded and clipped B08-August -> shape: (512, 512)
Calculated NDVI-August -> range: [-0.245, 0.849]
Calculated EVI-August -> range: [-4035.000, 7515.000]
Calculated SAVI-August -> range: [-0.367, 1.273]

=== Processing November ===
Loaded and clipped B02-November -> shape: (512, 512)
Loaded and clipped B03-November -> shape: (512, 512)
Loaded and clipped B04-November -> shape: (512, 512)
Loaded and clipped B08-November -> shape: (512, 512)
C

In [2]:
import osmnx as ox
import geopandas as gpd

# Load AOI
aoi = gpd.read_file("aois_json/Amsterdam.geojson")
aoi = aoi.to_crs("EPSG:4326")  # ensure WGS84
polygon = aoi.geometry.iloc[0]  # get shapely polygon

# Define tags for green areas
tags = {
    "leisure": ["park", "garden"],
    "landuse": ["forest", "grass", "meadow", "village_green"],
    "natural": ["wood", "scrub"]
}

# Use correct function - it's directly under ox, not ox.geometries
green_features = ox.features_from_polygon(polygon, tags)

# Keep only polygons
green_features = green_features[green_features.geometry.type.isin(["Polygon", "MultiPolygon"])]

# Save to GeoJSON
green_features.to_file("sentinel_data/Amsterdam/Amsterdam_OSM_green.geojson", driver="GeoJSON")
print(f"Saved OSM green areas for Amsterdam: {len(green_features)} features")
print(f"Total area: {green_features.to_crs('EPSG:3857').area.sum() / 1e6:.2f} km²")

Saved OSM green areas for Amsterdam: 4396 features
Total area: 16.04 km²


In [3]:
import rasterio
from rasterio.features import rasterize
import geopandas as gpd
import numpy as np

# Sentinel-2 stack (multi-month)
stack_path = "sentinel_data/Amsterdam/Amsterdam_MultiMonth_stack.tif"
with rasterio.open(stack_path) as src:
    transform = src.transform
    out_shape = (src.height, src.width)
    crs = src.crs
    
# GeoJSON of green areas from OSM
osm_path = "sentinel_data/Amsterdam/Amsterdam_OSM_green.geojson"
green_features = gpd.read_file(osm_path)

# Ensure CRS matches Sentinel-2
green_features = green_features.to_crs(crs)

# Keep only polygons
green_features = green_features[green_features.geometry.type.isin(["Polygon", "MultiPolygon"])]

labels = rasterize(
    [(geom, 1) for geom in green_features.geometry],
    out_shape=out_shape,
    transform=transform,
    fill=0,
    all_touched=True,   # include pixels partially covered
    dtype="uint8"
)

label_path = "sentinel_data/Amsterdam/Amsterdam_OSM_labels.tif"
with rasterio.open(
    label_path,
    "w",
    driver="GTiff",
    height=out_shape[0],
    width=out_shape[1],
    count=1,
    dtype="uint8",
    crs=crs,
    transform=transform,
) as dst:
    dst.write(labels, 1)

print(f"Saved raster labels aligned with Sentinel-2 stack: {label_path}")

Saved raster labels aligned with Sentinel-2 stack: sentinel_data/Amsterdam/Amsterdam_OSM_labels.tif


In [4]:
import rasterio
import numpy as np

# Load Sentinel-2 stack
with rasterio.open("sentinel_data/Amsterdam/Amsterdam_MultiMonth_stack.tif") as src:
    X = src.read()  # shape: (bands, height, width)

# Load labels
with rasterio.open("sentinel_data/Amsterdam/Amsterdam_OSM_labels.tif") as src:
    y = src.read(1)  # shape: (height, width)

# Flatten to (n_samples, n_features)
n_bands, h, w = X.shape
X_flat = X.reshape(n_bands, -1).T  # shape: (h*w, n_bands)
y_flat = y.flatten()                # shape: (h*w,)

mask = ~np.isnan(X_flat).any(axis=1)
X_flat = X_flat[mask]
y_flat = y_flat[mask]

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_flat, y_flat, test_size=0.2, random_state=42)

# Train Random Forest
clf = RandomForestClassifier(
    n_estimators=200,
    max_depth=25,
    class_weight="balanced",
    n_jobs=-1,
    random_state=42
)
clf.fit(X_train, y_train)

# Evaluate
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.88      0.93      0.90     40164
           1       0.72      0.57      0.64     12236

    accuracy                           0.85     52400
   macro avg       0.80      0.75      0.77     52400
weighted avg       0.84      0.85      0.84     52400

