# Clip WorldCover to AOI
## Cut WorldCover tile to your Area of Interest

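This notebook cuts a large WorldCover tile down to each city's Area of Interest (AOI) by reprojecting it onto the grid of that city's OSM labels raster. The AOI GeoJSON files are only checked for existence; they are not used for the clipping itself.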

## 1. Import Libraries

In [None]:
import os
import numpy as np
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from rasterio.warp import calculate_default_transform, reproject, Resampling
import matplotlib.pyplot as plt
import warnings
# Suppress every warning category for the rest of the session so that the
# cell outputs below stay compact.
warnings.simplefilter("ignore")

# Confirmation that all imports above resolved.
print("Libraries imported successfully")

## 2. Configuration

In [None]:
# ---------------------------------------------------------------------------
# Input folders: raw WorldCover tiles, AOI GeoJSON files, and the per-city
# Sentinel folders that hold the OSM label rasters.
# ---------------------------------------------------------------------------
WORLDCOVER_DIR = "/Users/tyomachka/Desktop/WU/Data_Lab.TMP/rep.infrared.city/WorldCover_Artem"
GEOJSON_DIR = "/Users/tyomachka/Desktop/WU/Data_Lab.TMP/rep.infrared.city/aois_json"
SENTINEL_DIR = "/Users/tyomachka/Desktop/WU/Data_Lab.TMP/rep.infrared.city/sentinel_data"

# ---------------------------------------------------------------------------
# Output folder: created here if it does not exist yet.
# ---------------------------------------------------------------------------
OUTPUT_DIR = "/Users/tyomachka/Desktop/WU/Data_Lab.TMP/rep.infrared.city/WorldCover_Clipped"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# City name -> file name of its WorldCover tile inside WORLDCOVER_DIR.
# The city name is also used to build the GeoJSON / OSM label / output names.
# NOTE(review): "Seattle.tif" does not follow the "<city>_WC.tif" pattern of
# the other entries -- confirm this is the real file name.
CITIES = dict(
    London="London_WC.tif",
    Melbourne="Melbourne_WC.tif",
    Paris="Paris_WC.tif",
    San_Francisco="San_Francisco_WC.tif",
    Seattle="Seattle.tif",
)

# Echo the effective configuration so a wrong path is spotted immediately.
print("Configuration:")
for _label, _folder in (
    ("WorldCover", WORLDCOVER_DIR),
    ("GeoJSON", GEOJSON_DIR),
    ("Sentinel", SENTINEL_DIR),
    ("Output", OUTPUT_DIR),
):
    print(f"  {_label} folder: {_folder}")
print(f"\nCities to process: {list(CITIES)}")

## 3. Load and Inspect Input Files

In [None]:
_rule = "=" * 70
print(_rule)
print("CHECKING INPUT FILES AND TARGET CRS")
print(_rule)

# City name -> CRS of its OSM labels raster (only for cities whose raster exists).
target_crs = {}

for city in CITIES:
    wc_file = CITIES[city]

    # Expected locations of the three inputs for this city.
    wc_path = os.path.join(WORLDCOVER_DIR, wc_file)
    geojson_path = os.path.join(GEOJSON_DIR, f"{city}.geojson")
    osm_path = os.path.join(SENTINEL_DIR, city, f"{city}_OSM_labels.tif")

    osm_exists = os.path.exists(osm_path)
    input_report = (
        ("WorldCover", wc_file, os.path.exists(wc_path)),
        ("GeoJSON", f"{city}.geojson", os.path.exists(geojson_path)),
        ("OSM Labels", f"{city}_OSM_labels.tif", osm_exists),
    )

    # One status line per input: "OK" when the file is on disk, else "MISSING".
    print(f"\n{city}:")
    for kind, filename, found in input_report:
        flag = "OK" if found else "MISSING"
        print(f"  {flag} {kind}: {filename}")

    # The target CRS can only be read when the OSM labels raster is present.
    if not osm_exists:
        continue

    with rasterio.open(osm_path) as osm_src:
        target_crs[city] = osm_src.crs
        print(f"  Target CRS: {osm_src.crs}")

print("\n" + _rule)
print("WorldCover will be reprojected to match each city's UTM zone")
print(_rule)

## 4. Clip WorldCover to AOI

In [None]:
print("="*70)
print("CLIPPING & REPROJECTING WORLDCOVER TO MATCH OSM LABELS")
print("="*70)

# City name -> summary of the written raster (path, shape, size in MB, CRS).
results = {}

for city, wc_file in CITIES.items():
    print(f"\nProcessing {city}...")

    wc_path = os.path.join(WORLDCOVER_DIR, wc_file)
    osm_path = os.path.join(SENTINEL_DIR, city, f"{city}_OSM_labels.tif")
    output_path = os.path.join(OUTPUT_DIR, f"{city}_WorldCover_clipped.tif")

    # The OSM labels raster defines the target grid: its CRS, affine transform
    # and pixel dimensions are reused verbatim, so the output lines up with it
    # pixel for pixel. Reprojecting onto that grid is also what "clips" the
    # WorldCover tile to the AOI.
    with rasterio.open(osm_path) as osm_src:
        dst_crs = osm_src.crs
        dst_width = osm_src.width
        dst_height = osm_src.height
        dst_transform = osm_src.transform

    print(f"  Target CRS: {dst_crs}")
    print(f"  Target shape: {dst_height} x {dst_width}")

    # Open WorldCover only for as long as it is needed for the reprojection.
    with rasterio.open(wc_path) as src:
        # Remember the source nodata value (may be None) so the output file can
        # declare it. Per the rasterio docs, reproject() defaults to the band's
        # nodata when the source is a rasterio band.
        src_nodata = src.nodata

        # Destination buffer on the OSM grid, same dtype as the source band.
        dst_data = np.zeros((dst_height, dst_width), dtype=src.dtypes[0])

        reproject(
            source=rasterio.band(src, 1),
            destination=dst_data,
            src_transform=src.transform,
            src_crs=src.crs,
            dst_transform=dst_transform,
            dst_crs=dst_crs,
            resampling=Resampling.nearest  # Nearest keeps categorical class codes intact
        )

    # Save the reprojected and clipped WorldCover. The nodata value is carried
    # over so downstream readers can tell "no data" apart from real classes.
    out_meta = {
        "driver": "GTiff",
        "height": dst_height,
        "width": dst_width,
        "count": 1,
        "dtype": dst_data.dtype,
        "crs": dst_crs,
        "transform": dst_transform,
        "nodata": src_nodata,
        "compress": "lzw"
    }

    with rasterio.open(output_path, "w", **out_meta) as dest:
        dest.write(dst_data, 1)

    file_size = os.path.getsize(output_path) / (1024**2)
    results[city] = {
        "output": output_path,
        "shape": (dst_height, dst_width),
        "size_mb": file_size,
        "crs": str(dst_crs)
    }

    print(f"  Saved: {output_path}")
    print(f"  File size: {file_size:.2f} MB")

print("\n" + "="*70)
print("ALL CITIES PROCESSED!")
print("="*70)

## 5. Analyze Clipped WorldCover

In [None]:
print("="*70)
print("WORLDCOVER ANALYSIS - ALL CITIES")
print("="*70)

# WorldCover class code -> human-readable class name
class_names = {
    10: "Tree cover",
    20: "Shrubland",
    30: "Grassland",
    40: "Cropland",
    50: "Built-up",
    60: "Bare/sparse vegetation",
    70: "Snow and ice",
    80: "Permanent water bodies",
    90: "Herbaceous wetland",
    95: "Mangroves",
    100: "Moss and lichen"
}

green_classes = [10, 20, 30]  # As per your Artem.ipynb configuration

for city in CITIES:
    output_path = os.path.join(OUTPUT_DIR, f"{city}_WorldCover_clipped.tif")

    with rasterio.open(output_path) as src:
        data = src.read(1)
        crs = src.crs

    total_pixels = data.size
    unique_classes, counts = np.unique(data, return_counts=True)

    # Convert to plain Python ints: keeps the printed lists free of NumPy
    # scalar reprs (e.g. "np.uint8(10)") and makes the dict keys match the
    # integer keys of class_names.
    class_counts = dict(zip(unique_classes.tolist(), counts.tolist()))

    green_pixels = sum(class_counts.get(cls, 0) for cls in green_classes)
    # Guard against an empty raster so the percentage never divides by zero.
    green_pct = 100 * green_pixels / total_pixels if total_pixels else 0.0

    print(f"\n{city} (CRS: {crs}):")
    print(f"  Shape: {data.shape}")
    print(f"  Total pixels: {total_pixels:,}")
    print(f"  Green pixels (10,20,30): {green_pixels:,} ({green_pct:.2f}%)")
    print(f"  Classes present: {list(class_counts)}")

    # Per-class breakdown; values without an entry in class_names (for
    # example a fill value) are reported with a placeholder label.
    for cls, count in class_counts.items():
        name = class_names.get(cls, "(no class name)")
        print(f"    {cls:>3}  {name}: {count:,} ({100 * count / total_pixels:.2f}%)")

print("\n" + "="*70)

## 6. Visualize Clipped WorldCover

In [None]:
# Create visualization comparing WorldCover with OSM labels: one row per city,
# three panels per row (all classes, binary green mask, OSM labels).
# squeeze=False keeps `axes` two-dimensional even when CITIES holds a single
# city, so the axes[i, j] indexing below works for any number of rows.
fig, axes = plt.subplots(len(CITIES), 3, figsize=(15, 4*len(CITIES)), squeeze=False)

for i, city in enumerate(CITIES):
    wc_path = os.path.join(OUTPUT_DIR, f"{city}_WorldCover_clipped.tif")
    osm_path = os.path.join(SENTINEL_DIR, city, f"{city}_OSM_labels.tif")

    with rasterio.open(wc_path) as src:
        wc_data = src.read(1)

    with rasterio.open(osm_path) as src:
        osm_data = src.read(1)

    # WorldCover binary (1 where the pixel belongs to one of the green classes)
    wc_green = np.isin(wc_data, green_classes).astype(np.uint8)

    # WorldCover classification
    axes[i, 0].imshow(wc_data, cmap='tab10', interpolation='nearest')
    axes[i, 0].set_title(f'{city} - WorldCover Classes', fontsize=11, fontweight='bold')
    axes[i, 0].axis('off')

    # WorldCover binary green
    axes[i, 1].imshow(wc_green, cmap='RdYlGn', vmin=0, vmax=1, interpolation='nearest')
    green_pct = 100 * wc_green.sum() / wc_green.size
    axes[i, 1].set_title(f'{city} - WorldCover Green ({green_pct:.1f}%)', fontsize=11, fontweight='bold')
    axes[i, 1].axis('off')

    # OSM labels
    # NOTE(review): the percentage assumes the labels are strictly 0/1 --
    # confirm there is no other fill value in the OSM label rasters.
    axes[i, 2].imshow(osm_data, cmap='RdYlGn', vmin=0, vmax=1, interpolation='nearest')
    osm_pct = 100 * osm_data.sum() / osm_data.size
    axes[i, 2].set_title(f'{city} - OSM Labels ({osm_pct:.1f}%)', fontsize=11, fontweight='bold')
    axes[i, 2].axis('off')

plt.suptitle('WorldCover vs OSM Labels Comparison (Same Grid)', fontsize=14, fontweight='bold', y=1.01)
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "worldcover_osm_comparison.png"), dpi=150, bbox_inches='tight')
plt.show()

print(f"\nVisualization saved to {OUTPUT_DIR}/worldcover_osm_comparison.png")

## 7. Summary

In [None]:
_rule = "=" * 70
print(_rule)
print("SUMMARY")
print(_rule)

print(f"\nOutput folder: {OUTPUT_DIR}")

# List every clipped raster together with its size on disk.
print("\nClipped files created:")
for city in CITIES:
    clipped_name = f"{city}_WorldCover_clipped.tif"
    output_path = os.path.join(OUTPUT_DIR, clipped_name)
    size_mb = os.path.getsize(output_path) / (1024**2)
    print(f"  {clipped_name} ({size_mb:.2f} MB)")

print(f"\nGreen classes used: {green_classes} (Tree, Shrubland, Grassland)")

# Emit a ready-to-paste dict literal that maps each city to its clipped file.
print("\nTo use in Artem.ipynb, update worldcover_paths:")
snippet = ["worldcover_paths = {"]
snippet.extend(
    f'    "{city}": "{OUTPUT_DIR}/{city}_WorldCover_clipped.tif",'
    for city in CITIES
)
snippet.append("}")
print("\n".join(snippet))

print("\n" + _rule)