# Clip WorldCover to AOI
## Cut WorldCover tile to your Area of Interest

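This notebook clips a large ESA WorldCover tile to your Area of Interest (AOI), defined by a GeoJSON file, then summarises and plots the land-cover classes found inside the clipped raster.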

## 1. Import Libraries

In [None]:
import os
import warnings

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import rasterio
from matplotlib.colors import BoundaryNorm, ListedColormap
from rasterio.mask import mask
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so it also hides any warnings raised
# by geopandas or rasterio — consider narrowing it once the notebook is stable.
warnings.filterwarnings('ignore')

print("✓ Libraries imported successfully")

## 2. Configuration

In [None]:
# Input paths.
# Each path can be overridden through an environment variable so the notebook
# runs on another machine without editing this cell; the defaults are unchanged.
#
# NOTE(review): the default tile is an "S2RGBNIR" file. According to the ESA
# WorldCover product documentation the land-cover class codes (10, 20, ...)
# analysed later are delivered in the "_Map.tif" product — confirm that this
# is the intended input.
WORLDCOVER_FILE = os.environ.get(
    "WORLDCOVER_FILE",
    "/Users/timgotschim/Downloads/WORLDCOVER/ESA_WORLDCOVER_10M_2020_V100/S2RGBNIR/ESA_WorldCover_10m_2020_v100_S34E151_S2RGBNIR.tif",
)
GEOJSON_FILE = os.environ.get(
    "WORLDCOVER_AOI_GEOJSON",
    "/Users/timgotschim/Documents/LLM/infrared.city/sentinel_data/Sydney.geojson",
)

# Output path.
# NOTE(review): the input above is the 2020 (v100) release while the output
# name says 2021 — confirm which year is meant before renaming, since other
# notebooks may already reference this file name.
OUTPUT_DIR = os.environ.get(
    "WORLDCOVER_OUTPUT_DIR",
    "/Users/timgotschim/Documents/LLM/infrared.city/worldcover",
)
OUTPUT_FILE = os.path.join(OUTPUT_DIR, "Sydney_WorldCover_2021.tif")

# Create output directory (no error if it already exists)
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("Configuration:")
print(f"  Input WorldCover: {WORLDCOVER_FILE}")
print(f"  Input GeoJSON: {GEOJSON_FILE}")
print(f"  Output file: {OUTPUT_FILE}")

## 3. Load and Inspect Input Files

In [None]:
print("="*70)
print("LOADING INPUT FILES")
print("="*70)

# Fail fast with a clear message if either input path is wrong.
if not os.path.exists(WORLDCOVER_FILE):
    raise FileNotFoundError(f"WorldCover file not found: {WORLDCOVER_FILE}")
if not os.path.exists(GEOJSON_FILE):
    raise FileNotFoundError(f"GeoJSON file not found: {GEOJSON_FILE}")

# Inspect the raster header only; pixel data is read later, in the clip step.
print("\nWorldCover Tile:")
with rasterio.open(WORLDCOVER_FILE) as src:
    print(f"  Dimensions: {src.height} x {src.width} pixels")
    print(f"  Bands: {src.count}")
    print(f"  CRS: {src.crs}")
    print(f"  Bounds: {src.bounds}")
    # src.res is expressed in CRS units. For a geographic CRS that is degrees,
    # so formatting it as metres with two decimals would print "0.00 m".
    x_res, y_res = src.res
    if src.crs is not None and src.crs.is_geographic:
        # One degree of latitude is roughly 111.32 km, which gives a usable
        # north-south pixel size in metres.
        print(f"  Resolution: {x_res:.8f}° x {y_res:.8f}° (~{y_res * 111_320:.1f} m north-south)")
    else:
        print(f"  Resolution: {x_res:.2f} x {y_res:.2f} (CRS units)")
    worldcover_crs = src.crs

# Load GeoJSON (AOI)
print("\nGeoJSON (AOI):")
aoi = gpd.read_file(GEOJSON_FILE)

if aoi.empty:
    raise ValueError(f"GeoJSON contains no features: {GEOJSON_FILE}")

# A GeoJSON file without CRS information is treated as WGS84.
if aoi.crs is None:
    aoi = aoi.set_crs("EPSG:4326")
    print("  ⚠ No CRS found, set to EPSG:4326")

# The clip step compares AOI coordinates with raster coordinates, so the AOI
# must be expressed in the raster's CRS (not unconditionally in EPSG:4326).
if worldcover_crs is None:
    # Without a raster CRS there is nothing to align to; keep the previous
    # behaviour of normalising the AOI to WGS84.
    print("  ⚠ Raster has no CRS, falling back to EPSG:4326 for the AOI")
    target_crs, target_epsg, target_label = "EPSG:4326", 4326, "EPSG:4326"
else:
    target_crs = worldcover_crs.to_wkt()
    target_epsg = worldcover_crs.to_epsg()
    target_label = str(worldcover_crs)

# Reproject unless both sides resolve to the same EPSG code. When a code cannot
# be determined the reprojection still runs, because a no-op transform is
# harmless while a skipped one would clip the wrong area.
if target_epsg is None or aoi.crs.to_epsg() != target_epsg:
    aoi = aoi.to_crs(target_crs)
    print(f"  ⚠ Reprojected to {target_label}")

print(f"  CRS: {aoi.crs}")
print(f"  Bounds: {aoi.total_bounds}")
print(f"  Features: {len(aoi)}")

print("\n✓ Input files loaded successfully")
print("="*70)

## 4. Clip WorldCover to AOI

In [None]:
print("\n" + "="*70)
print("CLIPPING WORLDCOVER TO AOI")
print("="*70)

# Open WorldCover and clip
with rasterio.open(WORLDCOVER_FILE) as src:
    # rasterio's mask() interprets the shapes in the raster's coordinate
    # system, so align the AOI with the raster CRS right where it is used.
    aoi_for_clip = aoi
    if src.crs is not None and aoi.crs is not None:
        raster_epsg = src.crs.to_epsg()
        if raster_epsg is None or aoi.crs.to_epsg() != raster_epsg:
            aoi_for_clip = aoi.to_crs(src.crs.to_wkt())

    # GeoJSON-like dicts for every usable geometry; null or empty geometries
    # cannot be rasterised and are skipped.
    geometries = [
        geom.__geo_interface__
        for geom in aoi_for_clip.geometry
        if geom is not None and not geom.is_empty
    ]
    if not geometries:
        raise ValueError("AOI contains no usable geometries to clip with")

    print(f"\nClipping to {len(geometries)} geometry/geometries...")

    # crop=True shrinks the output to the AOI's bounding window;
    # all_touched=True keeps every pixel the AOI outline touches.
    out_image, out_transform = mask(src, geometries, crop=True, all_touched=True)
    out_meta = src.meta.copy()

    # mask() fills pixels outside the AOI with the raster's nodata value, or
    # with 0 when the raster declares none. Record that value in the output so
    # later readers can tell fill pixels from real data.
    fill_value = src.nodata if src.nodata is not None else 0

    # Update metadata to describe the clipped window
    out_meta.update({
        "driver": "GTiff",
        "height": out_image.shape[1],
        "width": out_image.shape[2],
        "transform": out_transform,
        "nodata": fill_value,
        "compress": "lzw"
    })

    print(f"\nClipped dimensions: {out_image.shape[1]} x {out_image.shape[2]} pixels")

    # Save clipped result
    with rasterio.open(OUTPUT_FILE, "w", **out_meta) as dest:
        dest.write(out_image)

print(f"\n✓ Clipped WorldCover saved: {OUTPUT_FILE}")
print(f"  File size: {os.path.getsize(OUTPUT_FILE) / (1024**2):.2f} MB")
print("="*70)

## 5. Analyze Clipped WorldCover

In [None]:
print("\n" + "="*70)
print("WORLDCOVER ANALYSIS")
print("="*70)

# WorldCover class codes and their legend names
class_names = {
    10: "Tree cover",
    20: "Shrubland",
    30: "Grassland",
    40: "Cropland",
    50: "Built-up",
    60: "Bare/sparse vegetation",
    70: "Snow and ice",
    80: "Permanent water bodies",
    90: "Herbaceous wetland",
    95: "Mangroves",
    100: "Moss and lichen"
}

# Load the first band of the clipped raster together with the header fields
# needed to sanity-check it.
with rasterio.open(OUTPUT_FILE) as src:
    data = src.read(1)
    band_count = src.count
    band_dtype = src.dtypes[0]
    # Pixels outside the AOI were filled by rasterio's mask() with the nodata
    # value, or with 0 when the raster declares none.
    nodata_value = src.nodata if src.nodata is not None else 0

# The class table above only makes sense for a single-band uint8 classification
# raster. Printed rather than issued via `warnings`, because the notebook
# silences all warnings.
if band_count != 1 or band_dtype != "uint8":
    print(f"\n⚠ Expected a single-band uint8 classification raster but found "
          f"{band_count} band(s) of type {band_dtype}.")
    print("  WorldCover class codes are delivered in the '_Map.tif' product; "
          "the statistics below may be meaningless for this input.")

# Percentages are computed over valid pixels only; otherwise the fill pixels
# between the AOI outline and its bounding box would dilute every share.
valid_mask = data != nodata_value
total_pixels = int(np.count_nonzero(valid_mask))
nodata_pixels = int(data.size) - total_pixels
if total_pixels == 0:
    raise ValueError("Clipped raster contains no valid (non-nodata) pixels")

unique_classes, counts = np.unique(data[valid_mask], return_counts=True)

print("\nLand Cover Distribution:")
print("-" * 70)
print(f"{'Class':<6} {'Description':<30} {'Pixels':>12} {'Percentage':>10}")
print("-" * 70)

green_pixels = 0
# Kept as a list (not a set): the visualisation cell passes it to np.isin,
# which does not treat a set as a collection of values.
green_classes = [10, 20, 30, 95]

# .tolist() yields plain Python ints, so dictionary lookups and string
# formatting do not depend on NumPy scalar behaviour.
for class_value, count in zip(unique_classes.tolist(), counts.tolist()):
    percentage = (count / total_pixels) * 100
    class_desc = class_names.get(class_value, "Unknown")
    is_green = "✓" if class_value in green_classes else " "

    print(f"{class_value:<6} {class_desc:<30} {count:>12,} {percentage:>9.2f}% {is_green}")

    if class_value in green_classes:
        green_pixels += count

if nodata_pixels:
    print(f"{'-':<6} {'No data (excluded)':<30} {nodata_pixels:>12,}")

print("-" * 70)
print(f"{'TOTAL':<6} {'Green (10,20,30,95)':<30} {green_pixels:>12,} {100*green_pixels/total_pixels:>9.2f}%")
print(f"{'TOTAL':<6} {'Non-Green':<30} {total_pixels-green_pixels:>12,} {100*(total_pixels-green_pixels)/total_pixels:>9.2f}%")
print("="*70)

print("\n✓ Green classes (10, 20, 30, 95) will be used for ML training")

## 6. Visualize Clipped WorldCover

In [None]:
# Official ESA WorldCover legend colours keyed by class code (0 = no data).
# A continuous colormap such as 'tab10' stretched over the code range gives
# neighbouring codes (e.g. 90 and 95) the same colour, so each code gets its
# own colour bin here.
worldcover_palette = {
    0: "#ffffff",
    10: "#006400",
    20: "#ffbb22",
    30: "#ffff4c",
    40: "#f096ff",
    50: "#fa0000",
    60: "#b4b4b4",
    70: "#f0f0f0",
    80: "#0064c8",
    90: "#0096a0",
    95: "#00cf75",
    100: "#fae6a0",
}
legend_codes = sorted(worldcover_palette)

# One bin per class code: bin edges sit halfway between neighbouring codes.
bin_edges = (
    [legend_codes[0] - 0.5]
    + [(lower + upper) / 2 for lower, upper in zip(legend_codes, legend_codes[1:])]
    + [legend_codes[-1] + 0.5]
)
class_cmap = ListedColormap([worldcover_palette[code] for code in legend_codes])
class_norm = BoundaryNorm(bin_edges, class_cmap.N)

# Create visualization
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# 1. Full classification
im1 = axes[0].imshow(data, cmap=class_cmap, norm=class_norm, interpolation='nearest')
axes[0].set_title('WorldCover Classification', fontsize=14, fontweight='bold')
axes[0].axis('off')
class_cbar = fig.colorbar(im1, ax=axes[0], fraction=0.046, pad=0.04, label='Class', ticks=legend_codes)
# Label each colour with its code and legend name (names come from the analysis cell).
class_cbar.set_ticklabels([f"{code} {class_names.get(code, 'No data')}" for code in legend_codes])

# 2. Binary green/non-green
green_mask = np.isin(data, green_classes).astype(np.uint8)
im2 = axes[1].imshow(green_mask, cmap='RdYlGn', vmin=0, vmax=1, interpolation='nearest')
axes[1].set_title(f'Green Space (Trees, Shrubs, Grass, Mangroves)\n{100*green_pixels/total_pixels:.2f}% Green',
                  fontsize=14, fontweight='bold')
axes[1].axis('off')
fig.colorbar(im2, ax=axes[1], fraction=0.046, pad=0.04, label='0=Non-Green, 1=Green', ticks=[0, 1])

fig.suptitle('Sydney - WorldCover Analysis', fontsize=16, fontweight='bold', y=0.98)
fig.tight_layout()

# Save figure next to the clipped raster. splitext swaps only the real file
# extension, so '.tiff' names and directories containing '.tif' stay intact.
viz_output = os.path.splitext(OUTPUT_FILE)[0] + '_visualization.png'
fig.savefig(viz_output, dpi=300, bbox_inches='tight')
plt.show()

print(f"\n✓ Visualization saved: {viz_output}")

## 7. Summary

In [None]:
# Values that appear in more than one summary line are computed once up front.
rule = "=" * 80
rows, cols = data.shape
green_share = 100 * green_pixels / total_pixels
clipped_size_mb = os.path.getsize(OUTPUT_FILE) / (1024**2)

# The report is assembled as a list of lines and printed in order; a leading
# "\n" on an entry produces the blank line before each section heading.
summary_lines = [
    "\n" + rule,
    "SUMMARY",
    rule,

    "\nInput Files:",
    f"  WorldCover tile: {os.path.basename(WORLDCOVER_FILE)}",
    f"  GeoJSON AOI: {os.path.basename(GEOJSON_FILE)}",

    "\nOutput Files:",
    f"  Clipped WorldCover: {OUTPUT_FILE}",
    f"  Visualization: {viz_output}",

    "\nClipped Data:",
    f"  Dimensions: {rows} x {cols} pixels",
    f"  Total pixels: {total_pixels:,}",
    f"  Green pixels: {green_pixels:,} ({green_share:.2f}%)",
    f"  File size: {clipped_size_mb:.2f} MB",

    "\nGreen Classes (for ML training):",
    "  10 - Tree cover",
    "  20 - Shrubland",
    "  30 - Grassland",
    "  95 - Mangroves",

    "\n✓ WorldCover successfully clipped to Sydney AOI!",
    "\nYou can now use this file in your ML training notebooks:",
    f"  worldcover_file = '{OUTPUT_FILE}'",

    rule,
]

for summary_line in summary_lines:
    print(summary_line)