# Automated WorldCover Download for Multiple Cities
## Download and clip WorldCover 2021 data for all cities

This notebook automatically:
1. Finds all cities with Multi-Month stacks
2. Loads their GeoJSON AOIs
3. Determines the correct WorldCover tile
4. Downloads and clips the data for each city

## 1. Import Libraries

In [None]:
import os
import glob
import requests
import numpy as np
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from rasterio.merge import merge
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

print("✓ Libraries imported successfully")

## 2. Configuration

In [None]:
# Base paths
# The project root defaults to the original author's directory, but can be
# redirected with the INFRARED_CITY_BASE environment variable so the notebook
# runs on other machines without editing this cell.
BASE_PATH = os.path.expanduser(
    os.environ.get(
        "INFRARED_CITY_BASE",
        "/Users/timgotschim/Documents/LLM/infrared.city",
    )
)
STACKS_FOLDER = os.path.join(BASE_PATH, "21 Stacks")        # *_MultiMonth_stack.tif inputs
GEOJSON_FOLDER = os.path.join(BASE_PATH, "sentinel_data")   # per-city AOI GeoJSON files
WORLDCOVER_FOLDER = os.path.join(BASE_PATH, "worldcover")   # downloaded tiles + clipped outputs

# Create WorldCover folder (no-op when it already exists)
os.makedirs(WORLDCOVER_FOLDER, exist_ok=True)

# WorldCover base URL; tile file names are appended directly to it
WORLDCOVER_BASE_URL = "https://esa-worldcover.s3.eu-central-1.amazonaws.com/v200/2021/map/"

print("Configuration:")
print(f"  Stacks folder: {STACKS_FOLDER}")
print(f"  GeoJSON folder: {GEOJSON_FOLDER}")
print(f"  WorldCover output: {WORLDCOVER_FOLDER}")

## 3. Helper Functions

In [None]:
def get_worldcover_tile_name(lon, lat):
    """
    Build the file name of the WorldCover 2021 (v200) tile containing a point.

    Tiles lie on a 3x3 degree grid and are named after their south-west
    corner, e.g. ESA_WorldCover_10m_2021_v200_N51W003_Map.tif.

    Args:
        lon: Longitude in decimal degrees (negative values are west).
        lat: Latitude in decimal degrees (negative values are south).

    Returns:
        The tile file name: N/S + 2-digit latitude and E/W + 3-digit
        longitude of the tile's south-west corner.
    """
    import math

    def snap_down(degrees):
        # Grid line at or below the coordinate (floor, not truncation, so
        # negative coordinates move away from zero).
        return 3 * math.floor(degrees / 3)

    south_edge = snap_down(lat)
    west_edge = snap_down(lon)

    # The hemisphere letter follows the sign of the corner, not of the point:
    #   lat -1.5 -> corner -3 -> "S03"  (tile spans 3°S up to the equator)
    #   lon -0.1 -> corner -3 -> "W003" (tile spans 3°W up to Greenwich)
    ns = 'S' if south_edge < 0 else 'N'
    ew = 'W' if west_edge < 0 else 'E'

    return f"ESA_WorldCover_10m_2021_v200_{ns}{abs(south_edge):02d}{ew}{abs(west_edge):03d}_Map.tif"


def download_worldcover_tile(tile_name, output_dir, timeout=(10, 120)):
    """
    Download a WorldCover tile into ``output_dir`` unless it is already there.

    The tile is streamed into a temporary ``<tile_name>.part`` file and only
    renamed to its final name once the transfer has completed. An interrupted
    or failed download therefore never leaves a truncated file behind that a
    later run would mistake for a valid cached tile.

    Args:
        tile_name: Tile file name; it is appended to WORLDCOVER_BASE_URL.
        output_dir: Directory in which the tile is stored.
        timeout: Timeout handed to ``requests.get`` as (connect, read)
            seconds, so a stalled connection cannot block forever.

    Returns:
        Path of the tile on disk, or None if the download failed with a
        ``requests`` error (the error is printed).
    """
    output_path = os.path.join(output_dir, tile_name)

    if os.path.exists(output_path):
        print(f"    ✓ Tile already exists: {tile_name}")
        return output_path

    url = f"{WORLDCOVER_BASE_URL}{tile_name}"
    print(f"    Downloading: {tile_name}")
    print(f"    URL: {url}")

    partial_path = output_path + ".part"
    try:
        # The context manager releases the connection even when streaming
        # stops early.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            total_size = int(response.headers.get('content-length', 0))
            received = 0

            with open(partial_path, 'wb') as f, \
                    tqdm(total=total_size, unit='B', unit_scale=True, desc=f"    {tile_name}") as pbar:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        received += len(chunk)
                        pbar.update(len(chunk))

            # Content-Length counts bytes on the wire, so it is only
            # comparable when the body was not transfer-compressed.
            if (total_size
                    and 'content-encoding' not in response.headers
                    and received != total_size):
                raise requests.exceptions.RequestException(
                    f"incomplete download ({received} of {total_size} bytes)"
                )

        # Publish the finished file under its final name in one step.
        os.replace(partial_path, output_path)

    except requests.exceptions.RequestException as e:
        print(f"    ✗ Error downloading {tile_name}: {e}")
        return None

    finally:
        # Runs on every exit path (including Ctrl-C); after a successful
        # rename the partial file no longer exists and nothing is removed.
        if os.path.exists(partial_path):
            os.remove(partial_path)

    print(f"    ✓ Downloaded: {tile_name}")
    return output_path


def clip_worldcover_to_aoi(worldcover_file, aoi_file, output_file):
    """
    Crop a WorldCover raster to the footprint of an AOI vector file.

    The AOI is read with geopandas, treated as EPSG:4326 when it carries no
    CRS, and reprojected to EPSG:4326 before it is used as the mask. The
    cropped raster is written to ``output_file`` as an LZW-compressed GeoTIFF
    that otherwise reuses the source raster's metadata.

    Args:
        worldcover_file: Path of the WorldCover raster to crop.
        aoi_file: Path of the AOI file (anything ``gpd.read_file`` accepts).
        output_file: Path of the GeoTIFF to create.

    Returns:
        ``output_file``.
    """
    # Bring the AOI into the lon/lat CRS used for the mask.
    footprint = gpd.read_file(aoi_file)
    if footprint.crs is None:
        footprint.set_crs("EPSG:4326", inplace=True)
    footprint = footprint.to_crs("EPSG:4326")

    # rasterio's mask() takes GeoJSON-like geometry mappings.
    shapes = []
    for feature in footprint.__geo_interface__["features"]:
        shapes.append(feature["geometry"])

    # Read only what the AOI needs; crop=True shrinks the array to its extent.
    with rasterio.open(worldcover_file) as raster:
        clipped, clipped_transform = mask(raster, shapes, crop=True)
        profile = raster.meta.copy()

    # clipped is (bands, rows, cols); the profile must describe the new grid.
    n_rows, n_cols = clipped.shape[1:3]
    profile.update(
        driver="GTiff",
        height=n_rows,
        width=n_cols,
        transform=clipped_transform,
        compress="lzw",
    )

    with rasterio.open(output_file, "w", **profile) as sink:
        sink.write(clipped)

    return output_file


print("✓ Helper functions defined")

## 4. Discover Cities

In [None]:
print("="*80)
print("DISCOVERING CITIES")
print("="*80)

# One "<city>_MultiMonth_stack.tif" per city marks the candidates.
stack_files = glob.glob(os.path.join(STACKS_FOLDER, "*_MultiMonth_stack.tif"))

print(f"\nFound {len(stack_files)} Multi-Month stacks")

cities_to_process = []

for stack_file in sorted(stack_files):
    # The city name is the stack file name without its fixed suffix.
    city_name = os.path.basename(stack_file).replace("_MultiMonth_stack.tif", "")

    # The AOI may sit directly in the GeoJSON folder or in a per-city
    # sub-folder; the first location that exists wins.
    candidate_paths = (
        os.path.join(GEOJSON_FOLDER, f"{city_name}.geojson"),
        os.path.join(GEOJSON_FOLDER, city_name, f"{city_name}.geojson"),
    )
    geojson_file = next((path for path in candidate_paths if os.path.exists(path)), None)

    if geojson_file is None:
        print(f"  ✗ {city_name}: No GeoJSON found")
        continue

    # A clipped output from an earlier run means nothing is left to download.
    worldcover_output = os.path.join(WORLDCOVER_FOLDER, f"{city_name}_WorldCover_2021.tif")
    already_exists = os.path.exists(worldcover_output)

    cities_to_process.append({
        "name": city_name,
        "stack_file": stack_file,
        "geojson_file": geojson_file,
        "worldcover_output": worldcover_output,
        "already_exists": already_exists
    })

    if already_exists:
        status = "✓ (exists)"
    else:
        status = "⚠ (needs download)"
    print(f"  {city_name:20s} - GeoJSON: ✓  WorldCover: {status}")

print(f"\n{'='*80}")
print(f"Cities ready for processing: {len(cities_to_process)}")
cities_needing_download = [city for city in cities_to_process if not city['already_exists']]
print(f"Cities needing WorldCover download: {len(cities_needing_download)}")
print(f"{'='*80}")

## 5. Download and Clip WorldCover for Each City

In [None]:
print("\n" + "="*80)
print("DOWNLOADING AND PROCESSING WORLDCOVER")
print("="*80)

# WorldCover tiles lie on a 3x3 degree grid (see get_worldcover_tile_name).
TILE_SIZE_DEG = 3
# Classes counted as "green": tree cover, shrubland, grassland, mangroves.
GREEN_CLASSES = [10, 20, 30, 95]


def _tiles_covering_bounds(minx, miny, maxx, maxy):
    """Return the sorted names of all tiles touched by a lon/lat bounding box."""
    # One sample per tile step plus the far edge, so no tile between the two
    # edges of the box is skipped.
    lons = list(np.arange(minx, maxx, TILE_SIZE_DEG)) + [maxx]
    lats = list(np.arange(miny, maxy, TILE_SIZE_DEG)) + [maxy]
    return sorted({get_worldcover_tile_name(lon, lat) for lon in lons for lat in lats})


def _mosaic_tiles(tile_paths, bounds, mosaic_path):
    """Merge the part of several tiles inside ``bounds`` into one GeoTIFF."""
    sources = []
    try:
        for path in tile_paths:
            sources.append(rasterio.open(path))

        # Snap the requested window outwards onto the pixel grid of the first
        # tile so mosaic pixels coincide with source pixels (no resampling
        # shift). Assumes all tiles share one grid and resolution.
        res_x, res_y = sources[0].res
        origin_x, origin_y = sources[0].bounds.left, sources[0].bounds.top
        minx, miny, maxx, maxy = bounds
        left = origin_x + np.floor((minx - origin_x) / res_x) * res_x
        right = origin_x + np.ceil((maxx - origin_x) / res_x) * res_x
        top = origin_y - np.floor((origin_y - maxy) / res_y) * res_y
        bottom = origin_y - np.ceil((origin_y - miny) / res_y) * res_y

        # Restricting merge() to the AOI window keeps memory use small; whole
        # tiles are never loaded.
        mosaic, mosaic_transform = merge(sources, bounds=(left, bottom, right, top))
        profile = sources[0].meta.copy()
    finally:
        for source in sources:
            source.close()

    profile.update({
        "driver": "GTiff",
        "height": mosaic.shape[1],
        "width": mosaic.shape[2],
        "transform": mosaic_transform,
        "compress": "lzw"
    })
    with rasterio.open(mosaic_path, "w", **profile) as dest:
        dest.write(mosaic)
    return mosaic_path


successful_downloads = 0
failed_downloads = 0
skipped_existing = 0

for city_data in cities_to_process:
    city_name = city_data["name"]
    geojson_file = city_data["geojson_file"]
    worldcover_output = city_data["worldcover_output"]

    print(f"\n{'='*80}")
    print(f"Processing: {city_name}")
    print(f"{'='*80}")

    # Skip if already exists
    if city_data["already_exists"]:
        print(f"  ✓ WorldCover already exists, skipping download")
        skipped_existing += 1
        continue

    mosaic_path = None  # temporary multi-tile mosaic, removed in `finally`
    try:
        # Load the AOI in lon/lat, handling CRS the same way as
        # clip_worldcover_to_aoi, so the tile lookup gets degrees.
        aoi = gpd.read_file(geojson_file)
        if aoi.empty:
            raise ValueError("AOI contains no features")
        if aoi.crs is None:
            aoi = aoi.set_crs("EPSG:4326")
        aoi = aoi.to_crs("EPSG:4326")

        minx, miny, maxx, maxy = aoi.total_bounds
        center_lon = (minx + maxx) / 2
        center_lat = (miny + maxy) / 2

        print(f"  City center: {center_lat:.4f}°N, {center_lon:.4f}°E")

        # The centre tile is mandatory; further tiles are needed when the AOI
        # crosses a 3-degree tile boundary.
        center_tile = get_worldcover_tile_name(center_lon, center_lat)
        extra_tiles = [
            name for name in _tiles_covering_bounds(minx, miny, maxx, maxy)
            if name != center_tile
        ]
        print(f"  Required tile: {center_tile}")
        if extra_tiles:
            print(f"  AOI also touches: {', '.join(extra_tiles)}")

        # Download tile
        tile_path = download_worldcover_tile(center_tile, WORLDCOVER_FOLDER)

        if not tile_path:
            print(f"  ✗ Failed to download WorldCover tile")
            failed_downloads += 1
            continue

        # Neighbouring tiles are best-effort: a missing neighbour must not
        # fail a city that the centre tile alone could still serve.
        tile_paths = [tile_path]
        for extra_tile in extra_tiles:
            extra_path = download_worldcover_tile(extra_tile, WORLDCOVER_FOLDER)
            if extra_path:
                tile_paths.append(extra_path)
            else:
                print(f"  ⚠ Tile {extra_tile} unavailable; the AOI part inside it will be missing")

        if len(tile_paths) > 1:
            print(f"  Merging {len(tile_paths)} tiles...")
            mosaic_path = os.path.join(
                WORLDCOVER_FOLDER, f"{city_name}_WorldCover_2021_mosaic.tmp.tif"
            )
            clip_source = _mosaic_tiles(tile_paths, (minx, miny, maxx, maxy), mosaic_path)
        else:
            clip_source = tile_path

        # Clip to AOI
        print(f"  Clipping to AOI...")
        clip_worldcover_to_aoi(clip_source, geojson_file, worldcover_output)

        # Verify output
        with rasterio.open(worldcover_output) as src:
            data = src.read(1)
            nodata = src.nodata if src.nodata is not None else 0

            print(f"  ✓ Clipped WorldCover saved: {worldcover_output}")
            print(f"    Dimensions: {src.height}x{src.width} pixels")
            print(f"    File size: {os.path.getsize(worldcover_output) / (1024**2):.2f} MB")

        # Calculate green coverage over valid pixels only, so the nodata fill
        # outside the AOI polygon does not dilute the percentage.
        valid_count = int((data != nodata).sum())
        green_count = int(np.isin(data, GREEN_CLASSES).sum())
        green_percentage = 100 * green_count / valid_count if valid_count else 0.0
        print(f"    Green coverage: {green_percentage:.2f}%")

        successful_downloads += 1

    except Exception as e:
        print(f"  ✗ Error processing {city_name}: {e}")
        failed_downloads += 1
        # Drop a half-written output so the next run does not treat it as done.
        if os.path.exists(worldcover_output):
            try:
                os.remove(worldcover_output)
            except OSError as cleanup_error:
                print(f"    ⚠ Could not remove partial output: {cleanup_error}")
        continue

    finally:
        if mosaic_path and os.path.exists(mosaic_path):
            os.remove(mosaic_path)

print(f"\n{'='*80}")
print("PROCESSING SUMMARY")
print(f"{'='*80}")
print(f"  Total cities: {len(cities_to_process)}")
print(f"  Already existed: {skipped_existing}")
print(f"  Successfully downloaded: {successful_downloads}")
print(f"  Failed: {failed_downloads}")
print(f"{'='*80}")

## 6. Verify All Cities Have WorldCover

In [None]:
print("\n" + "="*80)
print("FINAL VERIFICATION")
print("="*80)

complete_cities = 0
incomplete_cities = []

print("\nCity Status:")
for city_data in cities_to_process:
    city_name = city_data["name"]

    # (label, path) pairs, listed in the order they are reported.
    required_files = (
        ("Stack", city_data["stack_file"]),
        ("GeoJSON", city_data["geojson_file"]),
        ("WorldCover", city_data["worldcover_output"]),
    )
    missing = [
        f"missing {label}"
        for label, path in required_files
        if not os.path.exists(path)
    ]

    if missing:
        status = ", ".join(missing)
        print(f"  ✗ {city_name:20s} - Incomplete ({status})")
        incomplete_cities.append(city_name)
        continue

    # All three inputs are on disk.
    print(f"  ✓ {city_name:20s} - Complete (Stack, GeoJSON, WorldCover)")
    complete_cities += 1

print(f"\n{'='*80}")
print(f"Cities ready for training: {complete_cities}/{len(cities_to_process)}")

if not incomplete_cities:
    print("\n✓ All cities are complete and ready for training!")
    print("\nYou can now run the Multi_City_WorldCover_Training notebook.")
else:
    print(f"\nIncomplete cities: {', '.join(incomplete_cities)}")
    print("\nPlease check the errors above and ensure:")
    print("  1. GeoJSON files exist in the sentinel_data folder")
    print("  2. Internet connection is stable for downloads")
    print("  3. Sufficient disk space is available")

print(f"{'='*80}")

## 7. WorldCover Class Legend

In [None]:
print("\n" + "="*80)
print("WORLDCOVER 2021 CLASS LEGEND")
print("="*80)

print("\nClass | Description              | Used for Training")
print("-" * 80)

# One pre-formatted table row per WorldCover class code.
legend_rows = (
    "  10  | Tree cover               | ✓ GREEN",
    "  20  | Shrubland                | ✓ GREEN",
    "  30  | Grassland                | ✓ GREEN",
    "  40  | Cropland                 | ✗ Non-green",
    "  50  | Built-up                 | ✗ Non-green",
    "  60  | Bare/sparse vegetation   | ✗ Non-green",
    "  70  | Snow and ice             | ✗ Non-green",
    "  80  | Permanent water bodies   | ✗ Non-green",
    "  90  | Herbaceous wetland       | ✗ Non-green",
    "  95  | Mangroves                | ✓ GREEN",
    " 100  | Moss and lichen          | ✗ Non-green",
)
for row in legend_rows:
    print(row)

print("\n" + "="*80)
print("Green classes used for training: 10, 20, 30, 95")
print("="*80)