In [2]:
# Cell 1: Environment Setup
import ee
import geemap
import pandas as pd
import numpy as np
from datetime import datetime
import json

# Earth Engine authentication and initialization
try:
    ee.Initialize(project='nasa-flood')  # Replace with your project ID
except Exception as e:
    print("Authenticating with Earth Engine...")
    ee.Authenticate()
    ee.Initialize(project='nasa-flood')

print("Earth Engine initialized successfully")
print(f"Current time: {datetime.now()}")

Earth Engine initialized successfully
Current time: 2025-10-04 21:20:05.961411


In [3]:
# Cell 2: Study Area Definition
# Vietnam Mekong Delta (focused on lower delta region)
aoi = ee.Geometry.Rectangle([104.5, 8.5, 106.8, 11.0])

# Area of Interest metadata
aoi_metadata = {
    "name": "Vietnam Mekong Delta",
    "bbox": [104.5, 8.5, 106.8, 11.0],
    "description": "Lower Mekong Delta including Ho Chi Minh City south to Ca Mau Peninsula",
    "area_km2": aoi.area().divide(1e6).getInfo(),
    "provinces": ["Long An", "Tien Giang", "Ben Tre", "Vinh Long", "Tra Vinh", 
                  "Can Tho", "Soc Trang", "Bac Lieu", "Ca Mau"]
}

print(f"Study Area: {aoi_metadata['name']}")
print(f"Total Area: {aoi_metadata['area_km2']:,.2f} km²")

# Save metadata
with open('../data/metadata/aoi_metadata.json', 'w') as f:
    json.dump(aoi_metadata, f, indent=2)


Study Area: Vietnam Mekong Delta
Total Area: 70,071.10 km²


In [4]:
# Cell 3: Visualization of Study Area
Map = geemap.Map(center=[10.0, 105.5], zoom=8)
Map.addLayer(aoi, {'color': 'red'}, 'Study Area: Vietnam Mekong Delta')
Map.add_basemap('HYBRID')
Map


Map(center=[10.0, 105.5], controls=(WidgetControl(options=['position', 'transparent_bg'], position='topright',…

In [5]:
# Cell 4: Baseline Water Mapping (2005-2006)
# Using Landsat 5 for pre-dam baseline
print("Collecting Landsat 5 baseline data (2005-2006)...")

def mask_l5_clouds(image):
    """Apply cloud mask for Landsat 5 Collection 2"""
    qa = image.select('QA_PIXEL')
    cloud_shadow = 1 << 4
    clouds = 1 << 3
    mask = qa.bitwiseAnd(cloud_shadow).eq(0).And(qa.bitwiseAnd(clouds).eq(0))
    return image.updateMask(mask)

def scale_l5_bands(image):
    """Scale Landsat 5 Collection 2 optical and thermal bands"""
    optical = image.select('SR_B.').multiply(0.0000275).add(-0.2)
    thermal = image.select('ST_B6').multiply(0.00341802).add(149.0)
    return image.addBands(optical, None, True).addBands(thermal, None, True)

# Dry season baseline (November 2005 - April 2006)
landsat_dry = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
    .filterBounds(aoi) \
    .filterDate('2005-11-01', '2006-04-30') \
    .map(mask_l5_clouds) \
    .map(scale_l5_bands) \
    .median() \
    .clip(aoi)

# Wet season baseline (May - October 2006)
landsat_wet = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
    .filterBounds(aoi) \
    .filterDate('2006-05-01', '2006-10-31') \
    .map(mask_l5_clouds) \
    .map(scale_l5_bands) \
    .median() \
    .clip(aoi)

print("Landsat 5 baseline composites created")

# Visualization parameters
vis_params_landsat = {
    'bands': ['SR_B3', 'SR_B2', 'SR_B1'],
    'min': 0.0,
    'max': 0.3,
    'gamma': 1.4
}


Collecting Landsat 5 baseline data (2005-2006)...
Landsat 5 baseline composites created


In [6]:
# Cell 5: JRC Global Surface Water Baseline
print("Loading JRC Global Surface Water dataset...")

jrc = ee.Image('JRC/GSW1_4/GlobalSurfaceWater').clip(aoi)

# Extract key bands
jrc_seasonality = jrc.select('seasonality')
jrc_occurrence = jrc.select('occurrence')
jrc_change = jrc.select('change_abs')

# Classify water permanence
# seasonality: 1=permanent, 2-3=seasonal, 4-12=ephemeral
permanent_water = jrc_seasonality.eq(1)
seasonal_water = jrc_seasonality.gte(2).And(jrc_seasonality.lte(3))

# Calculate baseline water areas
permanent_area = permanent_water.multiply(ee.Image.pixelArea()) \
    .reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=aoi,
        scale=30,
        maxPixels=1e12
    )

seasonal_area = seasonal_water.multiply(ee.Image.pixelArea()) \
    .reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=aoi,
        scale=30,
        maxPixels=1e12
    )

baseline_water = {
    "permanent_water_km2": ee.Number(permanent_area.get('seasonality')).divide(1e6).getInfo(),
    "seasonal_water_km2": ee.Number(seasonal_area.get('seasonality')).divide(1e6).getInfo(),
    "data_period": "1984-2021",
    "source": "JRC Global Surface Water v1.4"
}

print(f"Permanent Water: {baseline_water['permanent_water_km2']:,.2f} km²")
print(f"Seasonal Water: {baseline_water['seasonal_water_km2']:,.2f} km²")

# Save baseline metadata
with open('../data/metadata/baseline_water.json', 'w') as f:
    json.dump(baseline_water, f, indent=2)

Loading JRC Global Surface Water dataset...
Permanent Water: 4,453.64 km²
Seasonal Water: 5,184.13 km²


In [7]:
# Cell 6: Visualize Baseline Conditions
Map2 = geemap.Map(center=[10.0, 105.5], zoom=8)
Map2.addLayer(landsat_dry, vis_params_landsat, 'Landsat 5 - Dry Season (2005-06)', False)
Map2.addLayer(landsat_wet, vis_params_landsat, 'Landsat 5 - Wet Season (2006)', False)
Map2.addLayer(permanent_water.selfMask(), {'palette': 'darkblue'}, 'Permanent Water (JRC)')
Map2.addLayer(seasonal_water.selfMask(), {'palette': 'lightblue'}, 'Seasonal Water (JRC)', False)
Map2.addLayerControl()
Map2

Map(center=[10.0, 105.5], controls=(WidgetControl(options=['position', 'transparent_bg'], position='topright',…

In [8]:
# Cell 7: Sentinel-1 Data Availability Check
print("Checking Sentinel-1 SAR data availability...")

# Sentinel-1 became operational in 2014, we analyze from 2015
s1_test = ee.ImageCollection('COPERNICUS/S1_GRD') \
    .filterBounds(aoi) \
    .filterDate('2015-01-01', '2024-12-31') \
    .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV')) \
    .filter(ee.Filter.eq('instrumentMode', 'IW'))

s1_count = s1_test.size().getInfo()
print(f"Total Sentinel-1 images available (2015-2024): {s1_count}")

# Check data availability by year
s1_availability = []
for year in range(2015, 2025):
    count = ee.ImageCollection('COPERNICUS/S1_GRD') \
        .filterBounds(aoi) \
        .filterDate(f'{year}-01-01', f'{year}-12-31') \
        .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV')) \
        .size().getInfo()
    
    s1_availability.append({'year': year, 'image_count': count})
    print(f"  {year}: {count} images")

# Save availability data
pd.DataFrame(s1_availability).to_csv('../data/metadata/s1_availability.csv', index=False)

Checking Sentinel-1 SAR data availability...
Total Sentinel-1 images available (2015-2024): 3563
  2015: 91 images
  2016: 185 images
  2017: 400 images
  2018: 475 images
  2019: 504 images
  2020: 485 images
  2021: 439 images
  2022: 346 images
  2023: 310 images
  2024: 318 images


In [9]:
# Cell 8: CHIRPS Precipitation Data Check
print("Checking CHIRPS precipitation data availability...")

chirps_test = ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY') \
    .filterBounds(aoi) \
    .filterDate('2015-01-01', '2024-12-31')

chirps_count = chirps_test.size().getInfo()
print(f"Total CHIRPS daily images available: {chirps_count}")

# Test precipitation extraction for one year
test_precip = ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY') \
    .filterBounds(aoi) \
    .filterDate('2015-08-01', '2015-09-30') \
    .sum()

test_precip_value = test_precip.reduceRegion(
    reducer=ee.Reducer.mean(),
    geometry=aoi,
    scale=5000,
    maxPixels=1e12
).get('precipitation').getInfo()

print(f"Test precipitation (Aug-Sep 2015): {test_precip_value:.2f} mm")

Checking CHIRPS precipitation data availability...
Total CHIRPS daily images available: 3652
Test precipitation (Aug-Sep 2015): 475.27 mm


In [10]:
# Cell 9: Data Collection Summary
print("\n" + "="*60)
print("DATA ACQUISITION SUMMARY")
print("="*60)
print(f"Study Area: {aoi_metadata['name']}")
print(f"Area: {aoi_metadata['area_km2']:,.2f} km²")
print(f"\nBaseline Period: 2005-2006")
print(f"  - Permanent Water: {baseline_water['permanent_water_km2']:,.2f} km²")
print(f"  - Seasonal Water: {baseline_water['seasonal_water_km2']:,.2f} km²")
print(f"\nAnalysis Period: 2015-2024")
print(f"  - Sentinel-1 images: {s1_count}")
print(f"  - CHIRPS daily images: {chirps_count}")
print("\nData sources ready for flood analysis.")
print("="*60)


DATA ACQUISITION SUMMARY
Study Area: Vietnam Mekong Delta
Area: 70,071.10 km²

Baseline Period: 2005-2006
  - Permanent Water: 4,453.64 km²
  - Seasonal Water: 5,184.13 km²

Analysis Period: 2015-2024
  - Sentinel-1 images: 3563
  - CHIRPS daily images: 3652

Data sources ready for flood analysis.


In [11]:
# Cell 10: Export reference data (optional)
# Uncomment to export baseline water mask as GeoTIFF
"""
task = ee.batch.Export.image.toDrive(
    image=permanent_water.toByte(),
    description='mekong_permanent_water_baseline',
    folder='MekongWatch',
    region=aoi,
    scale=30,
    maxPixels=1e12
)
task.start()
print("Export task started. Check Google Drive 'MekongWatch' folder.")
"""

print("\n✓ Data acquisition complete. Proceed to notebook 02_flood_analysis.ipynb")


✓ Data acquisition complete. Proceed to notebook 02_flood_analysis.ipynb
