In [4]:
! pip install earthengine-api geemap

^C


Collecting earthengine-api
  Using cached earthengine_api-1.5.5-py3-none-any.whl.metadata (2.1 kB)
Collecting geemap
  Using cached geemap-0.35.3-py2.py3-none-any.whl.metadata (12 kB)
Collecting google-cloud-storage (from earthengine-api)
  Using cached google_cloud_storage-3.1.0-py2.py3-none-any.whl.metadata (12 kB)
Collecting google-api-python-client>=1.12.1 (from earthengine-api)
  Using cached google_api_python_client-2.163.0-py2.py3-none-any.whl.metadata (6.7 kB)
Collecting google-auth>=1.4.1 (from earthengine-api)
  Using cached google_auth-2.38.0-py2.py3-none-any.whl.metadata (4.8 kB)
Collecting google-auth-httplib2>=0.0.3 (from earthengine-api)
  Using cached google_auth_httplib2-0.2.0-py2.py3-none-any.whl.metadata (2.2 kB)
Collecting bqplot (from geemap)
  Using cached bqplot-0.12.44-py2.py3-none-any.whl.metadata (6.4 kB)
Collecting eerepr>=0.1.0 (from geemap)
  Using cached eerepr-0.1.1-py3-none-any.whl.metadata (4.3 kB)
Collecting folium>=0.17.0 (from geemap)
  Using cached 


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
import ee
import geemap

# Authenticate and initialize
ee.Authenticate()
ee.Initialize()



Successfully saved authorization token.


In [11]:
import pandas as pd
import ee
import datetime

# Authenticate & Initialize Earth Engine
ee.Initialize()

# Load historical flood data
df_historical = pd.read_csv('./datasets/floods_inventory/info.csv')

# Filter data to include only floods after 2015
df_historical['Start Date'] = pd.to_datetime(df_historical['Start Date'], errors='coerce')
df_historical['End Date'] = pd.to_datetime(df_historical['End Date'], errors='coerce')
df_historical = df_historical[df_historical['Start Date'].dt.year >= 2015]

# Define buffer size (e.g., 50 km)
buffer_size = 50000  # 50 km

# Cloud Mask Function (QA60-based)
def mask_s2_clouds(image):
    qa = image.select("QA60")  # Sentinel-2 cloud mask band

    # Bits 10 and 11 are clouds and cirrus, respectively
    cloud_bit_mask = 1 << 10
    cirrus_bit_mask = 1 << 11

    # Both flags should be zero for clear pixels
    mask = qa.bitwiseAnd(cloud_bit_mask).eq(0).And(
        qa.bitwiseAnd(cirrus_bit_mask).eq(0)
    )

    # Apply mask and scale the image
    return image.updateMask(mask).divide(10000)

# Loop through each row and extract imagery
for index, row in df_historical.iterrows():
    try:
        lat, lon = row['Latitude'], row['Longitude']
        
        # Validate coordinates
        if not (-90 <= lat <= 90) or not (-180 <= lon <= 180):
            print(f"Invalid coordinates: ({lat}, {lon}) at index {index}. Skipping...")
            continue
            
        start_date, end_date = row['Start Date'], row['End Date']

        # Ensure valid date range
        if start_date >= end_date:
            print(f"Invalid date range at index {index}: {start_date} to {end_date}. Swapping dates.")
            start_date, end_date = end_date, start_date

        # Ensure at least one day difference
        if (end_date - start_date).days < 1:
            print(f"Date range too short at index {index}. Extending end date by 1 day.")
            end_date = start_date + datetime.timedelta(days=1)

        # Convert dates to string format for GEE
        start_date_str = start_date.strftime('%Y-%m-%d')
        end_date_str = end_date.strftime('%Y-%m-%d')

        # Define AOI (point with buffer)
        poi = ee.Geometry.Point([lon, lat])
        aoi = poi.buffer(buffer_size)

        # Load Sentinel-2 Harmonized data with cloud masking
        s2_collection = (ee.ImageCollection('COPERNICUS/S2_HARMONIZED')
                         .filterBounds(aoi)
                         .filterDate(start_date_str, end_date_str)
                         .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10))  # Stricter cloud filtering
                         .map(mask_s2_clouds)  # Apply cloud mask
                         .sort("CLOUDY_PIXEL_PERCENTAGE"))

        # Check if images are available
        count = s2_collection.size().getInfo()
        print(f"Found {count} images for location ({lat}, {lon}) from {start_date_str} to {end_date_str}")

        if count == 0:
            print(f"No suitable images found for location ({lat}, {lon}). Skipping...")
            continue

        # Select the first image from the collection
        image = s2_collection.median().clip(aoi)

        # Visualization Parameters
        rgb_vis = {
            'min': 0.0,
            'max': 0.3,
            'bands': ['B4', 'B3', 'B2'],  # True Color Composite
        }

        # Define export task
        task = ee.batch.Export.image.toDrive(
    image=image,
    description=f"Sentinel2_Flood_{index}",
    folder="GEE_Flood_Images",
    fileNamePrefix=f"Sentinel2_Flood_{index}_{lat}_{lon}",
    scale=10,
    region=aoi,
    fileFormat='GeoTIFF',
    maxPixels=1e9  # Increase max allowed pixels
)


        # Start the export task
        task.start()
        print(f"Exporting {index} to Google Drive...")

    except Exception as e:
        print(f"Error processing location ({lat}, {lon}): {str(e)}")

print("All export tasks initiated! Check Google Drive for results.")


Found 0 images for location (26.2074, 82.6165) from 2015-07-15 to 2015-08-19
No suitable images found for location (26.2074, 82.6165). Skipping...
Found 0 images for location (26.8946, 93.751) from 2015-08-13 to 2015-11-09
No suitable images found for location (26.8946, 93.751). Skipping...
Invalid date range at index 2: 2015-10-11 00:00:00 to 2015-04-12 00:00:00. Swapping dates.
Found 7 images for location (11.8278, 78.8554) from 2015-04-12 to 2015-10-11
Exporting 2 to Google Drive...
Invalid date range at index 3: 2016-04-20 00:00:00 to 2016-01-05 00:00:00. Swapping dates.
Found 22 images for location (27.464, 95.6068) from 2016-01-05 to 2016-04-20
Exporting 3 to Google Drive...
Found 2 images for location (27.068, 93.949) from 2016-06-29 to 2016-08-26
Exporting 4 to Google Drive...
Invalid date range at index 5: 2016-07-07 00:00:00 to 2016-03-08 00:00:00. Swapping dates.
Found 26 images for location (22.8107, 80.8349) from 2016-03-08 to 2016-07-07
Exporting 5 to Google Drive...
Foun

In [14]:
import os
import re  # Import regex module

# Initialize a list to store extracted data
satellite_features = []

for tiff in geo_tiffs:
    with rasterio.open(tiff) as src:
        image_array = src.read()  # Read all bands

        # Sentinel-2 Band Mapping
        B4 = image_array[3]  # Red
        B3 = image_array[2]  # Green
        B8 = image_array[7]  # NIR
        B11 = image_array[10] # SWIR

        # Compute indices
        ndvi = np.nanmean(calculate_ndvi(B8, B4))
        ndwi = np.nanmean(calculate_ndwi(B3, B8))
        ndbi = np.nanmean(calculate_ndbi(B11, B8))

        # Extract metadata from filename
        filename = os.path.basename(tiff)  # Get filename only
        metadata = filename.replace(".tif", "").split("_")  # Remove extension and split

        try:
            # Extract latitude from metadata[5]
            lat = float(metadata[5])

            # Extract longitude: Remove the extra `-000000XXXX` part using regex
            lon = float(re.split(r'-', metadata[6])[0])  # Take only the first number

        except ValueError:
            print(f"Skipping file {filename} due to incorrect metadata format.")
            continue

        # Store extracted features
        satellite_features.append({
            "Latitude": lat,
            "Longitude": lon,
            "NDVI": ndvi,
            "NDWI": ndwi,
            "NDBI": ndbi
        })

# Convert to DataFrame
df_satellite = pd.DataFrame(satellite_features)

# Save extracted features
df_satellite.to_csv("new_extracted_satellite_features.csv", index=False)

print("Satellite features extracted successfully!")


ValueError: could not convert string to float: 'Flood'

In [17]:
# Print the filenames to check the structure
print(geo_tiffs[:5])  # Print first 5 filenames

for tiff in geo_tiffs[:5]:  # Print for first 5 files
    print(tiff.split("_"))



['./datasets/new_satellite_imagery\\Sentinel2_Flood_12_28.1657_79.076-0000000000-0000000000.tif', './datasets/new_satellite_imagery\\Sentinel2_Flood_12_28.1657_79.076-0000000000-0000007680.tif', './datasets/new_satellite_imagery\\Sentinel2_Flood_12_28.1657_79.076-0000007680-0000000000.tif', './datasets/new_satellite_imagery\\Sentinel2_Flood_12_28.1657_79.076-0000007680-0000007680.tif', './datasets/new_satellite_imagery\\Sentinel2_Flood_13_10.489_79.0039-0000000000-0000000000.tif']
['./datasets/new', 'satellite', 'imagery\\Sentinel2', 'Flood', '12', '28.1657', '79.076-0000000000-0000000000.tif']
['./datasets/new', 'satellite', 'imagery\\Sentinel2', 'Flood', '12', '28.1657', '79.076-0000000000-0000007680.tif']
['./datasets/new', 'satellite', 'imagery\\Sentinel2', 'Flood', '12', '28.1657', '79.076-0000007680-0000000000.tif']
['./datasets/new', 'satellite', 'imagery\\Sentinel2', 'Flood', '12', '28.1657', '79.076-0000007680-0000007680.tif']
['./datasets/new', 'satellite', 'imagery\\Sentinel