In [1]:
# import sys
# !{sys.executable} -m pip install pip earthengine-api
# !{sys.executable} -m pip install pip geemap

In [2]:
import ee
# ee.Authenticate()

In [3]:
# Start the Earth Engine client session; the ee.* calls in the cells below need it.
# NOTE(review): presumably relies on credentials cached by an earlier
# ee.Authenticate() run (commented out in the import cell) — confirm.
ee.Initialize()

In [4]:
import geemap
import ipyleaflet
import numpy as np
import requests
import os
import pandas as pd
import rasterio
import boto3
import geopandas as gpd
import glob

# Define function to calculate average green mask

In [5]:
## Calculate and load vegetation cover raster
def extract_greenmask(boundary_path, startdate, enddate, NDVIthreshold = 0.4):
    """Build a greenest-pixel NDVI image and a thresholded vegetation mask.

    Sentinel-2 scenes ("COPERNICUS/S2") that intersect the boundaries and fall
    inside the date window are given an NDVI band, then collapsed into a single
    image by keeping, per pixel, the observation with the highest NDVI.

    Parameters
    ----------
    boundary_path : str
        Path or URL of a vector file readable by ``geopandas.read_file``.
        Only the geometry column is used, to spatially filter the collection.
    startdate, enddate : str
        Date window handed to ``ImageCollection.filterDate``.
    NDVIthreshold : float, default 0.4
        Pixels whose NDVI is greater than or equal to this value count as
        vegetation.

    Returns
    -------
    ndvi : ee.Image
        Single-band ('NDVI') float image, masked by its own values.
    greenmask : ee.Image
        100 where ``ndvi >= NDVIthreshold`` and 0 elsewhere, so that a zonal
        mean reads directly as percent vegetation cover.
    """
    gdf = gpd.read_file(boundary_path)
    FC = geemap.geopandas_to_ee(gdf[['geometry']])

    s2 = ee.ImageCollection("COPERNICUS/S2")

    def addNDVI(image):
        # normalizedDifference(['B8', 'B4']) = (B8 - B4) / (B8 + B4)
        ndvi_band = image.normalizedDifference(['B8', 'B4']).rename('NDVI')
        return image.addBands(ndvi_band)

    green = s2.filterDate(startdate, enddate).filterBounds(FC).map(addNDVI)
    # Greenest-pixel composite: per pixel, keep the scene with the highest NDVI.
    green = green.qualityMosaic('NDVI').select('NDVI').float()

    # NOTE(review): the NDVI values themselves are used as the mask, so pixels
    # with NDVI <= 0 are dropped and the remaining ones carry a fractional mask.
    # Weighted reducers such as ee.Reducer.mean() may use that mask as a weight;
    # confirm this is intended rather than a binary mask.
    ndvi = green.updateMask(green.select('NDVI'))
    greenmask = ndvi.gte(NDVIthreshold)

    # convert to percentages
    greenmask = greenmask.multiply(100)

    return ndvi, greenmask

# Define function to compute zonal statistics

In [6]:
def zonal_stats(boundary_path, gee_image, var_name, agg = 'mean', scale = 30):
    """Aggregate an Earth Engine image over every boundary polygon.

    Parameters
    ----------
    boundary_path : str
        Path or URL of a vector file readable by ``geopandas.read_file``;
        each row's geometry is one zone.
    gee_image : ee.Image
        Image to summarise (wrapped in an ``ee.ImageCollection`` and flattened
        with ``toBands`` before reduction).
    var_name : str
        Name of the output column that holds the statistic.
    agg : {'mean', 'max', 'min'}, default 'mean'
        Statistic computed per zone.
    scale : number, default 30
        Value passed as ``scale`` to ``reduceRegions``.

    Returns
    -------
    geopandas.GeoDataFrame
        Zone geometries, the statistic in ``var_name`` and the non-geometry
        attributes of the boundary file, in the boundary file's CRS.

    Raises
    ------
    ValueError
        If ``agg`` is not one of the supported statistics.
    """
    supported_aggs = ('mean', 'max', 'min')
    # Fail fast, before any file or Earth Engine access, instead of hitting an
    # unbound `reducer` further down.
    if agg not in supported_aggs:
        raise ValueError(
            "Unsupported agg %r; expected one of %s" % (agg, ', '.join(supported_aggs))
        )

    gdf = gpd.read_file(boundary_path)
    gdf_id = gdf[['geometry']]

    FC = geemap.geopandas_to_ee(gdf_id)

    # ee.Reducer.mean / .max / .min share their name with the `agg` keyword.
    reducer = getattr(ee.Reducer, agg)()

    ee_reduceRegion = ee.ImageCollection(gee_image).toBands().reduceRegions(
        collection = FC,
        reducer = reducer,
        scale = scale,
        tileScale = 4)

    zs = geemap.ee_to_geopandas(ee_reduceRegion)

    # The output property is named after the reducer ('mean', 'max' or 'min'),
    # so rename from `agg` rather than from a hard-coded 'mean'.
    # NOTE(review): assumes a single-band image; with several bands the
    # properties are presumably named after the bands — confirm.
    zs = zs.rename({agg: var_name}, axis = 1)

    # Re-attach the boundary attributes by row position and restore the CRS.
    z_stats = zs.join(gdf.drop('geometry', axis = 1)).set_crs(gdf.crs)

    return z_stats

In [7]:
#boundary_path = 'https://cities-socio-economic-vulnerability.s3.eu-west-3.amazonaws.com/data/dev/india_2/census_geo/2011/ward/01/census_geo.geojson'

In [8]:
# Local GeoJSON holding the boundary polygons; passed to extract_greenmask
# and zonal_stats in the cells below.
boundary_path = '../Data/Mumbai_Wards.geojson'

In [9]:
# Greenest-pixel NDVI image and vegetation mask for the 2020-01-01 to
# 2021-01-01 window, using an NDVI threshold of 0.4.
ndvi_test, greenmask_test = extract_greenmask(
    boundary_path,
    startdate='2020-01-01',
    enddate='2021-01-01',
    NDVIthreshold=0.4,
)

In [10]:
# Show the NDVI image object returned above as the cell output.
ndvi_test

In [11]:
# Mean NDVI per boundary polygon, reduced at a 30 m scale.
zs_ndvi = zonal_stats(
    boundary_path,
    ndvi_test,
    var_name='ndvi_mean',
    agg='mean',
    scale=30,
)

In [12]:
# Preview the first rows of the zonal-statistics table.
zs_ndvi.head()

Unnamed: 0,geometry,ndvi_mean,index,year,entity_level,state_id,state_name,district_id,district_name,subdistrict_id,...,hh_prop_ntv,hh_prop_ninet,hh_prop_landline,hh_prop_cp,hh_prop_landlineandcp,hh_prop_bike,hh_prop_mtrcl,hh_prop_ncar,hh_prop_allgoods,hh_prop_ngoods
0,"POLYGON ((72.83282 18.96774, 72.83282 18.96764...",0.367601,0,2011,ward,27,Maharashtra,519,Mumbai,99999,...,11.5,13.3,11.1,63.2,21.6,11.1,23.3,90.3,14.5,0.9
1,"POLYGON ((72.81882 18.94938, 72.81915 18.94964...",0.265567,1,2011,ward,27,Maharashtra,519,Mumbai,99999,...,8.0,20.2,15.2,37.0,45.0,10.2,30.7,88.0,22.7,0.8
2,"POLYGON ((72.84727 19.12803, 72.84716 19.12804...",0.450881,2,2011,ward,27,Maharashtra,518,Mumbai Suburban,99999,...,10.6,12.4,10.4,52.1,33.2,12.8,18.8,75.5,27.0,1.6
3,"POLYGON ((72.94663 19.15775, 72.94668 19.15760...",0.423307,3,2011,ward,27,Maharashtra,518,Mumbai Suburban,99999,...,9.7,13.9,10.0,51.0,34.7,15.3,21.5,79.8,24.5,1.5
4,"POLYGON ((72.89997 19.11999, 72.89978 19.11968...",0.513325,4,2011,ward,27,Maharashtra,518,Mumbai Suburban,99999,...,15.7,8.3,4.9,76.4,11.9,7.2,9.5,96.7,6.4,2.7


In [17]:
# Keep only the 'index' identifier, the NDVI statistic and the geometry, then
# write them to GeoJSON. The driver is passed explicitly because older
# geopandas releases default to ESRI Shapefile whatever the file extension.
zs_ndvi[['index', 'ndvi_mean', 'geometry']].to_file(
    '../Data/ZS_NDVI_Test.geojson',
    driver='GeoJSON',
)

# Store data in Google Drive

In [57]:
# Export the NDVI ee.Image as a GeoTIFF to Google Drive.
# The export region is rebuilt from the boundary file so this cell runs on a
# fresh kernel instead of relying on variables left over from earlier sessions.
export_region = geemap.geopandas_to_ee(
    gpd.read_file(boundary_path)[['geometry']]
).geometry()

geemap.ee_export_image_to_drive(
    ndvi_test, # tip: convert to integer values, round and use toByte() to reduce file size
    description = 'NDVI_Test',
    folder='data',
    scale=30, # 10 for native resolution, 50 for reduced storage needs
    region=export_region,
    maxPixels = 5000000000
)

# Upload to AWS S3

Because the rasters are too large to download directly from Earth Engine, they are first exported to a Google Drive folder, then downloaded from Drive to the local machine and finally uploaded to the S3 bucket.

In [None]:
# connect to s3
# The credentials CSV location can be overridden with the AWS_CREDENTIALS_CSV
# environment variable so the notebook is not tied to one machine's home
# directory; the original path remains the default.
aws_credentials_path = os.environ.get(
    'AWS_CREDENTIALS_CSV',
    '/home/jovyan/PlanetaryComputerExamples/aws_credentials.csv',
)
aws_credentials = pd.read_csv(aws_credentials_path)

# The first row of the CSV carries the key pair.
aws_key = aws_credentials.iloc[0]['Access key ID']
aws_secret = aws_credentials.iloc[0]['Secret access key']

s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret
)

In [None]:
# Push one local vegetation-cover raster per boundary row to the S3 bucket.
# NOTE(review): boundary_georef, yearStr, NDVIthresholdStr, out_dir and
# bucket_name are not defined in the visible notebook — they must exist in the
# kernel before this cell runs.
for i in range(len(boundary_georef)):
    geo_name = boundary_georef.loc[i, 'geo_name']
    boundary_id = geo_name + '-' + boundary_georef.loc[i, 'aoi_boundary_name']
    print("\n boundary_id: " + boundary_id)

    # File-name stem shared by the local raster and the S3 object key.
    raster_stem = boundary_id + '-vegetation-cover-' + yearStr + '-NDVItheshold' + NDVIthresholdStr

    # read local raster
    city_file = 'data/' + raster_stem + '.tif'
    raster_path = os.path.join(out_dir, city_file)

    # upload in s3 (the object key carries an extra '-50m' suffix)
    s3.meta.client.upload_file(
        Filename=raster_path,
        Bucket=bucket_name,
        Key='data/vegetation/sentinel-2/' + raster_stem + '-50m.tif',
        ExtraArgs={'ACL': 'public-read'},
    )