In [None]:
# Optional one-time setup: uncomment the lines below to install the Earth Engine
# client and geemap with the same Python interpreter that runs this kernel.
# import sys
# !{sys.executable} -m pip install pip earthengine-api
# !{sys.executable} -m pip install pip geemap

In [None]:
import ee
# ee.Authenticate()

In [None]:
# Start the Earth Engine session used by every ee.* call below.
# NOTE(review): presumably requires stored credentials; run ee.Authenticate() once if this fails.
ee.Initialize()

In [None]:
import geemap
import ipyleaflet
import numpy as np
import requests
import os
import pandas as pd
import rasterio
import boto3
import geopandas as gpd
import glob

In [None]:
# Notebook-wide locations: the S3 bucket and the local working directory
bucket_name = 'cities-urbanshift'
out_dir = os.getcwd()  # base directory for local files
aws_s3_dir = f'https://{bucket_name}.s3.eu-west-3.amazonaws.com'  # base URL of the bucket

# Load data

In [None]:
# read forest carbon flux data
# paper: https://www.nature.com/articles/s41558-020-00976-6
# on GEE: https://code.earthengine.google.com/b21e2ea3103b09710e5ff5004e6d2617

netFlux = ee.ImageCollection('projects/wri-datalab/gfw-data-lake/net-flux-forest-extent-per-ha-v1-2-2-2001-2021/net-flux-global-forest-extent-per-ha-2001-2021')

# Per-pixel mean over the images of the collection; for use in visualizations
# and as the image exported for each city.
net_flux = netFlux.reduce(ee.Reducer.mean())

# unmask to 0 to enable pixels without tree carbon flux to be included in mean calculation
net_fluxw0 = net_flux.unmask(0)

# Native pixel size of the dataset. It is read from a source image rather than
# from the reduced composite: a reduced ImageCollection carries Earth Engine's
# default projection (WGS84 at a 1-degree scale), so
# net_flux.projection().nominalScale() would not report the data resolution.
fluxScale = ee.Image(netFlux.first()).projection().nominalScale()

# Map.addLayer(net_flux,{palette:['green','white','red']})
# Map.addLayer(net_fluxw0,{palette:['green','white','red']},"",0)


In [None]:
# Table listing every city boundary to process, fetched from the S3 bucket
boundary_georef_url = f'{aws_s3_dir}/data/boundaries/v_0/boundary_georef.csv'
boundary_georef = pd.read_csv(boundary_georef_url)
boundary_georef

# Extract data by city

In [None]:
# For every row of boundary_georef: fetch the boundary GeoJSON from S3 and
# request an export of the mean net-flux image over that boundary to Google Drive.
for i in range(len(boundary_georef)):
    print(i)
    boundary_id = boundary_georef.loc[i, 'geo_name']+'-'+boundary_georef.loc[i, 'aoi_boundary_name']
    print(boundary_id)

    # read boundaries
    boundary_path = f'{aws_s3_dir}/data/boundaries/v_0/boundary-{boundary_id}.geojson'
    # Fail with a clear HTTP error (instead of a JSON decode error on the error
    # page) when the boundary file is missing, and do not hang on a stalled request.
    boundary_response = requests.get(boundary_path, timeout=60)
    boundary_response.raise_for_status()
    boundary_geo = boundary_response.json()
    boundary_geo_ee = geemap.geojson_to_ee(boundary_geo)

    # Export the forest carbon net-flux image for this boundary as a GeoTIFF
    # into the 'data' folder of Google Drive
    geemap.ee_export_image_to_drive(
        net_flux,
        description=f'{boundary_id}-WRI-ForestCarbonFluxes',
        folder='data',
        scale=30,
        region=boundary_geo_ee.geometry(),
        maxPixels=5000000000
    )

# Upload to AWS S3

Because the rasters are too large to download directly to the local machine, they are first exported to a Google Drive folder, then downloaded locally, and finally uploaded to the S3 bucket.

In [None]:
# connect to s3
aws_credentials = pd.read_csv('/home/jovyan/PlanetaryComputerExamples/aws_credentials.csv')
aws_key = aws_credentials.iloc[0]['Access key ID']
aws_secret = aws_credentials.iloc[0]['Secret access key']

s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret
)

In [None]:
# Upload the local GeoTIFF of every boundary to the S3 bucket, publicly readable
for i in range(len(boundary_georef)):
    geo_name = boundary_georef.loc[i, 'geo_name']
    boundary_id = geo_name + '-' + boundary_georef.loc[i, 'aoi_boundary_name']
    print(f"\n boundary_id: {boundary_id}")

    # location of the raster on the local disk
    city_file = f'data/{boundary_id}-WRI-ForestCarbonFluxes.tif'
    raster_path = os.path.join(out_dir, city_file)

    # destination key inside the bucket
    s3_key = f'data/tree_cover/wri-forest-carbon-fluxes/v_0/{boundary_id}-WRI-ForestCarbonFluxes-MgCO2eperHA2001-2021.tif'

    # upload in s3
    s3.meta.client.upload_file(
        raster_path,
        bucket_name,
        s3_key,
        ExtraArgs={'ACL': 'public-read'},
    )