In [1]:
import io, sys, os, datetime, requests
from collections import defaultdict
import numpy as np
import pandas as pd
import xarray as xr
import netCDF4
import shapely
import boto3
import geopandas as gpd
import io

In [2]:
import geemap
import ee
ee.Authenticate()

Enter verification code: 4/1AdQt8qggk0rShYW6OK4lYM8qDiJQVgTr1ezuQK64NLlxCh4agfP5Qi3ymPQ

Successfully saved authorization token.


In [3]:
ee.Initialize()

In [4]:
SPECIES_INFO = {
    'no2': {
        'name': 'nitrogen dioxide',
        'molar_mass': 46.0055,
        'cams_unit': 'kg/kg',
        'who_threshold': 25.0,
    },
    'so2': {
        'name': 'sulfur dioxide',
        'molar_mass': 64.066,
        'cams_unit': 'kg/kg',
        'who_threshold': 40.0
    },
    'o3': {    # Ozone thresholds are based on 8-hour average, not 24-hour.
               # We use averages at 9am, noon, 3pm to get a 9-hour average at peak O3 production.
        'name': 'ozone',
        'molar_mass': 48.0,
        'cams_unit': 'kg/kg',
        'who_threshold': 100.0
    },
    'pm25': {
        'name': 'fine particulate matter',
        'cams_unit': 'kg/m^3',
        'who_threshold': 5.0
    },
    'pm10': {
        'name': 'coarse particulate matter',
        'cams_unit': 'kg/m^3',
        'who_threshold': 45.0
    },
    'co': {
        'name': 'carbon monoxide',
        'molar_mass': 28.01,
        'cams_unit': 'kg/kg',
        'who_threshold': 7.0
    }
}
datasets = defaultdict(None)

In [5]:
# connect to s3
aws_credentials = pd.read_csv('C:\\Users\\Saif.Shabou\\OneDrive - World Resources Institute\\Documents\\aws\\credentials.csv')
aws_key = aws_credentials.iloc[0]['Access key ID']
aws_secret = aws_credentials.iloc[0]['Secret access key']

s3 = boto3.client(
    service_name='s3',
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret
)

In [6]:
bucket = 'cities-cities4forests'
for species in SPECIES_INFO:
    local_filename = 'cams-eac4_{}_sfc_2020.nc'.format(species)
#     f = s3.download_file(bucket, 'data/air_pollution/cams/cams-eac4_{}_sfc_2020.nc'.format(species), local_filename)
    datasets[species] = xr.open_dataset(local_filename)

In [7]:
# define directory
out_dir = os.getcwd()
aws_s3_dir = "https://cities-cities4forests.s3.eu-west-3.amazonaws.com/data"

In [9]:
# get list of c4f cities
boundary_georef = pd.read_csv('https://cities-cities4forests.s3.eu-west-3.amazonaws.com/data/boundaries/v_0/boundary_georef.csv')
boundary_georef

Unnamed: 0,city_name,geo_name,aoi_boundary_name,units_boundary_name,city_boundary_name,country_code,geo_level
0,Salvador,BRA-Salvador,ADM4union,ADM4,BRA-Salvador-ADM4,BRA,ADM4
1,Bukavu,COD-Bukavu,ADM3union,ADM3,COD-Bukavu-ADM3,COD,ADM3
2,Uvira,COD-Uvira,ADM3union,ADM3,COD-Uvira-ADM3,COD,ADM3
3,Brazzaville,COG-Brazzaville,ADM4union,ADM4,COG-Brazzaville-ADM4,COG,ADM4
4,Barranquilla,COL-Barranquilla,ADM4union,ADM4,COL-Barranquilla-ADM4,COL,ADM4
5,Addis_Ababa,ETH-Addis_Ababa,ADM4union,ADM4,ETH-Addis_Ababa-ADM4,ETH,ADM4
6,Dire_Dawa,ETH-Dire_Dawa,ADM3union,ADM3,ETH-Dire_Dawa-ADM3,ETH,ADM3
7,Nairobi,KEN-Nairobi,ADM3union,ADM3,KEN-Nairobi-ADM3,KEN,ADM3
8,Antananarivo,MDG-Antananarivo,ADM4union,ADM4,MDG-Antananarivo-ADM4,MDG,ADM4
9,Mexico_City,MEX-Mexico_City,ADM2union,ADM2,MEX-Mexico_City-ADM2,MEX,ADM2


In [10]:
def massfraction_to_concentration(massfraction):
    # input masses in kg, volumes in m^3
    # returns ug/m^3
    # 10^9 ug/kg
    # air density 1.223803 kg/m3 from https://confluence.ecmwf.int/display/UDOC/L60+model+level+definitions
    return massfraction * 1.223803 * 10**9

In [11]:
def massfraction_to_ppm(massfraction, species_molarmass):
    AIR_MOLARMASS = 28.97    # g/mol
    return massfraction * (1.0 / species_molarmass) * AIR_MOLARMASS * 10**6

In [12]:
def kilogrampersquaremeter_to_microgrampersquaremeter(conc):
    return conc * 10**9

In [13]:
def exceedancedays(species, lon, lat):
    speciesdata = datasets[species]
    threshold = SPECIES_INFO[species]['who_threshold']
    localdata = speciesdata.sel(latitude=lat, longitude=lon, method='nearest')
    if SPECIES_INFO[species]['cams_unit'] == 'kg/kg':
        conc = massfraction_to_concentration(localdata)
    elif SPECIES_INFO[species]['cams_unit'] == 'kg/m^3':
        conc = kilogrampersquaremeter_to_microgrampersquaremeter(localdata)
    else:
        raise Exception('Unknown CAMS unit')
    dailymax = pd.DataFrame()
    dailymax['thedata'] = conc.to_array()[0]
    dailymax = dailymax.set_index(conc.time.to_index())
    dailymax = dailymax.resample('D').mean()
    return np.sum(dailymax.thedata >= threshold)

In [18]:
cams_multispecies_aq_indicator = pd.DataFrame() 

In [19]:
for i in range(0, len(boundary_georef)):
    print(i)
    geo_name = boundary_georef.loc[i, 'geo_name']
    
    
    boundary_id_aoi = boundary_georef.loc[i, 'geo_name']+'-'+boundary_georef.loc[i, 'aoi_boundary_name']
    boundary_id_unit = boundary_georef.loc[i, 'geo_name']+'-'+boundary_georef.loc[i, 'units_boundary_name']
    
    print("\n geo_name: "+boundary_id_aoi)
    
    for boundary_id in [boundary_id_aoi, boundary_id_unit]:
        if boundary_id[-3:] != 'nan':
            print("\n boundary_id_aoi: "+boundary_id_aoi)
            # read boundaries
            boundary_path = aws_s3_dir +'/boundaries/v_0/boundary-'+boundary_id+'.geojson'
            boundary_geo = requests.get(boundary_path).json()
            boundary_geo_ee = geemap.geojson_to_ee(boundary_geo)
            shape = shapely.geometry.shape(boundary_geo['features'][0]['geometry'])
            centroid = shape.centroid
            clon, clat = centroid.coords[0]  # Breaks if multipolygon
            df = geemap.ee_to_pandas(boundary_geo_ee)

            for species in SPECIES_INFO:
                print(SPECIES_INFO[species]['name'])
                df['exceedancedays {}'.format(SPECIES_INFO[species]['name'])] = exceedancedays(species, clon, clat)
            cams_multispecies_aq_indicator = pd.concat([cams_multispecies_aq_indicator, df])

0

 geo_name: BRA-Salvador-ADM4union

 boundary_id_aoi: BRA-Salvador-ADM4union
nitrogen dioxide
sulfur dioxide
ozone
fine particulate matter
coarse particulate matter
carbon monoxide

 boundary_id_aoi: BRA-Salvador-ADM4union
nitrogen dioxide
sulfur dioxide
ozone
fine particulate matter
coarse particulate matter
carbon monoxide
1

 geo_name: COD-Bukavu-ADM3union

 boundary_id_aoi: COD-Bukavu-ADM3union
nitrogen dioxide
sulfur dioxide
ozone
fine particulate matter
coarse particulate matter
carbon monoxide

 boundary_id_aoi: COD-Bukavu-ADM3union
nitrogen dioxide
sulfur dioxide
ozone
fine particulate matter
coarse particulate matter
carbon monoxide
2

 geo_name: COD-Uvira-ADM3union

 boundary_id_aoi: COD-Uvira-ADM3union
nitrogen dioxide
sulfur dioxide
ozone
fine particulate matter
coarse particulate matter
carbon monoxide

 boundary_id_aoi: COD-Uvira-ADM3union
nitrogen dioxide
sulfur dioxide
ozone
fine particulate matter
coarse particulate matter
carbon monoxide
3

 geo_name: COG-Brazzavill

In [20]:
cams_multispecies_aq_indicator

Unnamed: 0,geo_parent_name,geo_level,creation_date,geo_id,geo_name,exceedancedays nitrogen dioxide,exceedancedays sulfur dioxide,exceedancedays ozone,exceedancedays fine particulate matter,exceedancedays coarse particulate matter,exceedancedays carbon monoxide
0,BRA-Salvador,ADM4-union,2022-08-03,BRA-Salvador_ADM4-union_1,BRA-Salvador,0,0,0,266,3,366
0,BRA-Salvador,ADM4,2022-08-03,BRA-Salvador_ADM4_1,Pituaçu,0,0,0,266,3,366
1,BRA-Salvador,ADM4,2022-08-03,BRA-Salvador_ADM4_2,Patamares,0,0,0,266,3,366
2,BRA-Salvador,ADM4,2022-08-03,BRA-Salvador_ADM4_3,Piatã,0,0,0,266,3,366
3,BRA-Salvador,ADM4,2022-08-03,BRA-Salvador_ADM4_4,Boca do Rio,0,0,0,266,3,366
...,...,...,...,...,...,...,...,...,...,...,...
135,RWA-Musanze,ADM5,2022-08-26,RWA-Musanze_ADM5_136,Burengo,0,366,14,366,366,366
136,RWA-Musanze,ADM5,2022-08-26,RWA-Musanze_ADM5_137,Bwamazi,0,366,14,366,366,366
137,RWA-Musanze,ADM5,2022-08-26,RWA-Musanze_ADM5_138,Kadahenda,0,366,14,366,366,366
138,RWA-Musanze,ADM5,2022-08-26,RWA-Musanze_ADM5_139,Karwesero,0,366,14,366,366,366


In [21]:
cities_indicators_GRE_2_2 = cams_multispecies_aq_indicator
cities_indicators_GRE_2_2 = cities_indicators_GRE_2_2.rename(columns={"exceedancedays nitrogen dioxide": "GRE_2_2_nb_exceedance_days_nitroge_dioxide",
                                                                     "exceedancedays sulfur dioxide": "GRE_2_2_nb_exceedance_days_sulfur_dioxide",
                                                                      "exceedancedays ozone": "GRE_2_2_nb_exceedance_days_ozone",
                                                                     "exceedancedays fine particulate matter": "GRE_2_2_nb_exceedance_days_fine_particulate_matter",
                                                                     "exceedancedays coarse particulate matter": "GRE_2_2_nb_exceedance_days_coarse_particulate_matter",
                                                                     "exceedancedays carbon monoxide": "GRE_2_2_nb_exceedance_days_coarse_carbon_monoxide"})
cities_indicators_GRE_2_2

Unnamed: 0,geo_parent_name,geo_level,creation_date,geo_id,geo_name,GRE_2_2_nb_exceedance_days_nitroge_dioxide,GRE_2_2_nb_exceedance_days_sulfur_dioxide,exceedancedays ozone,GRE_2_2_nb_exceedance_days_fine_particulate_matter,GRE_2_2_nb_exceedance_days_coarse_particulate_matter,GRE_2_2_nb_exceedance_days_coarse_carbon_monoxide
0,BRA-Salvador,ADM4-union,2022-08-03,BRA-Salvador_ADM4-union_1,BRA-Salvador,0,0,0,266,3,366
0,BRA-Salvador,ADM4,2022-08-03,BRA-Salvador_ADM4_1,Pituaçu,0,0,0,266,3,366
1,BRA-Salvador,ADM4,2022-08-03,BRA-Salvador_ADM4_2,Patamares,0,0,0,266,3,366
2,BRA-Salvador,ADM4,2022-08-03,BRA-Salvador_ADM4_3,Piatã,0,0,0,266,3,366
3,BRA-Salvador,ADM4,2022-08-03,BRA-Salvador_ADM4_4,Boca do Rio,0,0,0,266,3,366
...,...,...,...,...,...,...,...,...,...,...,...
135,RWA-Musanze,ADM5,2022-08-26,RWA-Musanze_ADM5_136,Burengo,0,366,14,366,366,366
136,RWA-Musanze,ADM5,2022-08-26,RWA-Musanze_ADM5_137,Bwamazi,0,366,14,366,366,366
137,RWA-Musanze,ADM5,2022-08-26,RWA-Musanze_ADM5_138,Kadahenda,0,366,14,366,366,366
138,RWA-Musanze,ADM5,2022-08-26,RWA-Musanze_ADM5_139,Karwesero,0,366,14,366,366,366


# Upload in aws s3

In [22]:
# connect to s3
aws_credentials = pd.read_csv('C:\\Users\\Saif.Shabou\\OneDrive - World Resources Institute\\Documents\\aws\\credentials.csv')
aws_key = aws_credentials.iloc[0]['Access key ID']
aws_secret = aws_credentials.iloc[0]['Secret access key']

s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret
)

In [23]:
# upload to aws
key_data = 'data/indicators/dev/cities_indicators_GRE_2_2.csv'
bucket_name = 'cities-cities4forests' 
cities_indicators_GRE_2_2.to_csv(
    f"s3://{bucket_name}/{key_data}",
    index=False,
    storage_options={
        "key": aws_key,
        "secret": aws_secret
    },
)

In [24]:
# make it public
object_acl = s3.ObjectAcl(bucket_name,key_data)
response = object_acl.put(ACL='public-read')