In [1]:
import io, sys, os, datetime, requests
from collections import defaultdict
import numpy as np
import pandas as pd
import shapely
import boto3
import geopandas as gpd
import io

In [3]:
import geemap
import ee
# Interactive Earth Engine sign-in: prompts for a verification code and, on
# success, saves an authorization token (see the recorded output below).
# Clear this cell's output before sharing so the pasted code is not published.
ee.Authenticate()

Enter verification code:  [REDACTED]



Successfully saved authorization token.


In [4]:
# Initialize the Earth Engine client library before any ee.* objects are built.
ee.Initialize()

In [5]:
# Years to extract; each one is translated to an image band name by YEAR_INFO.
YEARS = [2010, 2015, 2020]
# Output CSV file name; in the visible notebook it is only referenced by
# commented-out to_csv calls.
OUTFILE_NAME = 'cams-emissions.csv'
# When True, the main loop also processes each city's unit-level boundary file
# in addition to its AOI boundary file.
DO_UNIT = False

In [6]:
# GWPs from AR5 https://www.ipcc.ch/site/assets/uploads/2018/02/WG1AR5_Chapter08_FINAL.pdf
#
# Per-species metadata, keyed by the short species code:
#   name            - human-readable label (printed as progress by the main loop)
#   filename_suffix - suffix appended to the Earth Engine asset id ('' for CO2)
#   ghg             - whether the species enters the (currently commented-out)
#                     CO2e aggregation
#   gwp20 / gwp100  - global warming potentials; present only when ghg is True
#
# NOTE(review): the 0.94 factor on the literature-sourced 100-year values
# presumably rescales them to the AR5 CO2 reference - confirm against the
# AR5 Chapter 8 appendix tables.

SPECIES_INFO = {
    'bc': {
        'name': 'black carbon',
        'filename_suffix': 'BC',
        'ghg': True,
        # Fixed: the two horizons were swapped (gwp20=460, gwp100=1600*0.94).
        # AR5 Table 8.A.6 lists the Fuglestvedt et al. global BC values as
        # GWP20 = 1600 and GWP100 = 460; as for every other species here, only
        # the 100-year value carries the 0.94 factor.
        'gwp20': 1600,
        'gwp100': 460 * 0.94
        # Fuglestvedt et al. "global" values for GWPs
    },
    'ch4': {
        'name': 'methane',
        'filename_suffix': 'CH4',
        'ghg': True,
        'gwp20': 84,
        'gwp100': 28
    },
    'co': {
        'name': 'carbon monoxide',
        'filename_suffix': 'CO',
        'ghg': True,
        'gwp20': 7.65,
        'gwp100': 2.65 * 0.94
        # Midpoints of Fuglestvedt et al. "global" values for GWPs
    },
    'co2': {
        'name': 'carbon dioxide',
        'filename_suffix': '',
        'ghg': True,
        'gwp20': 1,
        'gwp100': 1
    },
#    'fossilco2': {
#        'name': 'carbon dioxide (excl short cycle)',

#    },
    'nox': {
        'name': 'nitrogen oxides',
        'filename_suffix': 'NOx',
        'ghg': True,
        'gwp20': 19,
        'gwp100': -11 * 0.94
        # Fuglestvedt et al. "global" values for GWPs
    },
    'so2': {
        'name': 'sulfur dioxide',
        'filename_suffix': 'SO2',
        'ghg': False
    },
    'oc': {
        'name': 'organic carbon',
        'filename_suffix': 'OC',
        'ghg': True,
        'gwp20': -240,
        'gwp100': -69 * 0.94
        # Fuglestvedt et al. "global" values for GWPs
    },
    'nh3': {
        'name': 'ammonia',
        'filename_suffix': 'NH3',
        'ghg': False
    },
    'nmvoc': {
        'name': 'non-methane volatile organic compounds',
        'filename_suffix': 'NMVOC',
        'ghg': True,
        'gwp20': 14,
        'gwp100': 4.5 * 0.94
        # Fuglestvedt et al. "global" values for GWPs
    }
}

# Emission sectors keyed by short code. The key ORDER matters: the reshape step
# in the main loop pairs result rows with list(SECTOR_INFO.keys()) by position.
# NOTE(review): the 'band' labels skip 'b4' and are not read by any code in the
# visible notebook - confirm whether they are still needed.
SECTOR_INFO = {
    code: {'name': label, 'band': band}
    for code, label, band in (
        ('agl', 'agriculture livestock', 'b1'),
        ('ags', 'agriculture soils', 'b2'),
        ('awb', 'agriculture waste burning', 'b3'),
        ('ene', 'power generation', 'b5'),
        ('fef', 'fugitives', 'b6'),
        ('ind', 'industry', 'b7'),
        ('res', 'residential, commercial, and other combustion', 'b8'),
        ('shp', 'ships', 'b9'),
        ('slv', 'solvents', 'b10'),
        ('sum', 'all sources', 'b11'),
        ('swd', 'solid waste and wastewater', 'b12'),
        ('tnr', 'off-road transportation', 'b13'),
        ('tro', 'road transportation', 'b14'),
    )
}

# Sector table variant whose band labels run consecutively from 'b1' to 'b13'
# in key order (same codes and names as SECTOR_INFO).
# NOTE(review): not referenced by any other code in the visible notebook.
NOX_SECTOR_INFO = {
    code: {'name': label, 'band': 'b{}'.format(position)}
    for position, (code, label) in enumerate(
        (
            ('agl', 'agriculture livestock'),
            ('ags', 'agriculture soils'),
            ('awb', 'agriculture waste burning'),
            ('ene', 'power generation'),
            ('fef', 'fugitives'),
            ('ind', 'industry'),
            ('res', 'residential, commercial, and other combustion'),
            ('shp', 'ships'),
            ('slv', 'solvents'),
            ('sum', 'all sources'),
            ('swd', 'solid waste and wastewater'),
            ('tnr', 'off-road transportation'),
            ('tro', 'road transportation'),
        ),
        start=1,
    )
}

# Band name selected for each requested year: 'b<year - 1999>', e.g. 2010 -> 'b11'.
# NOTE(review): this presumes band 'b1' of each asset holds year 2000 - confirm
# against the Earth Engine asset.
YEAR_INFO = {
    year: 'b{}'.format(year - 1999) for year in YEARS
}

# species code -> ee.ImageCollection, filled in by the next cell.
# A plain dict is used: defaultdict(None) has no default factory, so it raised
# KeyError on missing keys exactly like a dict while suggesting otherwise.
datasets = {}

In [7]:
# Open one Earth Engine image collection per species. The asset id is the
# common prefix plus '_<suffix>'; species with an empty suffix (CO2) use the
# bare prefix with no underscore.
for species in SPECIES_INFO:
    asset_suffix = SPECIES_INFO[species]['filename_suffix']
    separator = '_' if len(asset_suffix) > 0 else ''
    asset_id = 'users/emackres/CAMS-GLOB-ANTv42_yearly_totalTg{}{}'.format(separator, asset_suffix)
    datasets[species] = ee.ImageCollection(asset_id)

In [8]:
# define directory
out_dir = os.getcwd()
aws_s3_dir = "https://cities-cities4forests.s3.eu-west-3.amazonaws.com/data"

In [9]:
# get list of c4f cities
boundary_georef = pd.read_csv('https://cities-cities4forests.s3.eu-west-3.amazonaws.com/data/boundaries/v_0/boundary_georef.csv')
boundary_georef

Unnamed: 0,city_name,geo_name,aoi_boundary_name,units_boundary_name,city_boundary_name,country_code,geo_level
0,Salvador,BRA-Salvador,ADM4union,ADM4,BRA-Salvador-ADM4,BRA,ADM4
1,Bukavu,COD-Bukavu,ADM3union,ADM3,COD-Bukavu-ADM3,COD,ADM3
2,Uvira,COD-Uvira,ADM3union,ADM3,COD-Uvira-ADM3,COD,ADM3
3,Brazzaville,COG-Brazzaville,ADM4union,ADM4,COG-Brazzaville-ADM4,COG,ADM4
4,Barranquilla,COL-Barranquilla,ADM4union,ADM4,COL-Barranquilla-ADM4,COL,ADM4
5,Addis_Ababa,ETH-Addis_Ababa,ADM4union,ADM4,ETH-Addis_Ababa-ADM4,ETH,ADM4
6,Dire_Dawa,ETH-Dire_Dawa,ADM3union,ADM3,ETH-Dire_Dawa-ADM3,ETH,ADM3
7,Nairobi,KEN-Nairobi,ADM3union,ADM3,KEN-Nairobi-ADM3,KEN,ADM3
8,Antananarivo,MDG-Antananarivo,ADM4union,ADM4,MDG-Antananarivo-ADM4,MDG,ADM4
9,Mexico_City,MEX-Mexico_City,ADM2union,ADM2,MEX-Mexico_City-ADM2,MEX,ADM2


In [10]:
def teragramperyear_to_tonneperyear(amt):
    """Convert an emission rate from teragrams per year to tonnes per year.

    One teragram is 10**12 g and one tonne is 10**6 g, so the conversion is a
    multiplication by 10**6.

    Parameters
    ----------
    amt : number or array-like supporting ``*``
        Amount in Tg/yr (the notebook passes a pandas Series).

    Returns
    -------
    Same type as ``amt * int``: the amount expressed in tonnes/yr.
    """
    tonnes_per_teragram = 10 ** 6
    return amt * tonnes_per_teragram

In [11]:
# Start from an empty frame; the main loop cell below populates
# cams_emissions_indicator with one row per boundary feature.
cams_emissions_indicator = pd.DataFrame()

In [12]:
%%time
for i in range(len(boundary_georef)):
    print(i)
    geo_name = boundary_georef.loc[i, 'geo_name']
    
    
    boundary_id_aoi = boundary_georef.loc[i, 'geo_name']+'-'+boundary_georef.loc[i, 'aoi_boundary_name']
    boundary_id_unit = boundary_georef.loc[i, 'geo_name']+'-'+boundary_georef.loc[i, 'units_boundary_name']
    
    # AOI
    boundary_id = boundary_id_aoi

    print("\n boundary_id_aoi: " + boundary_id)
    # read boundaries
    boundary_path = aws_s3_dir +'/boundaries/v_0/boundary-'+boundary_id+'.geojson'
    boundary_geo = requests.get(boundary_path).json()
    boundary_geo_ee = geemap.geojson_to_ee(boundary_geo)

    all_dfs = {}   # This dict stores all result dfs for single species-year pairs
    for species in SPECIES_INFO:
        print(SPECIES_INFO[species]['name'])
        species_data = datasets[species]
        for year in YEARS:
            print('   {}'.format(year))
            speciesyear_data = species_data.select(YEAR_INFO[year])
            emissions_Tg = speciesyear_data.map(lambda x: x.reduceRegions( boundary_geo_ee, ee.Reducer.sum(), 11131.948796096121, 'epsg:4326'))
            result_df = geemap.ee_to_pandas(emissions_Tg.flatten())  # This df has (numgeoms x numsectors) rows, and 1 data column

            result_df['emissions_tonnes'.format(species, year)] = teragramperyear_to_tonneperyear(result_df['sum'])
            all_dfs['{0}_{1}'.format(species, year)] = result_df.drop(columns=['sum']).copy()        

# reshape so that sector info is in columns instead of rows
    all_spyr_dfs = []  # This stores all complete data rows
    for sp_yr in all_dfs:
        df = all_dfs[sp_yr]
        species, year = sp_yr.split('_')
        geo_features = boundary_geo['features']
        sp_yr_dfs = []  # This stores all correctly shaped dfs for one species-year pair, each df for one geogr feature
        for f in geo_features:
            geo_id = f['properties']['geo_id']
            geo_results = df.loc[df['geo_id'] == geo_id]
            geo_info = geo_results[['geo_parent_name', 'geo_level', 'creation_date', 'geo_id', 'geo_name']].iloc[0]
            geo_info_pd = pd.DataFrame(geo_info).transpose()
            geo_info_pd = geo_info_pd.rename({geo_info_pd.index[0]: geo_id}, axis=0)
            georesults_transposed = geo_results.drop(columns=['geo_parent_name', 'geo_level', 'creation_date', 'geo_id', 'geo_name']).transpose()
            mapper = {
                 georesults_transposed.columns[j]: '{0}_{1}_{2}'.format(species, list(SECTOR_INFO.keys())[j], year) for j in range(len(list(SECTOR_INFO.keys())))
            }
            georesults_transposed_renamed = georesults_transposed.rename(columns=mapper).rename({'emissions_tonnes': geo_id}, axis=0)
            sp_yr_dfs.append(georesults_transposed_renamed)        

        allfeatures_df = pd.concat(sp_yr_dfs, axis=0)
        all_spyr_dfs.append(allfeatures_df)
    all_results = pd.concat(all_spyr_dfs, axis=1)
    cams_emissions_indicator = pd.concat([cams_emissions_indicator, all_results], axis=0)
    #cams_emissions_indicator.to_csv(OUTFILE_NAME)
    
    # UNIT
    if DO_UNIT:
        boundary_id = boundary_id_unit
        if boundary_id[-3:] != 'nan':
            print("\n boundary_id_unit: " + boundary_id)
            # read boundaries
            boundary_path = aws_s3_dir +'/boundaries/v_0/boundary-'+boundary_id+'.geojson'
            boundary_geo = requests.get(boundary_path).json()
            boundary_geo_ee = geemap.geojson_to_ee(boundary_geo)

            all_dfs = {}
            for species in SPECIES_INFO:
                print(SPECIES_INFO[species]['name'])
                species_data = datasets[species]
                for year in YEARS:
                    print('   {}'.format(year))
                    speciesyear_data = species_data.select(YEAR_INFO[year])
                    emissions_Tg = speciesyear_data.map(lambda x: x.reduceRegions( boundary_geo_ee, ee.Reducer.sum(), 11131.948796096121, 'epsg:4326'))
                    result_df = geemap.ee_to_pandas(emissions_Tg.flatten())

                    result_df['emissions_tonnes'.format(species, year)] = teragramperyear_to_tonneperyear(result_df['sum'])
                    all_dfs['{0}_{1}'.format(species, year)] = result_df.drop(columns=['sum']).copy()        

# reshape so that sector info is in columns instead of rows
            all_spyr_dfs = []
            for sp_yr in all_dfs:
                df = all_dfs[sp_yr]
                species, year = sp_yr.split('_')
                geo_features = boundary_geo['features']
                sp_yr_dfs = []
                for f in geo_features:
                    geo_id = f['properties']['geo_id']
                    geo_results = df.loc[df['geo_id'] == geo_id]
                    geo_info = geo_results[['geo_parent_name', 'geo_level', 'creation_date', 'geo_id', 'geo_name']].iloc[0]
                    geo_info_pd = pd.DataFrame(geo_info).transpose()
                    geo_info_pd = geo_info_pd.rename({geo_info_pd.index[0]: geo_id}, axis=0)
                    georesults_transposed = geo_results.drop(columns=['geo_parent_name', 'geo_level', 'creation_date', 'geo_id', 'geo_name']).transpose()
                    mapper = {
                         georesults_transposed.columns[j]: '{0}_{1}_{2}'.format(species, list(SECTOR_INFO.keys())[j], year) for j in range(len(list(SECTOR_INFO.keys())))
                    }
                    georesults_transposed_renamed = georesults_transposed.rename(columns=mapper).rename({'emissions_tonnes': geo_id}, axis=0)
                    sp_yr_dfs.append(georesults_transposed_renamed)        

                allfeatures_df = pd.concat(sp_yr_dfs, axis=0)
                all_spyr_dfs.append(allfeatures_df)
            all_results = pd.concat(all_spyr_dfs, axis=1)
            cams_emissions_indicator = pd.concat([cams_emissions_indicator, all_results], axis=0)
            #cams_emissions_indicator.to_csv(OUTFILE_NAME)
      
# # CALCULATE CO2e for all rows
# print('  CO2e')
# for year in YEARS:
#     print(year)
#     for sector in SECTOR_INFO:
#         cams_emissions_indicator['co2e-20_{0}_{1}'.format(sector, year)] = 0
#         cams_emissions_indicator['co2e-100_{0}_{1}'.format(sector, year)] = 0
#     for species in SPECIES_INFO:
#         if SPECIES_INFO[species]['ghg']:
#             for sector in SECTOR_INFO:
#                 cams_emissions_indicator['co2e-20_{0}_{1}'.format(sector, year)] = cams_emissions_indicator['co2e-20_{0}_{1}'.format(sector, year)] + (cams_emissions_indicator['{0}_{1}_{2}'.format(species, sector, year)] * SPECIES_INFO[species]['gwp20'])
#                 cams_emissions_indicator['co2e-100_{0}_{1}'.format(sector, year)] = cams_emissions_indicator['co2e-100_{0}_{1}'.format(sector, year)] + (cams_emissions_indicator['{0}_{1}_{2}'.format(species, sector, year)] * SPECIES_INFO[species]['gwp100'])

# #        cams_emissions_indicator = pd.concat([cams_emissions_indicator, df])
# cams_emissions_indicator.to_csv(OUTFILE_NAME)

0

 boundary_id_aoi: BRA-Salvador-ADM4union
black carbon
   2010
   2015
   2020
methane
   2010
   2015
   2020
carbon monoxide
   2010
   2015
   2020
carbon dioxide
   2010
   2015
   2020
nitrogen oxides
   2010
   2015
   2020
sulfur dioxide
   2010
   2015
   2020
organic carbon
   2010
   2015
   2020
ammonia
   2010
   2015
   2020
non-methane volatile organic compounds
   2010
   2015
   2020
1

 boundary_id_aoi: COD-Bukavu-ADM3union
black carbon
   2010
   2015
   2020
methane
   2010
   2015
   2020
carbon monoxide
   2010
   2015
   2020
carbon dioxide
   2010
   2015
   2020
nitrogen oxides
   2010
   2015
   2020
sulfur dioxide
   2010
   2015
   2020
organic carbon
   2010
   2015
   2020
ammonia
   2010
   2015
   2020
non-methane volatile organic compounds
   2010
   2015
   2020
2

 boundary_id_aoi: COD-Uvira-ADM3union
black carbon
   2010
   2015
   2020
methane
   2010
   2015
   2020
carbon monoxide
   2010
   2015
   2020
carbon dioxide
   2010
   2015
   2020
nit

In [13]:
# Display the assembled table: rows are indexed by geo_id, columns are named
# '<species>_<sector>_<year>' and hold emissions in tonnes.
cams_emissions_indicator

Unnamed: 0,bc_agl_2010,bc_ags_2010,bc_awb_2010,bc_ene_2010,bc_fef_2010,bc_ind_2010,bc_res_2010,bc_shp_2010,bc_slv_2010,bc_sum_2010,...,nmvoc_ene_2020,nmvoc_fef_2020,nmvoc_ind_2020,nmvoc_res_2020,nmvoc_shp_2020,nmvoc_slv_2020,nmvoc_sum_2020,nmvoc_swd_2020,nmvoc_tnr_2020,nmvoc_tro_2020
BRA-Salvador_ADM4-union_1,0.0,0.0,0.003912,0.040109,0.0,660.781049,51.678709,0.525874,0.0,753.414594,...,75.668461,8.295818,33572.1,593.820224,0.651642,6461.700908,43836.98,452.147896,0.0,2672.489704
COD-Bukavu_ADM3-union_1,0.0,0.0,0.00081,0.020506,0.0,42.887942,33.017903,0.0,0.0,76.524762,...,0.021754,3.529922,2110.913,542.368831,0.0,636.679576,3365.571,14.605391,0.0,57.426292
COD-Uvira_ADM3-union_1,0.0,0.0,0.366379,7e-06,0.0,28.414438,160.715237,0.001879,0.0,189.851251,...,0.00188,0.123881,1206.644,2797.879114,0.001418,472.180904,4533.375,9.861961,0.0,34.852917
COG-Brazzaville_ADM4-union_1,0.0,0.0,0.021076,0.148753,0.0,379.677994,215.06737,0.0,0.0,619.628119,...,3.233366,59.961784,52815.05,4529.986212,0.0,5688.861993,65414.7,162.27798,0.0,2154.651123
COL-Barranquilla_ADM4-union_1,0.0,0.0,0.015936,0.0,0.0,67.592416,30.248734,0.521759,0.0,111.81099,...,0.0,722.033523,1747.34,765.030536,4.237955,3102.200199,7151.692,114.688981,0.0,695.658823
ETH-Addis_Ababa_ADM4-union_1,0.0,0.0,1.307866,0.206602,0.0,8686.991676,1787.567539,0.0,0.0,10501.360285,...,5.021686,0.0,2188652.0,33813.850649,0.0,79088.015579,2304669.0,1269.762503,0.0,1796.005527
ETH-Dire_Dawa_ADM3-union_1,0.0,0.0,1.414746,1.120037,0.0,467.378443,239.88216,0.0,0.0,713.429805,...,27.223894,0.0,154255.9,4541.939033,0.0,3571.968449,162707.5,76.864198,0.0,189.201239
KEN-Nairobi_ADM3-union_1,0.0,0.0,0.151123,0.461589,0.0,3280.448253,1102.559361,0.0,0.0,4417.657091,...,11.780972,1.750813,568943.0,23578.85209,0.0,24638.272661,623433.2,527.695505,26.001834,5701.068762
MDG-Antananarivo_ADM4-union_1,0.0,0.0,0.118013,0.748465,1.6e-05,442.846599,141.623612,0.0,0.0,587.103354,...,0.2842,517.615002,84728.77,2581.713068,0.0,9612.237646,97788.05,104.420294,0.0,239.710437
MEX-Mexico_City_ADM2-union_1,0.0,0.0,21.965781,4.617427,0.0,872.745394,2204.291955,0.0,0.0,3423.353801,...,313.85787,458.408541,125057.9,30965.567999,0.0,261381.225336,470568.9,11224.683907,35.810453,40507.847339


In [22]:
# add geo id field
# assign() returns a new frame, so cams_emissions_indicator itself is left
# untouched (plain assignment would alias it and add the column to both names).
cities_indicators_GRE_2_1 = cams_emissions_indicator.assign(geo_id=cams_emissions_indicator.index)

# Upload to AWS S3

In [19]:
# connect to s3
# The credentials CSV location can be overridden with the AWS_CREDENTIALS_CSV
# environment variable so the notebook is not tied to one user's machine; the
# previous hard-coded path remains the default for backward compatibility.
aws_credentials_path = os.environ.get(
    'AWS_CREDENTIALS_CSV',
    'C:\\Users\\Saif.Shabou\\OneDrive - World Resources Institute\\Documents\\aws\\credentials.csv'
)
# The first row of the CSV supplies the access key pair.
aws_credentials = pd.read_csv(aws_credentials_path)
aws_key = aws_credentials.iloc[0]['Access key ID']
aws_secret = aws_credentials.iloc[0]['Secret access key']

s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret
)

In [23]:
# upload to aws
key_data = 'data/indicators/dev/cities_indicators_GRE_2_1.csv'
bucket_name = 'cities-cities4forests' 
cities_indicators_GRE_2_1.to_csv(
    f"s3://{bucket_name}/{key_data}",
    index=False,
    storage_options={
        "key": aws_key,
        "secret": aws_secret
    },
)

In [24]:
# make it public
object_acl = s3.ObjectAcl(bucket_name,key_data)
response = object_acl.put(ACL='public-read')