In [22]:
import os
import pandas as pd
import boto3
import geopandas as gpd
import numpy as np
import boto3

# Load data

In [3]:
# Read the land use / land cover (LULC) statistics table over HTTPS
cities_lulc_stats = pd.read_csv(
    filepath_or_buffer='https://cities-urbanshift.s3.eu-west-3.amazonaws.com/indicators/cities_lulc_stats.csv'
)
# Show the first rows as the cell output
cities_lulc_stats.head()

Unnamed: 0,geo_id,geo_name,geo_parent_name,geo_level,lulc_class_code,percent,lulc_class_label,data_source,year
0,ARG-Buenos_Aires_ADM-2_1,José C. Paz,ARG-Buenos_Aires,ADM-2,10,0.237981,Trees,ESA world cover,2020
1,ARG-Buenos_Aires_ADM-2_2,La Matanza,ARG-Buenos_Aires,ADM-2,10,0.128051,Trees,ESA world cover,2020
2,ARG-Buenos_Aires_ADM-2_3,Berazategui,ARG-Buenos_Aires,ADM-2,10,0.253611,Trees,ESA world cover,2020
3,ARG-Buenos_Aires_ADM-2_4,Vicente López,ARG-Buenos_Aires,ADM-2,10,0.128976,Trees,ESA world cover,2020
4,ARG-Buenos_Aires_ADM-2_5,Moreno,ARG-Buenos_Aires,ADM-2,10,0.396034,Trees,ESA world cover,2020


# Compute SICB-1: percent of natural areas by feature

In [4]:
# Keep only the rows whose data source is the ESA world cover product
cities_lulc_stats_esa = cities_lulc_stats.loc[
    cities_lulc_stats['data_source'].eq('ESA world cover')
]

In [18]:
# Work on an explicit copy: cities_lulc_stats_esa is a filtered slice of
# cities_lulc_stats, and adding a column to it directly raises
# SettingWithCopyWarning and changes the upstream frame between cell runs.
df = cities_lulc_stats_esa.copy()
# Recode land cover labels into a binary flag: '1' = natural area, '0' = not natural
mapper_natural_areas = {'Trees': '1',
                        'Shrubland': '1',
                        'Grassland': '1',
                        'Cropland': '0',
                        'Built-up': '0',
                        'Barren / sparse vegetation': '0',
                        'Snow and ice': '0',
                        'Open water': '0',
                        'Herbaceous wetland': '1',
                        'Mangroves': '1',
                        'Moss and lichen': '1'}
# Labels missing from the mapper keep their original label, so they end up in
# neither the '1' nor the '0' group.
df['class'] = df['lulc_class_label'].map(mapper_natural_areas).fillna(df['lulc_class_label'])
# Sum the land cover share per feature (geo_id) and natural/non-natural class
df_classes = df.groupby(['geo_id', 'class']).agg({'percent': 'sum'}).reset_index()
# Keep only the natural-area totals.
# NOTE(review): a geo_id with no natural-class rows is absent from this result
# (not 0) -- confirm that is the intended indicator value for such features.
df_classes_natural = df_classes[df_classes['class'] == '1'].reset_index(drop=True)
# Rename the summed column to the indicator name used in the indicator table
df_classes_natural = df_classes_natural.rename(columns={"percent": "SICB_1_percent_of_natural_areas"})
df_classes_natural.head()

Unnamed: 0,geo_id,class,SICB_1_percent_of_natural_areas
0,ARG-Buenos_Aires_ADM-2-union_1,1,0.585685
1,ARG-Buenos_Aires_ADM-2_1,1,0.33094
2,ARG-Buenos_Aires_ADM-2_10,1,0.401922
3,ARG-Buenos_Aires_ADM-2_11,1,0.072892
4,ARG-Buenos_Aires_ADM-2_12,1,0.094891


# Merge with indicator table

In [13]:
# Load the current indicator table over HTTPS
cities_indicators = pd.read_csv(
    filepath_or_buffer='https://cities-urbanshift.s3.eu-west-3.amazonaws.com/indicators/cities_indicators.csv'
)

In [15]:
def merge_indicators(indicator_table, new_indicator_table, indicator_name):
    """Add or replace one indicator column in the indicator table.

    The indicator values are taken from ``new_indicator_table`` and joined on
    ``geo_id`` with a left join, so every row of ``indicator_table`` is kept and
    rows without a match get NaN for the indicator.

    Parameters
    ----------
    indicator_table : pandas.DataFrame
        Existing indicator table; must contain a ``geo_id`` column. It is not
        modified by this function.
    new_indicator_table : pandas.DataFrame
        Table holding ``geo_id`` and the ``indicator_name`` column.
    indicator_name : str
        Name of the indicator column to add or replace.

    Returns
    -------
    pandas.DataFrame
        A new table with the indicator column merged in.
    """
    if indicator_name in indicator_table.columns:
        print("replace by new indicators")
        # drop() returns a new frame: the caller's table is left untouched
        # (an in-place drop would silently alter it as a side effect).
        indicator_table = indicator_table.drop(columns=indicator_name)
    else:
        print("add new indicators")
    # Only geo_id and the indicator column are merged; other columns of
    # new_indicator_table are ignored.
    cities_indicators_df = indicator_table.merge(
        new_indicator_table[["geo_id", indicator_name]],
        on='geo_id',
        how='left',
    )
    return cities_indicators_df

In [20]:
# Attach the SICB-1 values to the indicator table, joined on geo_id
cities_indicators_merged = merge_indicators(
    indicator_table=cities_indicators,
    new_indicator_table=df_classes_natural,
    indicator_name="SICB_1_percent_of_natural_areas",
)

add new indicators


In [21]:
# Preview the first five rows of the merged indicator table
cities_indicators_merged.head(n=5)

Unnamed: 0,geo_id,geo_level,geo_name,geo_parent_name,SICB_1_percent_of_natural_areas
0,ARG-Buenos_Aires_ADM-1_1,ADM-1,ARG-Buenos_Aires,ARG-Buenos_Aires,
1,ARG-Buenos_Aires_ADM-2_1,ADM-2,José C. Paz,ARG-Buenos_Aires,0.33094
2,ARG-Buenos_Aires_ADM-2_2,ADM-2,La Matanza,ARG-Buenos_Aires,0.34366
3,ARG-Buenos_Aires_ADM-2_3,ADM-2,Berazategui,ARG-Buenos_Aires,0.699419
4,ARG-Buenos_Aires_ADM-2_4,ADM-2,Vicente López,ARG-Buenos_Aires,0.165703


# Upload to AWS S3

In [23]:
# connect to s3
# The credentials CSV location can be overridden with the AWS_CREDENTIALS_CSV
# environment variable, so the notebook is not tied to one user's machine.
# When the variable is unset, the original local path is used.
aws_credentials_path = os.environ.get(
    'AWS_CREDENTIALS_CSV',
    'C:\\Users\\Saif.Shabou\\OneDrive - World Resources Institute\\Documents\\aws\\credentials.csv',
)
aws_credentials = pd.read_csv(aws_credentials_path)
# The key pair is read from the first row of the CSV
aws_key = aws_credentials.iloc[0]['Access key ID']
aws_secret = aws_credentials.iloc[0]['Secret access key']

# S3 resource handle authenticated with the key pair above
s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret
)

In [24]:
# Destination bucket and object key for the updated indicator table.
# This is the same bucket/key the indicator table was read from, so the
# upload replaces that file.
bucket_name = 'cities-urbanshift'
key_data = 'indicators/cities_indicators.csv'
# Write the merged table as CSV straight to S3 (row index omitted)
cities_indicators_merged.to_csv(
    "s3://" + bucket_name + "/" + key_data,
    index=False,
    storage_options=dict(key=aws_key, secret=aws_secret),
)

In [25]:
# Set the uploaded object's ACL to public-read
object_acl = s3.ObjectAcl(bucket_name, key_data)
response = object_acl.put(ACL="public-read")