In [None]:
import geopandas as gpd
import pandas as pd
import os
import io
import requests
import boto3

This script initializes the dataframe that will store the different indicators calculated for cities.
- Input: the georef file containing the list of city names, which is used to retrieve the boundary files from AWS S3 (you can find more information about the georeferential file in [this wiki page](https://github.com/wri/cities-urbanshift/wiki/Tutorial#administrative-boundaries))
- Output: A dataframe containing the list of cities and sub-cities with their unique identifiers. This dataframe will be used as a base for integrating the indicators. It is stored in an AWS S3 bucket.

Run the script only when you want to initialize all the indicators.

# Load input data

In [None]:
# define directory
# local working directory of the running kernel
out_dir = os.getcwd()

# S3 bucket used both for reading the boundary files and for storing the indicators table
bucket_name = 'cities-indicators'
# HTTPS endpoint of the bucket, used to read files by URL
aws_s3_dir = f"https://{bucket_name}.s3.eu-west-3.amazonaws.com"

# path (relative to the bucket endpoint) of the folder holding the boundary files
boundary_ext = '/data/boundaries/'
# object key under which the indicators table is written in the bucket
indicators_file_aws = 'indicators/indicators.csv'

In [None]:
# get list of cities
# the georef table is read directly from the bucket over HTTPS
georef_url = f"{aws_s3_dir}{boundary_ext}boundary_georef.csv"
boundary_georef = pd.read_csv(georef_url)
boundary_georef

# Fill initial indicator table with the list of features

In [None]:
# create empty df
# empty table that will receive the identifier rows (geo_id, geo_level, geo_name,
# geo_parent_name) read from the boundary files
cities_indicators = pd.DataFrame()

In [None]:
# populate the df with the list of geo identifiers
# identifier columns kept from every boundary file
geo_id_columns = ["geo_id", "geo_level", "geo_name", "geo_parent_name"]
# georef columns naming the two boundary files of a city, in the order in which
# their rows are added: area of interest first, then units of analysis
boundary_name_columns = ['aoi_boundary_name', 'units_boundary_name']

# Collect every table in a list and concatenate once at the end: this avoids
# re-copying the growing frame on each iteration and, because the result is
# rebuilt from scratch, re-running this cell does not duplicate rows.
boundary_frames = []
for i, (_, georef_row) in enumerate(boundary_georef.iterrows()):
    print(i)
    geo_name = georef_row['geo_name']
    print("\n geo_name: "+geo_name)

    for boundary_name_column in boundary_name_columns:
        # boundary files are named boundary-<geo_name>-<boundary name>.geojson
        boundary_id = geo_name+'-'+georef_row[boundary_name_column]
        boundary_path = aws_s3_dir + boundary_ext +'boundary-'+boundary_id+'.geojson'
        boundary_data = gpd.read_file(boundary_path)
        # keep only the identifier columns (the geometry is not needed here)
        boundary_data = boundary_data[geo_id_columns]
        boundary_frames.append(boundary_data)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# georef file lists no city
cities_indicators = pd.concat(boundary_frames) if boundary_frames else pd.DataFrame()

In [None]:
# display the assembled table of geo identifiers
cities_indicators

# Upload to AWS S3

In [None]:
# connect to s3
# Location of the CSV file holding the AWS access key pair. It defaults to the
# original hard-coded path and can be overridden with the AWS_CREDENTIALS_CSV
# environment variable, so the notebook does not have to be edited to run on
# another machine.
aws_credentials_path = os.environ.get(
    'AWS_CREDENTIALS_CSV',
    '/home/jovyan/PlanetaryComputerExamples/aws_credentials.csv',
)
aws_credentials = pd.read_csv(aws_credentials_path)
# the key pair is read from the first row of the file
aws_key = aws_credentials.iloc[0]['Access key ID']
aws_secret = aws_credentials.iloc[0]['Secret access key']

# boto3 resource authenticated with the key pair read above
s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret
)

In [None]:
# upload to aws
key_data = indicators_file_aws

# destination object in the bucket and the key pair passed along as storage options
indicators_s3_url = f"s3://{bucket_name}/{key_data}"
s3_storage_options = {"key": aws_key, "secret": aws_secret}

# write the table as CSV, without the dataframe index
cities_indicators.to_csv(
    indicators_s3_url,
    index=False,
    storage_options=s3_storage_options,
)

In [None]:
# make it public
# set the 'public-read' ACL on the object that was just uploaded
object_acl = s3.ObjectAcl(bucket_name,key_data)
# response holds whatever the ACL put call returns; it is not used further in this section
response = object_acl.put(ACL='public-read')