In [103]:
import json
import os
import boto3
import rasterio
import pandas as pd

# Define the metadata output file path

In [78]:
# Base directory for outputs: the current working directory of the kernel
# process. The metadata file path is built relative to it.
out_dir = os.getcwd()

In [17]:
# Path of the JSON file that will hold the metadata records.
# Every path component is passed separately so os.path.join inserts the
# separator of the host OS; a hard-coded backslash would only act as a
# directory separator on Windows and would become part of the file name
# on POSIX systems.
metadata_file = os.path.join(out_dir, 'metadata', 'metadata_landcover_esa.json')

# Load input data

In [88]:
# Load the UrbanShift boundary reference table, keep only the rows that
# define a units boundary, and renumber the index from 0 so that rows can be
# addressed by position afterwards.
boundary_georef = (
    pd.read_csv('https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data/boundaries/v_0/boundary_georef.csv')
    .dropna(subset=['units_boundary_name'])
    .reset_index(drop=True)
)
print(boundary_georef.to_string())

             geo_name   level aoi_boundary_name units_boundary_name           city_name  country_name country_code continent
0         ARG-Mendoza  region         ADM3union                ADM3             Mendoza     Argentina          ARG   America
1   ARG-Mar_del_Plata    city              ADM3                ADM4  Mar del Plata city     Argentina          ARG   America
2         ARG-Ushuaia    city              ADM4                ADM5        Ushuaia city     Argentina          ARG   America
3           ARG-Salta  region         ADM2union                ADM3               Salta     Argentina          ARG   America
4    ARG-Buenos_Aires  region         ADM2union                ADM2        Buenos Aires     Argentina          ARG   America
5        BRA-Teresina    city         ADM4union                ADM4       Teresina city        Brazil          BRA   America
6        BRA-Teresina  region         ADM2union                ADM2     Teresina region        Brazil          BRA   America


# Create metadata file

In [141]:
# Accumulator for the per-boundary metadata records (one dict per boundary,
# appended by the loop cell); starts empty.
dataset_metadata_cities = []

In [142]:
# Build one metadata record (a JSON-serialisable dict) per row of
# `boundary_georef` and collect the records in `dataset_metadata_cities`.

# Start from an empty list so that re-running this cell on its own does not
# append a second copy of every record.
dataset_metadata_cities = []

for i in range(0, len(boundary_georef)):

    geo_name = boundary_georef.loc[i, 'geo_name']
    boundary_id = geo_name + '-' + boundary_georef.loc[i, 'aoi_boundary_name']
    city_name = boundary_georef.loc[i, 'city_name']
    country_name = boundary_georef.loc[i, 'country_name']
    continent_name = boundary_georef.loc[i, 'continent']

    # progress trace: one line per processed boundary
    print(boundary_id)

    # Public URL of the raster extract for this boundary; built once and used
    # both to open the raster and as the record's 'url' field.
    # NOTE(review): the file name ends in "-2000.tif" although the metadata
    # describes the 2020 product -- confirm the object naming on S3.
    dataset_url = ("https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data/land_use/esa_world_cover/v_0/"
                   + boundary_id + "-ESA-world_cover-2000.tif")

    # Open the raster only long enough to read its bounding box; the context
    # manager closes the handle even if reading fails, so no dataset is left
    # open per iteration.
    with rasterio.open(dataset_url) as dataset:
        bounds = dataset.bounds
    # Two corner points taken from the bounds, in the order they are stored:
    # [[bounds[0], bounds[1]], [bounds[2], bounds[3]]].
    dataset_extent = [[bounds[0], bounds[1]], [bounds[2], bounds[3]]]

    # fill metadata fields
    dataset_title = "ESA WorldCover land cover extract for " + city_name
    dataset_year = '2020'
    dataset_city_name = city_name
    dataset_country_name = country_name
    dataset_description = "The European Space Agency (ESA) WorldCover 10m 2020 product provides a global land cover map for 2020 at 10-meter resolution based on Sentinel-1 and Sentinel-2 data. The WorldCover product comes with 11 land cover classes, aligned with UN-FAO’s Land Cover Classification System, and has been generated in the framework of the ESA WorldCover project. The World Cover product comes with 11 land cover classes: Tree cover, Shrubland, Grassland, Cropland, Built-up, Bare / sparse vegetation, Snow and ice, Open water, Herbaceous wetland, Mangroves, Moss and lichen."
    dataset_snippet = "Land cover data for "+ city_name + ', ' + country_name + ' ('+ dataset_year + ')'
    # Hierarchical tags: continent, continent: country, continent: country: city.
    dataset_tags = [geo_name,
                    "Biodiversity",
                    "Land cover",
                    "Geography: " + continent_name,
                    "Geography: " + continent_name + ": " + country_name,
                    "Geography: " + continent_name + ": " + country_name + ": "+ city_name,
                    "Time: "+ dataset_year]
    dataset_spatial_resolution = '10m'
    dataset_temporal_resolution = 'yearly'
    dataset_spatial_extent = 'Global'
    dataset_temporal_extent = '2020'
    dataset_format = 'raster'
    dataset_data_source = 'ESA WorldCover'
    dataset_source_url = 'https://esa-worldcover.org/en'
    dataset_provider = 'European Space Agency (ESA)'
    dataset_status = 'published'
    dataset_license = 'CC BY 4.0'
    # "EPSG" is the authority name (the earlier spelling "ESPG" was a typo).
    # NOTE(review): the value is hard-coded rather than read from the raster
    # (dataset.crs) -- confirm that every extract is stored in EPSG:4326.
    dataset_crs = 'EPSG:4326'
    # Legend entries: raster value, hex colour (no leading '#') and class name.
    # A fresh list is created per record so records never share a mutable legend.
    dataset_legend = [
        {
            "value": 10,
            "color": "006400",
            "class": "Trees"
        },
        {
            "value": 20,
            "color": "ffbb22",
            "class": "Shrubland"
        },
        {
            "value": 30,
            "color": "ffff4c",
            "class": "Grassland"
        },
        {
            "value": 40,
            "color": "f096ff",
            "class": "Cropland"
        },
        {
            "value": 50,
            "color": "fa0000",
            "class": "Built-up"
        },
        {
            "value": 60,
            "color": "b4b4b4",
            "class": "Barren / sparse vegetation"
        },
        {
            "value": 70,
            "color": "f0f0f0",
            "class": "Snow and ice"
        },
        {
            "value": 80,
            "color": "0064c8",
            "class": "Open water"
        },
        {
            "value": 90,
            "color": "0096a0",
            "class": "Herbaceous wetland"
        },
        {
            "value": 95,
            "color": "00cf75",
            "class": "Mangroves"
        },
        {
            "value": 100,
            "color": "fae6a0",
            "class": "Moss and lichen"
        }
    ]

    # create a dictionary
    dataset_metadata_city = {
        'title': dataset_title,
        'city_name': dataset_city_name,
        'country': dataset_country_name,
        'city_id': geo_name,
        'description': dataset_description,
        'snippet': dataset_snippet,
        'tags': dataset_tags,
        'year': dataset_year,
        'spatial resolution': dataset_spatial_resolution,
        'temporal resolution': dataset_temporal_resolution,
        'spatial extent': dataset_spatial_extent,
        'temporal extent': dataset_temporal_extent,
        'extent': dataset_extent,
        'format': dataset_format,
        'data_source': dataset_data_source,
        'source_url': dataset_source_url,
        'provider': dataset_provider,
        'url': dataset_url,
        'status': dataset_status,
        'license': dataset_license,
        'crs': dataset_crs,
        'legend': dataset_legend,
    }

    # append cities
    dataset_metadata_cities.append(dataset_metadata_city)
    

ARG-Mendoza-ADM3union
ARG-Mar_del_Plata-ADM3
ARG-Ushuaia-ADM4
ARG-Salta-ADM2union
ARG-Buenos_Aires-ADM2union
BRA-Teresina-ADM4union
BRA-Teresina-ADM2union
BRA-Florianopolis-ADM4union
BRA-Florianopolis-ADM2union
BRA-Belem-ADM4union
BRA-Belem-ADM2union
CRI-San_Jose-ADM2union
RWA-Kigali-ADM4union
SLE-Freetown-ADM4cityunion
SLE-Freetown-ADM4regionunion
MAR-Marrakech-ADM2
IND-Chennai-ADM4union
IND-Chennai-ADM6union
IND-Pune-ADM4union
IND-Surat-ADM4union
CHN-Chengdu-ADM3union
CHN-Chongqing-ADM1
CHN-Ningbo-ADM3union
IDN-Jakarta-ADM4union
IDN-Bitung-ADM2
IDN-Semarang-ADM1
IDN-Balikpapan-ADM4union
IDN-Palembang-ADM2


In [143]:
# Serialise all city records to the metadata JSON file.
# open(..., 'w') does not create missing parent directories, so make sure the
# target folder exists first (exist_ok keeps this safe on re-runs).
metadata_dir = os.path.dirname(metadata_file)
if metadata_dir:
    os.makedirs(metadata_dir, exist_ok=True)

with open(metadata_file, 'w') as f:
    json.dump(dataset_metadata_cities, f)

# Upload to AWS S3

In [144]:
# connect to s3
# The credentials CSV location can be overridden through the
# AWS_CREDENTIALS_CSV environment variable so the notebook is not tied to a
# single user's machine; the previously hard-coded path stays the default.
aws_credentials_path = os.environ.get(
    'AWS_CREDENTIALS_CSV',
    'C:\\Users\\Saif.Shabou\\OneDrive - World Resources Institute\\Documents\\aws\\credentials.csv'
)

if os.path.isfile(aws_credentials_path):
    # The first row of the CSV holds the key pair.
    aws_credentials = pd.read_csv(aws_credentials_path)
    aws_key = aws_credentials.iloc[0]['Access key ID']
    aws_secret = aws_credentials.iloc[0]['Secret access key']
else:
    # No credentials file on this machine: passing None lets boto3 resolve
    # credentials through its default chain (environment variables, shared
    # credentials/config files, ...).
    aws_key = None
    aws_secret = None

s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret
)

# specify bucket name
bucket_name = 'cities-urbanshift'

In [145]:
# Publish the local metadata file to the bucket under a fixed object key,
# with a public-read ACL.
s3.meta.client.upload_file(
    Filename=metadata_file,
    Bucket=bucket_name,
    Key='data/land_use/esa_world_cover/v_0/metadata.json',
    ExtraArgs={'ACL': 'public-read'},
)