<h2><center> <font color='cyan'> Workflow for UCRA </font> </center></h2>
<br>
<font color='cyan'> Create a workflow comprising of section for each notebook </font> 
<h3><center> <font color='cyan'>Table of Contents</font>   </center></h3>


[Preface:   Paths and Visual Inspection](#section0)
<br>
[Section 1: Urban land and built-up area data processing](#section1)
<br>
[Section 2: Process Sea level rise: Clip and reproject the rasters](#section2) 
<br>
[Section 3: Process CCKP Vars: Exports summary statistics](#section3) 
<br>
[Section 4: Process  historical SPEI data: Exports summary statistics](#section4) 
<br>
[Section 5: Process PM2.5 data : Exports summary statistics](#section5) 
<br>
[Section 6: Process heat increase due to urban land expansion : Exports summary statistics](#section6) 
<br>
[Section 7: Process Landslide Data : Exports summary statistics](#section7) 
<br>
[Section 8: Process drought data : Exports summary statistics](#section8) 
<br>
[Section 9: Exports summary statistics for vars with csvs](#section9) 
<br>
[Section 10:Export LST data from GEE](#section10) 
<br>
[Section 11:Arcpy stats. TBD](#section11) 
<br>
[Section 12:Package data for sharing. TBD](#section12) 


In [1]:
import pandas as pd
import numpy as np
import  zipfile
import os, math
import geopandas as gpd
from shutil import copyfile
from pathlib import Path
from shapely.geometry import Polygon
from functools import reduce
import plotly.express as px
import contextily as cx
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from fiona.crs import from_epsg
import fiona
from random import randint

import rasterio
import rasterio.mask
from rasterio.plot import show
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.merge import merge
import xarray as xr
from rasterio.features import shapes

# from rasterstats import zonal_stats
from xrspatial.zonal import stats as zonal_stats
from xrspatial.zonal import crosstab as zonal_crosstab

from os.path import exists
from pathlib import Path
import requests

import geemap
import ee
#Initialize EE
ee.Initialize()

<a id='section0'></a>
<h5><center> <font color='cyan'> Preface:Set paths and Then Visually Inspect AOIs</font>   </center></h5>


***Path setup***

In [2]:
parent_dir = Path(os.getcwd()).parents[0]
print(parent_dir)
#Set country country
country= "Bangladesh"
directory = os.path.join(parent_dir, country)
# map_dir= os.path.join(directory, "maps")
# if not os.path.exists(map_dir):
#     os.mkdir(map_dir)
os.chdir(directory)
print(directory)

crp_dir = Path(os.getcwd()).parents[1]
aoi_folder = Path('data/AOI')
output_folder = Path('data')

# cities = ['Chittagong']
cities_shapefile= ('NodalPourashavasInPolygonShariatpurFixed.shp')
data = gpd.read_file(Path('shapefile') /cities_shapefile).to_crs(4326)
data["city_name"]=data["Name"].str.replace("'","")
data["city_name"]=data["city_name"]
data0=data
# Combine the clusters with indivivdual cities than then run the script on that 
cities_shapefile= ('ClustersOutline.shp')
data1 = gpd.read_file(Path('shapefile') /cities_shapefile).to_crs(4326)
data1["city_name"]=data1.Cluster.apply(lambda x: "cluster"+str(x))
data1["Name"]=data1.Cluster.apply(lambda x: "cluster"+str(x))
data1["city_name"]=data1["Name"].str.replace("'","")
dflist=[ data0, data1]
gdf = gpd.GeoDataFrame( pd.concat( dflist, ignore_index=True) )
gdf = gdf.to_crs(4326)
gdf.to_file(Path('shapefile') / 'compiled_clusters_plus_cities.shp' , crs = 'EPSG:4326')
data= gdf

# data["city_name"]=data["city_name"].str.lower()
cities = data0["city_name"].unique()
cities=cities.tolist()
for city in cities:
    gdf = data.loc[data["city_name"] == city]
    gdf.to_file(Path('shapefile') / (city.replace(" ", "_").lower() + '.shp'))
    
def create_folder(name):
    try:
        os.mkdir(name)
    except FileExistsError:
        pass

create_folder('data') # folder for raw data
create_folder('shapefile')
create_folder('output') # folder for processed data
create_folder('output/drought')
create_folder('plots') # folder for plots
create_folder('stats') # folder for derived statistics
create_folder('stats/drought')
create_folder('stats/CCKP')
create_folder('stats/spei')
create_folder('maps') # folder for maps

# cities = ['Kinshasa', 'Kananga', 'Mbuji-Mayi', 'Lubumbashi', 'Kisangani', 'Bukavu', 'Goma', 'Tshikapa', 'Mwene-Ditu', 'Gemena', 'Gbadolite', 'Matadi', 'Kikwit', 'Bunia']
# cities = ['Chittagong']
for city in cities:
    print(city)
    create_folder(Path(city))
    create_folder(city / Path('maps'))
    create_folder(city / Path('data'))
    create_folder(city / Path('data') / 'AOI')
    create_folder(city / Path('stats'))
for city in cities:
    shp_4326 = gpd.read_file(Path('shapefile') / (city.replace(" ", "_").lower() + '.shp')).to_crs(epsg = 4326)
    shp_4326.to_file(Path(city) / 'data/AOI' / (city.replace(" ", "_").lower() + '_AOI.shp'))
with open('centroids.csv', 'w') as f:
    f.write('city,x,y,utm\n')
    for city in cities:
        centroid = gpd.read_file(Path(city) / 'data/AOI' / (city.replace(" ", "_").lower() + '_AOI.shp')).centroid
        # centroid = gpd.read_file(Path(city) / 'data/AOI' / (city.replace("'", "").lower() + '_AOI.shp')).centroid
        f.write('%s,%s,%s,%s\n'%(city, centroid.x[0], centroid.y[0], 32600+math.ceil((centroid.x[0]+180)/6)))

# all_countries = gpd.read_file((Path('data')/"wb_countries_admin0_10m/WB_countries_Admin0_10m.shp"))
# country_shp = all_countries[all_countries.NAME_EN == 'Bangladesh']
# country_shp.to_file(Path(('shapefile') / (country.lower().replace(' ', '_') + '.shp')))
centroids = pd.read_csv('centroids.csv')
gdf = gpd.GeoDataFrame(centroids, 
                       geometry = gpd.points_from_xy(centroids.x, centroids.y))

gdf.to_file('shapefile/centroids.shp', crs = 'EPSG:4326')
epsg_dict = dict(zip(centroids.city, centroids.utm))
epsg_dict


c:\Users\Aziz\Dropbox\CRP\UCRA
c:\Users\Aziz\Dropbox\CRP\UCRA\Bangladesh
Benapole
Coxs Bazar
Jessore
Kushtia
Madhabdi
Panchagarh
Saidpur
Mirsharai
Feni
Madaripur
Natore
Bogura
Dinajpur
Shariatpur



  centroid = gpd.read_file(Path(city) / 'data/AOI' / (city.replace(" ", "_").lower() + '_AOI.shp')).centroid

  centroid = gpd.read_file(Path(city) / 'data/AOI' / (city.replace(" ", "_").lower() + '_AOI.shp')).centroid

  centroid = gpd.read_file(Path(city) / 'data/AOI' / (city.replace(" ", "_").lower() + '_AOI.shp')).centroid

  centroid = gpd.read_file(Path(city) / 'data/AOI' / (city.replace(" ", "_").lower() + '_AOI.shp')).centroid

  centroid = gpd.read_file(Path(city) / 'data/AOI' / (city.replace(" ", "_").lower() + '_AOI.shp')).centroid

  centroid = gpd.read_file(Path(city) / 'data/AOI' / (city.replace(" ", "_").lower() + '_AOI.shp')).centroid

  centroid = gpd.read_file(Path(city) / 'data/AOI' / (city.replace(" ", "_").lower() + '_AOI.shp')).centroid

  centroid = gpd.read_file(Path(city) / 'data/AOI' / (city.replace(" ", "_").lower() + '_AOI.shp')).centroid

  centroid = gpd.read_file(Path(city) / 'data/AOI' / (city.replace(" ", "_").lower() + '_AOI.shp')).centroid

  centroi

In [None]:
STop

In [3]:
%%capture
def fill_small_holes(row):
    """
    This function plugs small holes.
    Arg: 
        geodataframe 
    returns:
        fixed geodataframe
    """
    newgeom=None
    rings = [i for i in row["geometry"].interiors] #List all interior rings
    if len(rings)>0: #If there are any rings
        to_fill = [Polygon(ring) for ring in rings if Polygon(ring).area<sizelim] #List the ones to fill
        if len(to_fill)>0: #If there are any to fill
            newgeom = reduce(lambda geom1, geom2: geom1.union(geom2),[row["geometry"]]+to_fill) #Union the original geometry with all holes
    if newgeom:
        return newgeom
    else:
        return row["geometry"]
def map(df):
    df['centroid'] = df['geometry'].centroid
    df['lat'] = df['centroid'].y
    df['lon'] = df['centroid'].x
    lat= df['centroid'].y
    lon= df['centroid'].x
    
    fig_data_density = px.choropleth_mapbox(df,
                           geojson=df.geometry,
                           locations=df.index, 
                           color=df.Name, #color_continuous_scale='Reds', #scale
                           opacity=.4,
                           center={"lat": lat[0], "lon": lon[0]}, #mapbox_style="open-street-map",
                           zoom=6,
                           )
    fig_data_density.update_traces(marker_line_width=10)

    s = "https://services.arcgisonline.com/arcgis/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}"
    fig_data_density.update_layout( 
        coloraxis_showscale=False,
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
        mapbox_style="white-bg",
        mapbox_layers=[
            {
                "below": "traces",
                "sourcetype": "raster",
                "sourceattribution": "United States Geological Survey",
                "source": [s],
                # "type": "line",
                # "color": "red"
            }
        ],
    )

    fig_data_density.data[0].hovertemplate = "<span style='font-size:1.2rem; font-weight=400'>Region = %{z:,.0f}</span><br><br>"
    return fig_data_density

def export_map(map_dir, vector_file, crs, legend_title,  visualize_column, title, map_output):
    ax = vector_file.to_crs(crs).plot(figsize=(10, 10), \
                                   column= visualize_column, \
                                   alpha=0.6,  \
                                   facecolor='none',edgecolor='black' , \
                                   linewidth=4 , 
                                   legend=True,  
                                   legend_kwds={'loc':'upper right', 
                                                'bbox_to_anchor':(1, 1), 
                                                'markerscale':1.01, 
                                                'title_fontsize':'small', 
                                                'fontsize':'x-small'
                                                }  
                                        )

    cx.add_basemap(ax, source=cx.providers.Esri.WorldImagery,  crs=crs) 
    minx, miny, maxx, maxy = vector_file.total_bounds
    ax.set_xlim(minx, maxx)
    ax.set_ylim(miny, maxy)
    # Legends
    LegendElement = [
                    Line2D([0],[0],color='black',lw=4,label=f'{legend_title}')
                    ]
    ax.legend(handles=LegendElement,loc='upper right')
    plt.xticks([])
    plt.yticks([])   
    ax.title.set_text(f'{title}')
    plt.tight_layout()
    ax.figure.savefig(map_output)

def export_map_of_each_city(df):
    for index, row in df.iterrows():
        name=row["Name"]  
        print(f"index:{index}, name:{name}") 
        df_filtered= df.loc[df['Name'] == name]
        title=name.replace("'" , "")
        crs = 4326
        legend_title= 'Boundary'
        visualize_column='Name'
        map_output= os.path.join(Path('maps') , f"{title}_map.png" )
        export = export_map(Path('maps'), vector_file=df_filtered, crs=crs, legend_title=legend_title,  \
                        visualize_column=visualize_column, title=title, map_output=map_output)

# Uncomment for visual inspection 
sizelim = 1000 #Fill holes less than 1000 m2
# df = gpd.read_file(os.path.join(country, r"shapefile\NodalPourashavasInPolygonShariatpurFixed.shp"))
df=data
# df['geometry'] =df['geometry']
# df["geometry"] = df.apply(fill_small_holes, axis=1)
# export_map_of_each= export_map_of_each_city(df)


<a id='section1'></a>
<h5><center> <font color='cyan'> Section 1:Urban land and built-up area data processing</font>   </center></h5>

**Set paths and create centroids for the AOIs**

In [4]:
# Section 1 paths
int_output_folder = Path('output/urbanland')
# # Raw data folder. Change file path as needed
data_folder= crp_dir.joinpath('FCS/data/urbanland')
cities = pd.read_csv('centroids.csv').city
centroids = pd.read_csv('centroids.csv')
epsg_dict = dict(zip(centroids.city, centroids.utm))
year_list = [2050, 2100]
# SSP_list = [1, 2, 3]
SSP_list = [ 2, 5]
try:
    os.mkdir(int_output_folder)
except FileExistsError:
    pass


In [5]:
# crop the global raster file to the country extent (with a slight buffer), so that there is no need to reproject the entire globe
def clip_builtup_rasters(shp, data_folder, int_output_folder):
    features = shp.geometry
    for SSP in SSP_list:
        for year in year_list:
            out_file = 'ssp' + str(SSP) + '_' + str(year) + '_' + country.replace(' ', '_').lower() + '.tif'
            if not exists(int_output_folder / out_file):
                with rasterio.open(data_folder / ('ssp' + str(SSP) + '-geotiff') / ('ssp' + str(SSP) + '_' + str(year) + '.tif')) as src:
                    out_image, out_transform = rasterio.mask.mask(
                        src, features, crop=True)
                    out_meta = src.meta.copy()

                out_meta.update({"driver": "GTiff",
                                "height": out_image.shape[1],
                                "width": out_image.shape[2],
                                "transform": out_transform})

                with rasterio.open(int_output_folder / out_file, "w", **out_meta) as dest:
                    dest.write(out_image)
         

In [6]:
# reproject the raster files as needed and clip them to the city extents
def clipdata_bu_proj(SSP, year, city):
    city_no_space = city.replace(" ", "_")
    city_lower = city_no_space.lower()
    crs = epsg_dict[city]
    shp_name = city_no_space + '_AOI.shp'
    shp = gpd.read_file(city / aoi_folder / shp_name).to_crs(epsg = crs)
    features = shp.geometry
    
    projected_raster = 'ssp' + str(SSP) + '_' + str(year) + '_' + country.replace(' ', '_').lower() + '_' + str(crs) + '.tif'
    unprojected_raster = 'ssp' + str(SSP) + '_' + str(year) + '_' + country.replace(' ', '_').lower() + '.tif'
    if not exists(int_output_folder / projected_raster):
        with rasterio.open(int_output_folder / unprojected_raster) as src:
            dst_crs = 'EPSG:' + str(crs)

            transform, width, height = calculate_default_transform(
                src.crs, dst_crs, src.width, src.height, *src.bounds)
            kwargs = src.meta.copy()
            kwargs.update({
                'crs': dst_crs,
                'transform': transform,
                'width': width,
                'height': height
            })

            with rasterio.open(int_output_folder / projected_raster, 'w', **kwargs) as dst:
                for i in range(1, src.count + 1):
                    reproject(
                        source=rasterio.band(src, i),
                        destination=rasterio.band(dst, i),
                        src_transform=src.transform,
                        src_crs=src.crs,
                        dst_transform=transform,
                        dst_crs=dst_crs,
                        resampling=Resampling.nearest)
    
    with rasterio.open(int_output_folder / projected_raster) as src:
        out_image, out_transform = rasterio.mask.mask(
            src, features, crop=True)
        out_meta = src.meta.copy()
        
    out_meta.update({"driver": "GTiff",
                     "height": out_image.shape[1],
                     "width": out_image.shape[2],
                     "transform": out_transform})
    
    out_file = city_lower + '_bu_ssp' + str(SSP) + "_" + str(year) + '.tif'
    with rasterio.open(city / output_folder / out_file, "w", **out_meta) as dest:
        dest.write(out_image)

In [7]:
shp = gpd.read_file(Path('shapefile') / (country.replace(' ', '_').lower() + '.shp')).buffer(2)
clip_builtup= clip_builtup_rasters(shp, data_folder, int_output_folder)
for city in cities:
    for SSP in SSP_list:
        for year in year_list:
            clipdata_bu_proj(SSP, year, city)


  shp = gpd.read_file(Path('shapefile') / (country.replace(' ', '_').lower() + '.shp')).buffer(2)


<a id='section2'></a>
<h5><center> <font color='cyan'> Section 2: Process Sea level rise: Clip and reproject the rasters</font>   </center></h5>


In [8]:
# Raw data folder. Change file path as needed
data_folder= crp_dir.joinpath('FCS/data/climatecentral')
# create a corresponding folder on an external hard drive to store large raster files (intermediate outputs). Change file path as needed
int_output_folder = Path(int_output_folder.parents[0]/'SLR')
try:
    os.mkdir(int_output_folder)
except FileExistsError:
    pass

year_list = [2050, 2100]
slr_list = ['', '_RL10']

In [9]:
# get country shapefile extent
def smart_append(element, ls):
    if not element in ls:
        ls.append(element)

def create_lat_lon_list(shp_bounds):
    lat_list = []
    for i in range(len(shp_bounds)):
        if math.floor(shp_bounds.miny[i]) >= 0:
            hemi = 'N'
            for y in range(math.floor(shp_bounds.miny[i]), math.ceil(shp_bounds.maxy[i])):
                smart_append(hemi + str(y).zfill(2), lat_list)
        elif math.ceil(shp_bounds.maxy[i]) >= 0:
            for y in range(0, math.ceil(shp_bounds.maxy[i])):
                smart_append('N' + str(y).zfill(2), lat_list)
            for y in range(math.floor(shp_bounds.miny[i]), 0):
                smart_append('S' + str(-y).zfill(2), lat_list)
        else:
            hemi = 'S'
            for y in range(math.floor(shp_bounds.miny[i]), math.ceil(shp_bounds.maxy[i])):
                smart_append(hemi + str(-y).zfill(2), lat_list)

    lon_list = []

    for i in range(len(shp_bounds)):
        if math.floor(shp_bounds.minx[i]) >= 0:
            hemi = 'E'
            for x in range(math.floor(shp_bounds.minx[i]), math.ceil(shp_bounds.maxx[i])):
                smart_append(hemi + str(x).zfill(3), lon_list)
        elif math.ceil(shp_bounds.maxx[i]) >= 0:
            for x in range(0, math.ceil(shp_bounds.maxx[i])):
                smart_append('E' + str(x).zfill(3), lon_list)
            for x in range(math.floor(shp_bounds.minx[i]), 0):
                smart_append('W' + str(-x).zfill(3), lon_list)
        else:
            hemi = 'W'
            for x in range(math.floor(shp_bounds.minx[i]), math.ceil(shp_bounds.maxx[i])):
                smart_append(hemi + str(-x).zfill(3), lon_list)

    return lat_list, lon_list     

In [10]:
# copy all the identified tiles into one folder
def copy_relevent_tiffs(data_folder , int_output_folder , lat_list, lon_list):
    for year in year_list:
        for slr in slr_list:
            try:
                os.mkdir(int_output_folder / ('rcp45_50_' + str(year) + slr))
            except FileExistsError:
                pass
            for lat in lat_list:
                for lon in lon_list:
                    # data_subfolder = 'K14U17_rcp45_50.0_' + str(year) + slr
                    data_subfolder = 'AR6_ssp245_mediumconfidence_50.0_' + str(year) + slr
                    if not exists(int_output_folder / ('rcp45_50_' + str(year) + slr) / (lat + lon + '.tif')):
                        try:
                            copyfile(data_folder / data_subfolder / (lat + lon + '.tif'),
                                    int_output_folder / ('rcp45_50_' + str(year) + slr) / (lat + lon + '.tif'))
                        except FileNotFoundError:
                            pass

    # merge the tiles into one raster file
    for year in year_list:
        for slr in slr_list:
            raster_to_mosaic = []
            mosaic_file = 'rcp45_50_' + str(year) + slr + '.tif'
            
            mosaic_list = list((int_output_folder / ('rcp45_50_' + str(year) + slr)).iterdir())
            for p in mosaic_list:
                raster = rasterio.open(p)
                raster_to_mosaic.append(raster)
            
            mosaic, output = merge(raster_to_mosaic)
            output_meta = raster.meta.copy()
            output_meta.update(
                {"driver": "GTiff",
                    "height": mosaic.shape[1],
                    "width": mosaic.shape[2],
                    "transform": output,
                }
            )
            
            with rasterio.open(int_output_folder / mosaic_file, 'w', **output_meta) as m:
                m.write(mosaic)

In [11]:
# reproject the raster files as needed and clip them to the city extents
def clipdata_slr(slr, year, city):
    city_no_space = city.replace(" ", "_")
    city_lower = city_no_space.lower()
    crs = epsg_dict[city]
    shp_name = city_no_space + '_AOI.shp'
    shp = gpd.read_file(city / aoi_folder / shp_name).to_crs(epsg = crs)
    features = shp.geometry
    
    projected_raster = 'rcp45_50_' + str(year) + slr + '_' + str(crs) + '.tif'
    unprojected_raster = 'rcp45_50_' + str(year) + slr + '.tif'
    if not exists(int_output_folder / projected_raster):
        with rasterio.open(int_output_folder / unprojected_raster) as src:
            dst_crs = 'EPSG:' + str(crs)

            transform, width, height = calculate_default_transform(
                src.crs, dst_crs, src.width, src.height, *src.bounds)
            kwargs = src.meta.copy()
            kwargs.update({
                'crs': dst_crs,
                'transform': transform,
                'width': width,
                'height': height
            })

            with rasterio.open(int_output_folder / projected_raster, 'w', **kwargs) as dst:
                for i in range(1, src.count + 1):
                    reproject(
                        source=rasterio.band(src, i),
                        destination=rasterio.band(dst, i),
                        src_transform=src.transform,
                        src_crs=src.crs,
                        dst_transform=transform,
                        dst_crs=dst_crs,
                        resampling=Resampling.nearest)
    
    try:
        with rasterio.open(int_output_folder / projected_raster) as src:
            out_image, out_transform = rasterio.mask.mask(
                src, features, crop=True)
            out_meta = src.meta.copy()

        out_meta.update({"driver": "GTiff",
                         "height": out_image.shape[1],
                         "width": out_image.shape[2],
                         "transform": out_transform})
        
        if np.nansum(out_image) != 0:
            out_file = city_lower + '_slr' + slr + "_" + str(year) + '.tif'
            with rasterio.open(city / output_folder / out_file, "w", **out_meta) as dest:
                dest.write(out_image)
    except ValueError:
        pass

In [12]:
shp_bounds = gpd.read_file(Path('shapefile') / (country.replace(' ', '_').lower() + '.shp')).bounds
lat_list, lon_list= create_lat_lon_list(shp_bounds)
copy_files= copy_relevent_tiffs(data_folder , int_output_folder , lat_list, lon_list  )

In [13]:
for city in cities:
    for slr in slr_list:
        for year in year_list:
            clipdata_slr(slr, year, city)

<a id='section3'></a>
<h5><center> <font color='cyan'> Section 3: Process CCKP Vars: Exports summary statistics</font>   </center></h5>


In [14]:
# Raw data folder. Change file path as needed
data_folder= Path('data/CCKP')
# create a corresponding folder on an external hard drive to store large raster files (intermediate outputs). Change file path as needed
int_output_folder = Path(int_output_folder.parents[0]/'CCKP')
try:
    os.mkdir(int_output_folder)
except FileExistsError:
    pass

In [15]:
# ssps = ['119', '245', '370']
ssps = ['245', '585'] #For Bangladesh cluster analysis
periods = ['2040-2059']
varias0 = ['tas', 'txx', 'pr', 'r95ptot', 'cdd']
varias1 = ['hd35', 'tr26', 'wsdi', 'r20mm', 'r50mm']
varias = varias0 + varias1
rps = ['20yr', '50yr']

In [16]:
abs_val = dict({s: dict({p: dict({v: {} for v in varias + ['spei12']}) for p in periods}) for s in ssps})
ano_val = dict({s: dict({p: dict({v: {} for v in varias}) for p in periods}) for s in ssps})

In [17]:
# processing for variables without timedelta
for ssp in ssps:
    for period in periods:
        for varia in varias0:
            clim = xr.open_dataset(data_folder / ('climatology-'+varia+'-annual-mean_cmip6_annual_all-regridded-bct-ssp'+ssp+'-climatology_median_'+period+'.nc'))
            anom = xr.open_dataset(data_folder / ('anomaly-'+varia+'-annual-mean_cmip6_annual_all-regridded-bct-ssp'+ssp+'-climatology_median_'+period+'.nc'))
            
            for index, row in centroids.iterrows():
                x_coords = [row['x']-1, row['x'], row['x']+1]
                y_coords = [row['y']-1, row['y'], row['y']+1]
                clim_val_coords = []
                clim_val_vals = []
                anom_val_coords = []
                anom_val_vals = []
                
                for x in x_coords:
                    for y in y_coords:
                        clim_val_df = clim.sel(lon = x, lat = y, method = 'nearest').to_dataframe()
                        anom_val_df = anom.sel(lon = x, lat = y, method = 'nearest').to_dataframe()
                        if not (clim_val_df['lon'][0], clim_val_df['lat'][0]) in clim_val_coords:
                            clim_val_coords.append((clim_val_df['lon'][0], clim_val_df['lat'][0]))
                            clim_val_vals.append(clim_val_df['climatology-'+varia+'-annual-mean'].mean())
                        if not (anom_val_df['lon'][0], anom_val_df['lat'][0]) in anom_val_coords:
                            anom_val_coords.append((anom_val_df['lon'][0], anom_val_df['lat'][0]))
                            anom_val_vals.append(anom_val_df['anomaly-'+varia+'-annual-mean'].mean())
                
                clim_val = np.nanmean(clim_val_vals)
                anom_val = np.nanmean(anom_val_vals)
                
                abs_val[ssp][period][varia][row['city']] = clim_val
                ano_val[ssp][period][varia][row['city']] = anom_val

In [18]:
# processing for variables with timedelta
for ssp in ssps:
    for period in periods:
        for varia in varias1:
            clim = xr.open_dataset(data_folder / ('climatology-'+varia+'-annual-mean_cmip6_annual_all-regridded-bct-ssp'+ssp+'-climatology_median_'+period+'.nc'))
            anom = xr.open_dataset(data_folder / ('anomaly-'+varia+'-annual-mean_cmip6_annual_all-regridded-bct-ssp'+ssp+'-climatology_median_'+period+'.nc'))
            
            for index, row in centroids.iterrows():
                x_coords = [row['x']-1, row['x'], row['x']+1]
                y_coords = [row['y']-1, row['y'], row['y']+1]
                clim_val_coords = []
                clim_val_vals = []
                anom_val_coords = []
                anom_val_vals = []
                
                for x in x_coords:
                    for y in y_coords:
                        clim_val_df = clim.sel(lon = x, lat = y, method = 'nearest').to_dataframe()
                        anom_val_df = anom.sel(lon = x, lat = y, method = 'nearest').to_dataframe()
                        if not (clim_val_df['lon'][0], clim_val_df['lat'][0]) in clim_val_coords:
                            clim_val_coords.append((clim_val_df['lon'][0], clim_val_df['lat'][0]))
                            clim_val_vals.append(np.timedelta64(clim_val_df['climatology-'+varia+'-annual-mean'].mean()).astype('timedelta64[D]') / np.timedelta64(1, 'D'))
                        if not (anom_val_df['lon'][0], anom_val_df['lat'][0]) in anom_val_coords:
                            anom_val_coords.append((anom_val_df['lon'][0], anom_val_df['lat'][0]))
                            anom_val_vals.append(np.timedelta64(anom_val_df['anomaly-'+varia+'-annual-mean'].mean()).astype('timedelta64[D]') / np.timedelta64(1, 'D'))
                
                clim_val = np.nanmean(clim_val_vals)
                anom_val = np.nanmean(anom_val_vals)
                
                abs_val[ssp][period][varia][row['city']] = clim_val
                ano_val[ssp][period][varia][row['city']] = anom_val

In [19]:
# processing for spei12
varia = 'spei12'
for ssp in ssps:
    for period in periods:
        clim = xr.open_dataset(data_folder / ('climatology-'+varia+'-annual-mean_cmip6_annual_all-regridded-bct-ssp'+ssp+'-climatology_median_'+period+'.nc'))

        for index, row in centroids.iterrows():
                x_coords = [row['x']-1, row['x'], row['x']+1]
                y_coords = [row['y']-1, row['y'], row['y']+1]
                clim_val_coords = []
                clim_val_vals = []
                
                for x in x_coords:
                    for y in y_coords:
                        clim_val_df = clim.sel(lon = x, lat = y, method = 'nearest').to_dataframe()
                        if not (clim_val_df['lon'][0], clim_val_df['lat'][0]) in clim_val_coords:
                            clim_val_coords.append((clim_val_df['lon'][0], clim_val_df['lat'][0]))
                            clim_val_vals.append(clim_val_df['climatology-'+varia+'-annual-mean'].mean())
                
                clim_val = np.nanmean(clim_val_vals)
                abs_val[ssp][period][varia][row['city']] = clim_val

In [20]:
# write to csv
for ssp in ssps:
    for varia in varias + ['spei12']:
        with open('stats/CCKP/clim_'+varia+'_ssp'+ssp+'.csv', 'w') as f:
            f.write('city,'+periods[0]+'\n')
            for city in centroids.city:
                f.write("%s,%s\n"%(city, abs_val[ssp][periods[0]][varia][city]))
        if varia != 'spei12':
            with open('stats/CCKP/anom_'+varia+'_ssp'+ssp+'.csv', 'w') as f:
                f.write('city,'+periods[0]+'\n')
                for city in centroids.city:
                    f.write("%s,%s\n"%(city, ano_val[ssp][periods[0]][varia][city]))

In [21]:
aep_val = dict({s: dict({r: {} for r in rps}) for s in ssps})
# processing for future return period change factor
for ssp in ssps:
    for period in ['2035-2064']:
        for rp in rps:
            aep = xr.open_dataset(data_folder / ('changefactorfaep'+rp+'-rx5day-period-mean_cmip6_period_all-regridded-bct-ssp'+ssp+'-climatology_median_'+period+'.nc'))
            
            for index, row in centroids.iterrows():
                x_coords = [row['x']-1, row['x'], row['x']+1]
                y_coords = [row['y']-1, row['y'], row['y']+1]
                aep_val_coords = []
                aep_val_vals = []
                
                for x in x_coords:
                    for y in y_coords:
                        aep_val_df = aep.sel(lon = x, lat = y, method = 'nearest').to_dataframe()
                        if not (aep_val_df['lon'][0], aep_val_df['lat'][0]) in aep_val_coords:
                            aep_val_coords.append((aep_val_df['lon'][0], aep_val_df['lat'][0]))
                            aep_val_vals.append(aep_val_df['changefactorfaep'+rp+'-rx5day-period-mean'].mean())
                
                aep_val1 = np.nanmean(aep_val_vals)
                
                aep_val[ssp][rp][row['city']] = aep_val1

In [22]:
# write to csv
for ssp in ssps:
    for rp in rps:
        with open('stats/CCKP/aep_'+rp+'_ssp'+ssp+'.csv', 'w') as f:
            f.write('city,'+'2035-2064'+'\n')
            for city in centroids.city:
                f.write("%s,%s\n"%(city, aep_val[ssp][rp][city]))

<a id='section4'></a>
<h5><center> <font color='cyan'> Section 4: Process  historical SPEI data: Exports summary statistics</font>   </center></h5>


***Set Data Path and years configs***

In [23]:
# Raw data folder. Change file path as needed
data_folder= Path('data/SPEI')
periods = ['01', '12', '48']
years = range(2011, 2021)

In [24]:
spei_val = dict({p: dict({c: {} for c in centroids.city}) for p in periods})
for period in periods:
    spei_nc = xr.open_dataset(data_folder / ('spei'+period+'.nc'))
    for index, row in centroids.iterrows():
        for year in years:
            for month in range(1, 13):
                time1 = str(year) + '-' + str(month) + '-15'
                val = spei_nc.sel(lon = row['x'], lat = row['y'], time = time1, method = 'nearest')['spei'].to_dict()['data']
                spei_val[period][row['city']][time1] = val
# write to csv
for period in periods:
    with open('stats/spei/spei'+period+'.csv', 'w') as f:
        f.write('city,date,spei\n')
        for city in centroids.city:
            for year in years:
                for month in range(1, 13):
                    time1 = str(year) + '-' + str(month) + '-15'
                    f.write('%s,%s,%s\n' % (city, time1, spei_val[period][city][time1]))

<a id='section5'></a>
<h5><center> <font color='cyan'> Section 5: Process PM2.5 data : Exports summary statistics</font>   </center></h5>

In [25]:
data_folder = Path('data\Global Annual PM2.5 Grids 1998-2019')
def unzip_files(data_folder):
    extension = ".zip"
    os.chdir(data_folder) # change directory from working dir to dir with files
    for item in os.listdir(data_folder): # loop through items in dir
        if item.endswith(extension): # check for ".zip" extension
            file_name = os.path.abspath(item) # get full path of files
            zip_ref = zipfile.ZipFile(file_name) # create zipfile object
            zip_ref.extractall(data_folder) # extract file to dir
            zip_ref.close() # close file
            os.remove(file_name) # delete zipped file

In [26]:
def clipdata_air(city, year):
    city_no_space = city.replace(" ", "_")
    city_lower = city_no_space.lower()
    file = city / aoi_folder / (city_lower + '_AOI.shp')
    
    with fiona.open(file, "r") as shapefile:
        features = [feature["geometry"] for feature in shapefile]
        
        # Raw data folder. Change file path as needed
        # input_raster = "F:/World Bank/City Scan/data/Global Annual PM2.5 Grids 1998-2019/sdei-global-annual-gwr-pm2-5-modis-misr-seawifs-aod-v4-gl-03-" + str(year) + ".tif"
        input_raster = f"{data_folder}/sdei-global-annual-gwr-pm2-5-modis-misr-seawifs-aod-v4-gl-03-" + str(year) + ".tif"
        with rasterio.open(input_raster) as src:
            # shapely presumes all operations on two or more features exist in the same Cartesian plane.
            out_image, out_transform = rasterio.mask.mask(
                src, features, crop=True)
            out_meta = src.meta.copy()

        out_meta.update({"driver": "GTiff",
                         "height": out_image.shape[1],
                         "width": out_image.shape[2],
                         "transform": out_transform})

        output_file = city_lower + '_air_quality_' + str(year) + '.tif'
        with rasterio.open(Path(city) / output_folder / output_file, "w", **out_meta) as dest:
            dest.write(out_image)

In [27]:

for city in cities:
    for year in range(1998, 2020):
        clipdata_air(city, year)

<a id='section6'></a>
<h5><center> <font color='cyan'> Section 6: Process heat increase due to urban land expansion : Exports summary statistics</font>   </center></h5>

In [28]:
data_folder = Path('data/Heat increase due to urban land expansion')
shp_folder = Path('data/AOI')

def reproj_heat(input_raster):
    filename = input_raster + '.tif'
    outfile = input_raster + '_4326.tif'
    with rasterio.open(data_folder / filename) as src:
        dst_crs = 'EPSG:4326'
        
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': dst_crs,
            'transform': transform,
            'width': width,
            'height': height
        })

        with rasterio.open(data_folder / outfile, 'w', **kwargs) as dst:
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=dst_crs,
                    resampling=Resampling.nearest)

def clip_heat(input_raster):
    input_raster_name = input_raster + '_4326.tif'
    with rasterio.open(data_folder / input_raster_name) as src:
        out_image, out_transform = rasterio.mask.mask(
            src, features, crop=True)
        out_meta = src.meta.copy()
        
    out_meta.update({"driver": "GTiff",
                     "height": out_image.shape[1],
                     "width": out_image.shape[2],
                     "transform": out_transform})
    
    out_file = city.replace(' ', '_').lower() + '_' + input_raster + '.tif'
    with rasterio.open(city / output_folder / out_file, "w", **out_meta) as dest:
        dest.write(out_image)
                



In [29]:
# raster_list = ['urban-ssp1_day_sum', 'urban-ssp1_nig_sum',
#                'urban-ssp2_day_sum', 'urban-ssp2_nig_sum',
#                'urban-ssp3_day_sum', 'urban-ssp3_nig_sum']
raster_list = ['urban-ssp2_day_sum', 'urban-ssp2_nig_sum',
               'urban-ssp5_day_sum', 'urban-ssp5_nig_sum']
for city in cities:
    aoi_name = city.replace(' ', '_').lower() + '_AOI.shp'
    shp = gpd.read_file(city/shp_folder/aoi_name) #.buffer(0.01)
    features = shp.geometry
    for raster in raster_list:
        reproj_heat(raster)
        clip_heat(raster)

<a id='section7'></a>
<h5><center> <font color='cyan'> Section 7: Process Landslide Data : Exports summary statistics</font>   </center></h5>

In [30]:
def reproj_ls(input_folder, input_raster):
    filename = input_raster + '.tif'
    outfile = input_raster + '_4326.tif'
    with rasterio.open(input_folder/filename) as src:
        dst_crs = 'EPSG:4326'
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': dst_crs,
            'transform': transform,
            'width': width,
            'height': height
        })

        with rasterio.open(input_folder / outfile, 'w', **kwargs) as dst:
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=dst_crs,
                    resampling=Resampling.nearest)

In [31]:
def clipdata_ls(city):
    city_no_space = city.replace(" ", "_")
    city_lower = city_no_space.lower()
    file = city / aoi_folder / (city_lower + '_AOI.shp')
    print('1')
    
    with fiona.open(file, "r") as shapefile:
        features = [feature["geometry"] for feature in shapefile]
        # Raw data folder. Change file path as needed
        # # input_raster = r'F:\World Bank\City Scan\data\Landslide\Global landslide hazard map - Rainfall trigger (1980-2018, median) - COG.tif'
        print('2')
        input_raster = Path(data_folder/'LS_RF_Median_1980_2018_COG_4326.tif')
        with rasterio.open(input_raster) as src:
            # shapely presumes all operations on two or more features exist in the same Cartesian plane.
            out_image, out_transform = rasterio.mask.mask(
                src, features, crop=True)
            out_meta = src.meta.copy()
            print('3')

        out_meta.update({"driver": "GTiff",
                         "height": out_image.shape[1],
                         "width": out_image.shape[2],
                         "transform": out_transform,
                         'nodata': 0})
        
        print('4')
        if np.nansum(out_image) != 0:
            output_file = city_lower + '_landslide.tif'
            print('5')
            with rasterio.open(Path(city) / output_folder / output_file, "w", **out_meta) as dest:
                dest.write(out_image)
                print(f'Wrote {output_file}')

In [32]:
%%time
input_raster= "LS_RF_Median_1980_2018_COG"
data_folder = Path('data/Landslide')
reproj = reproj_ls(data_folder, input_raster)
for city in cities:
    clipdata_ls(city)

1
2
3
4
1
2
3
4
5
Wrote coxs_bazar_landslide.tif
1
2
3
4
1
2
3
4
1
2
3
4
1
2
3
4
1
2
3
4
1
2
3
4
1
2
3
4
1
2
3
4
1
2
3
4
1
2
3
4
1
2
3
4
1
2
3
4
CPU times: total: 57.1 s
Wall time: 8min 23s


<a id='section7_1'></a>
<h5><center> <font color='cyan'> Section 7.1: Processes historical and current built-up data from World Settlement Footprint</font>   </center></h5>

In [33]:
# download the WSF tiles that overlap with the AOIs
def download_wsf(city, wsf_type):
    # data_folder = Path(r'F:\World Bank\City Scan') / country / ('data/WSF' + wsf_type)  # change file path as needed
    data_folder = Path('data/WSF' + wsf_type)  # change file path as needed
    
    try:
        os.mkdir(data_folder)
    except FileExistsError:
        pass
    
    city_no_space = city.replace(" ", "_")
    city_lower = city_no_space.lower()
    shp_name = city_no_space + '_AOI.shp'
    shp = gpd.read_file(city / aoi_folder / shp_name)
    shp_bounds = shp.bounds
    
    for i in range(len(shp_bounds)):
        for x in range(math.floor(shp_bounds.minx[i] - shp_bounds.minx[i] % 2), math.ceil(shp_bounds.maxx[i]), 2):
            for y in range(math.floor(shp_bounds.miny[i] - shp_bounds.miny[i] % 2), math.ceil(shp_bounds.maxy[i]), 2):
                file_name = 'WSF' + wsf_type + '_v1_' + str(x) + '_' + str(y)
                if not exists(data_folder / (file_name + '.tif')):
                    if wsf_type == 'evolution':
                        file = requests.get('https://download.geoservice.dlr.de/WSF_EVO/files/' + file_name + '/' + file_name + '.tif')
                    elif wsf_type == '2019':
                        file = requests.get('https://download.geoservice.dlr.de/WSF2019/files/' + file_name + '.tif')
                    open(data_folder / (file_name + '.tif'), 'wb').write(file.content)


In [34]:
# def download_wsf():
epsg_dict = dict(zip(centroids.city, centroids.utm))
wsf_types = ['evolution', '2019']
for wsf_type in wsf_types:
    for city in cities:
        download_wsf(city, wsf_type)

In [41]:
def merge_the_tiles():
    # merge the WSF tiles into one raster file
    for wsf_type in wsf_types:
        try:
            raster_to_mosaic = []
            # mosaic_file = 'WSF' + wsf_type + '.tif'
            mosaic_file = 'WSF_mosaic' + wsf_type + '.tif'
            remove_mosaic_file= Path('data/WSF' + wsf_type)/ mosaic_file
            if os.path.isfile(remove_mosaic_file):
                os.remove(remove_mosaic_file)
            else:
                # If it fails, inform the user.
                print("Not deleted bcz: %s file not found" % remove_mosaic_file)

            # mosaic_list = list((Path(r'F:\World Bank\City Scan') / country / 'data' / ('WSF' + wsf_type)).iterdir())
            mosaic_list = list((Path('data/WSF' + wsf_type)).iterdir())

            for p in mosaic_list:
                raster = rasterio.open(p)
                raster_to_mosaic.append(raster)

            mosaic, output = merge(raster_to_mosaic)
            output_meta = raster.meta.copy()
            output_meta.update(
                {"driver": "GTiff",
                    "height": mosaic.shape[1],
                    "width": mosaic.shape[2],
                    "transform": output,
                }
            )
    
            # with rasterio.open(Path(r'F:\World Bank\City Scan') / country / 'data' / ('WSF' + wsf_type) / mosaic_file, 'w', **output_meta) as m:
            with rasterio.open(Path('data/WSF' + wsf_type)/ mosaic_file, 'w', **output_meta) as m:
                m.write(mosaic)
        except MemoryError:
            print(wsf_type)
            print('MemoryError. Try GIS instead for merging.')

In [42]:
def clipdata_wsf(city, wsf_type):
    # data_folder = Path(r'F:\World Bank\City Scan') / country / ('data/WSF' + wsf_type)
    # data_folder = Path('data/WSF' + wsf_type)
    data_folder = Path('data/WSF' + wsf_type)

    
    city_no_space = city.replace(" ", "_")
    city_lower = city_no_space.lower()
    shp_name = city_no_space + '_AOI.shp'
    shp = gpd.read_file(city / aoi_folder / shp_name)
    features = shp.geometry
    
    # input_raster = data_folder / ("WSF" + wsf_type + ".tif")
    input_raster = data_folder / ("WSF_mosaic" + wsf_type + ".tif")
    with rasterio.open(input_raster) as src:
        out_image, out_transform = rasterio.mask.mask(
            src, features, crop=True)
        out_meta = src.meta.copy()

        out_meta.update({"driver": "GTiff",
                         "height": out_image.shape[1],
                         "width": out_image.shape[2],
                         "transform": out_transform})

        output_4326_raster_clipped = city_lower + "_WSF" + wsf_type + "_4326.tif"

        with rasterio.open(city / output_folder / output_4326_raster_clipped, "w", **out_meta) as dest:
            dest.write(out_image)

In [43]:
def utm_wsf(city, wsf_type):
    city_no_space = city.replace(" ", "_")
    city_lower = city_no_space.lower()
    shp_name = city_no_space + '_AOI.shp'
    crs = epsg_dict.get(city)
    shp = gpd.read_file(city / aoi_folder / shp_name).to_crs(epsg = crs)
    features = shp.geometry
    
    with rasterio.open(city / output_folder / (city_lower + "_WSF" + wsf_type + "_4326.tif")) as src:
        dst_crs = 'EPSG:' + str(crs)

        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': dst_crs,
            'transform': transform,
            'width': width,
            'height': height
        })

        with rasterio.open(city / output_folder / (city_lower + '_WSF' + wsf_type + '_utm.tif'), 'w', **kwargs) as dst:
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=dst_crs,
                    resampling=Resampling.nearest)
    
    if wsf_type == 'evolution':
        with rasterio.open(city / output_folder / (city_lower + '_WSF' + wsf_type + '_utm.tif')) as src:
            out_image = src.read()
            pixelSizeX, pixelSizeY = src.res

        year_dict = {}
        for year in range(1985, 2016):
            if year == 1985:
                year_dict[year] = np.count_nonzero(
                out_image == year) * pixelSizeX * pixelSizeY / 1000000
            else:
                year_dict[year] = np.count_nonzero(
                out_image == year) * pixelSizeX * pixelSizeY / 1000000 + year_dict[year-1]

        # save CSV
        with open(city / output_folder / (city_lower + "_built_up_stats.csv"), 'w') as f:
            f.write("year,cumulative sq km\n")
            for key in year_dict.keys():
                f.write("%s,%s\n" % (key, year_dict[key]))

In [44]:
def reclass_wsf(city, wsf_type = 'evolution'):
    city_no_space = city.replace(" ", "_")
    city_lower = city_no_space.lower()
    
    with rasterio.open(city / output_folder / (city_lower + '_WSF' + wsf_type + '_4326.tif')) as src:
        out_image = src.read()
        out_meta = src.meta.copy()
    
    out_image[0][out_image[0] < 1985] = 0
    out_image[0][(out_image[0] <= 2015) & (out_image[0] >= 2006)] = 4
    out_image[0][(out_image[0] < 2006) & (out_image[0] >= 1996)] = 3
    out_image[0][(out_image[0] < 1996) & (out_image[0] >= 1986)] = 2
    out_image[0][out_image[0] == 1985] = 1
    
    out_file = city_lower + '_WSF' + wsf_type + '_reclass.tif'
    with rasterio.open(city / output_folder / out_file, "w", **out_meta) as dest:
        dest.write(out_image)

In [45]:
def polygonize_wsf(city, wsf_type = '2019'):
    city_no_space = city.replace(" ", "_")
    city_lower = city_no_space.lower()
    
    mask = None
    
    with rasterio.open(city / output_folder / (city_lower + '_WSF' + wsf_type + '_4326.tif')) as src:
        image = src.read(1)
        results = ({'properties': {'raster_val': v}, 'geometry': s} for i, (s, v) in enumerate(shapes(image, mask=mask, transform=src.transform)))
        geoms = list(results)
        gpd_polygonized_raster = gpd.GeoDataFrame.from_features(geoms)
        gpd_polygonized_raster = gpd_polygonized_raster[gpd_polygonized_raster.raster_val != 0]

**Loop for running the wsf functions**

In [46]:
def clip_and_reclass():
    # download_wsf(city, wsf_type)
    merge_the_tiles()
    for wsf_type in wsf_types:
        for city in cities:
            clipdata_wsf(city, wsf_type)
            if wsf_type == 'evolution':
                utm_wsf(city, wsf_type)
                reclass_wsf(city)
            if wsf_type == '2019':
                polygonize_wsf(city)
                
clip_and_reclass()

Error: data\WSFevolution\WSF_mosaicevolution.tif file not found


Error: data\WSF2019\WSF_mosaic2019.tif file not found


<a id='section8'></a>
<h5><center> <font color='cyan'> Section 8: Process drought data : Exports summary statistics</font>   </center></h5>

In [47]:
data_folder = Path('data/drought')
def unzip_files(data_folder):
    extension = ".zip"
    os.chdir(data_folder) # change directory from working dir to dir with files
    for item in os.listdir(data_folder): # loop through items in dir
        if item.endswith(extension): # check for ".zip" extension
            file_name = os.path.abspath(item) # get full path of files
            zip_ref = zipfile.ZipFile(file_name) # create zipfile object
            zip_ref.extractall(data_folder) # extract file to dir
            zip_ref.close() # close file
            os.remove(file_name) # delete zipped file

In [48]:
shp = gpd.read_file(Path('shapefile') / (country.replace(' ', '_').lower() + '.shp')).buffer(50)
features = shp.geometry



  shp = gpd.read_file(Path('shapefile') / (country.replace(' ', '_').lower() + '.shp')).buffer(50)


In [49]:
raster_list = {#'rdria': 't',
               #'spg01': 'm',
               #'spg12': 'm',
               'twsan': 'm'}
month_list = ['01', '02', '03', '04', '05', '06',
              '07', '08', '09', '10', '11', '12']

In [50]:
def clip_drought(folder, raster_prefix, raster_suffix, year, date):    
    try:
        input_raster= folder /  (raster_prefix + '_m_wld_' + year + date + '01_' + raster_suffix + '.tif')
        # with rasterio.open(folder + '\\' + raster_prefix + '_m_wld_' + year + date + '01_' + raster_suffix + '.tif') as src:
        with rasterio.open(input_raster) as src:
            out_image, out_transform = rasterio.mask.mask(
                src, features, crop=True)
            out_meta = src.meta.copy()
            # print(f'input_raster : {input_raster}')
        out_meta.update({"driver": "GTiff",
                         "height": out_image.shape[1],
                         "width": out_image.shape[2],
                         "transform": out_transform})
        out_raster = Path('output/drought') / (raster_prefix + '_' + year + date + ".tif")
        # with rasterio.open('output/drought/' + raster_prefix + '_' + year + date + ".tif", "w", **out_meta) as dest:
        with rasterio.open(out_raster, "w", **out_meta) as dest:
            dest.write(out_image)
            # print(f'out_raster: {out_raster}')

    
    except:
        pass

In [51]:
for raster in raster_list.keys():
    for year in range(2011, 2021):
        for month in month_list:
            clip_drought(data_folder, raster, raster_list.get(raster), str(year), str(month))
            # clip_drought(folder_list[3], raster, raster_list.get(raster), str(year), str(month))

<a id='section9'></a>
<h5><center> <font color='cyan'> Section 9: Exports summary statistics for vars with csvs</font>   </center></h5>

**AVERAGE TEMPERATURE**

**The following snippets copies the GEE LST files to cities data dirs and then exports the average temperature csv**

In [52]:
def crop_gee_output(city, raster_name):
    with rasterio.open(Path('output') / 'GEE' / (raster_name + '.tif')) as src:
        array = src.read(1)
        out_meta = src.meta.copy()
        out_meta.update({'nodata': 'nan'})
    with rasterio.open(city / Path('data') / (raster_name + '.tif'), 'w', **out_meta) as dest:
        dest.write(array, 1)
        
for city in cities:
    crop_gee_output(city, city.replace(" ", "_").lower() + '_Summer')

In [53]:
def avg_temp(city):
    city_nospace = city.replace(" ", "_").lower()
    temp_file = city / Path('data') / (city_nospace + '_Summer.tif')
    # temp_file = city / Path('data') / (city_nospace + '_urban-ssp2_day_sum.tif')
    temp = rasterio.open(temp_file)
    temp_array = temp.read(1)
    # print(temp_array)
    return np.nanmean(temp_array)

In [54]:
cities_avg_temp = {}
for city in cities:
    cities_avg_temp[city] = avg_temp(city)

In [55]:
with open('stats/avg_temp.csv', 'w') as f:
    f.write('city,avg\n')
    for city in cities_avg_temp.keys():
        f.write("%s,%s\n"%(city, cities_avg_temp[city]))

**AIR QUALITY**

In [56]:
def avg_air(city, year):
    city_nospace = city.replace(" ", "_").lower()
    temp_file = city / Path('data') / (city_nospace + '_air_quality_' + str(year) + '.tif')
    temp = rasterio.open(temp_file)
    temp_array = temp.read(1)
    temp_array = temp_array[temp_array >= 0]
    return np.nanmean(temp_array)

In [57]:
def bad_air(city, threshold = 5):  # threshold for the definition of "bad air"; default is 5 ug/m3
    city_nospace = city.replace(" ", "_").lower()
    air_file = city / Path('data') / (city_nospace + '_air_quality_' + str(year) + '.tif')
    air = rasterio.open(air_file)
    air_array = air.read(1)
    # print(air_array)
    return sum(sum(air_array >= threshold)) / sum(sum(air_array != air.meta['nodata']))

In [58]:
cities_avg_air = {}
for city in cities:
    cities_avg_air[city] = {}
    for year in range(1998, 2020):
        cities_avg_air[city][year] = avg_air(city, year)

In [59]:
cities_bad_air = {}
for city in cities:
    cities_bad_air[city] = {}
    for year in range(1998, 2020):
        cities_bad_air[city][year] = bad_air(city)

In [60]:
with open('stats/avg_air_1998_2019.csv', 'w') as f:
    f.write('city,year,avg,pct_bad_air\n')
    for city in cities_avg_air.keys():
        for year in range(1998, 2020):
            f.write("%s,%s,%s,%s\n"%(city, year, cities_avg_air[city][year], cities_bad_air[city][year]))

 **LANDSLIDE AVERAGE FREQUENCY**

In [61]:
def landslide(city):
    city_nospace = city.replace(" ", "_").lower()
    ls_file = city / Path('data') / (city_nospace + '_landslide.tif')
    ls = rasterio.open(ls_file)
    ls_array = ls.read(1)
    return np.nanmean(ls_array)

In [62]:
cities_ls = {}
for city in cities:
    try:
        landslide_value = landslide(city)
        cities_ls[city] = landslide_value
    except:
        print(f"Skipped {city}")
        pass

Skipped Benapole
Skipped Jessore
Skipped Kushtia
Skipped Madhabdi
Skipped Panchagarh
Skipped Saidpur
Skipped Mirsharai
Skipped Feni
Skipped Madaripur
Skipped Natore
Skipped Bogura
Skipped Dinajpur
Skipped Shariatpur


In [63]:
with open('stats/landslide_avg.csv', 'w') as f:
    f.write('city,avg\n')
    for city in cities_ls.keys():
        f.write("%s,%s\n"%(city, cities_ls[city]))

<a id='section10'></a>
<h5><center> <font color='cyan'> Section 10: Export LST data from GEE. Ran this on Colab for this round.TBD</font>   </center></h5>

In [64]:
def export_landsurface_temperature( country , cities_reprojected, max_value , years, month0 ,  month1):

    """
    export_landsurface_temperature: Exports Land Surface temperature rasters for a given country in the cities shapefile

    :param country: Name of the country
    :param cities_reprojected: geopandas read cities shapefile
    :param max_value: cap the number of cities
    :param years: years
    :param month0 : first hottest month
    :param month1 : last hottest month

    :return: confirms the export of rasters

    """

    # Create a jsondictionary from the geometry of the  shapefile
    geom = cities_reprojected['geometry']
    jsonDict = eval(geom.to_json())

    for index, row in cities_reprojected.iterrows():
        try:
          city= row['NAME_1'].lower() #instruct the column for city name in the city shapefile
          if index  <= max_value:
              city_number= index
              for x in jsonDict['features'][city_number]['geometry']['coordinates']:
                  AOI = ee.Geometry.Polygon(x) #cast polygon as ee geometry
                  landsat = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2")
                  print(1)
                  # Filter for hottest months in the past X years
                  def filter_hot_month(i):
                      return ee.Filter.date(years[i] + '-' + month0 + '-01', years[i] + '-' + month1 + '-01')

                  range_list= map(filter_hot_month, list(range(0, 10))) #combination of months and years
                #   range_list= list(map(filter_hot_month, list(range(0, 10)))) #combination of months and years
                  print(1.2)
                  rangefilter = ee.Filter.Or(range_list)
                  print(2)

                  # Define a function to scale the data and mask unwanted pixels
                  def maskL457sr(image):
                      # Bit 0 - Fill
                      # Bit 1 - Dilated Cloud
                      # Bit 2 - Cirrus (high confidence)
                      # Bit 3 - Cloud
                      # Bit 4 - Cloud Shadow
                      qaMask = image.select('QA_PIXEL').bitwiseAnd(int('11111', 2)).eq(0)
                      saturationMask = image.select('QA_RADSAT').eq(0)
                      # Apply the scaling factors to the appropriate bands.
                      thermalBand = image.select('ST_B10').multiply(0.00341802).add(149.0)
                      # Replace the original bands with the scaled ones and apply the masks.
                      return image.addBands(thermalBand, None, True).updateMask(qaMask).updateMask(saturationMask)
                  print(3)

                  # Apply filter and mask
                  collectionSummer = landsat.filter(rangefilter).filterBounds(AOI).map(maskL457sr).select('ST_B10').mean().add(-273.15).clip(AOI)
                  print(4)

                  task = ee.batch.Export.image.toDrive(**{
                      'image': collectionSummer,
                      'description': city + "_Summer",
                      'folder': country,
                      'region': AOI,
                      'scale': 30,
                      'maxPixels': 1e9
                  })
                  task.start()

                  # filename= city + "_Summer"
                  # filename = os.path.join(country, filename)
                  # geemap.ee_export_image(collectionSummer, filename=filename, 
                  # scale=30, region=AOI)
                #   geemap.ee_export_image_collection_to_drive(collectionSummer,
                #    folder='export', scale=30)
                

        except Exception as e:
            print(f"Error : {e} ")
    return  print(f"Successfully exported the rasters for: {country}")

#### Pick this up after this iteration

In [65]:
# for city in cities:
#     cities_reprojected = gpd.read_file(Path('shapefile') / (city.replace(" ", "_").lower() + '.shp')).to_crs(epsg = 4326)
#     max_value=5
#     years =list(str(i) for i in range(2013, 2024)) #years from 2013-2023
#     month0 = '04'  # first hottest month  # update for each country
#     month1 = '07'  # end of hottest month (note that this is exclusive)  # update for each country
#     country= 'BGD' # replace with the relevent country name
#     # cities_reprojected = .to_crs({'init': 'epsg:4326'})
#     start_exporting= export_landsurface_temperature(country, cities_reprojected, max_value, years, month0, month1)



<a id='section11'></a>
<h5><center> <font color='cyan'> Section 11: Arcpy stats </font>   </center></h5>

***Copy the fathom flood datasets n their respective cities subfolder***

In [66]:
fathom_folder = Path('data') / 'Fathom'
floods = {'fluvial_undefended': 'FU',
          'pluvial': 'P'}
rps = [5, 10, 20, 50, 75, 100, 200, 250, 500, 1000]

for city in cities:
    for flood in floods:
        for rp in rps:
            copyfile(fathom_folder / flood / (floods[flood] + '_1in' + str(rp) + '.tif'), 
                     city / Path('data') / (floods[flood] + '_1in' + str(rp) + '.tif'))

**Built up projection**

In [67]:
# # ssps = ['1', '2', '3']
# ssps = ['2', '5']
# years = ['2050', '2100']
# for city in cities:
#     city1 = city.lower().replace(' ', '_')
#     city2 = city1.replace('-', '_')
#     for flood in floods:
#         for ssp in ssps:
#             for year in years:
#                 ZonalStatisticsAsTable(city + '/data/class_all_' + flood + '.tif', 
#                                        "Value", city + '/data/' + city1 + '_bu_ssp' + ssp + '_' + year + '.tif', 
#                                        city2 + '_ssp' + ssp + '_' + flood + '_' + year, "DATA", "SUM")


<a id='section12'></a>
<h5><center> <font color='cyan'> Section 12: Package data for sharing</font>   </center></h5>

In [68]:
def create_folder(name):
    try:
        os.mkdir(name)
    except FileExistsError:
        pass

### Share maps

In [69]:
# ssps = ['1', '2', '3']
ssps = ['2', '5']
map_folders = ['air', 'AOI', 'landslide', 'summer', 'fluvial', 'pluvial']
for s in ssps:
    for f in ['fu', 'pu']:
        for y in ['2050', '2100']:
            map_folders.append('bu_proj_' + f + '_ssp' + s + '_' + y)
    for t in ['day', 'night']:
        map_folders.append('summer_' + t + '_ssp' + s)
for y in ['2050', '2100']:
    for s in ['', '_RL10']:
        map_folders.append('slr' + s + '_' + y)

for i in map_folders:
    create_folder(Path('maps') / i)

###### Run the R scripts that creates maps before this part

In [70]:
def copy_file(city, orig_suffix, new_folder = '', new_suffix = ''):
    city1 = city.lower().replace(' ', '_')
    
    if new_folder == '':
        new_folder = orig_suffix
    if new_suffix == '':
        new_suffix = orig_suffix
    
    try:
        copyfile(city / Path('maps') / (city1 + '_' + orig_suffix + '.png'), 
                 'maps' / Path(new_folder) / (city1 + '_' + new_suffix + '.png'))
    except FileNotFoundError:
        pass