# MENA Analysis: Drought
## 2001 - 2020

### Load packages and setup

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
from rasterstats import zonal_stats
import rasterio
import rioxarray
import xarray

In [2]:
# Input / output locations: boundaries shapefile, CDI rasters, result tables
dir_shp = '/Users/ashitakarl/Library/CloudStorage/Dropbox/WB/MENA_WorldBank/Boundaries/'
dir_in = '/Users/ashitakarl/Desktop/Project/WB/MENA/CDI/'
dir_out = '/Users/ashitakarl/Library/CloudStorage/Dropbox/WB/MENA_WorldBank/Index/ADM2/'

# Variable prefix used in column/file names, and the periods to process
VAR = 'CDI_'
YEAR_LIST = [2001, 2005, 2010, 2015, 2020]
# Zero-padded month codes '01' .. '12'
MONTH_LIST = [f'{month:02d}' for month in range(1, 13)]

# Output file name carries the first and last year in YEAR_LIST
FILE_OUT = f'ADM2_{VAR}{min(YEAR_LIST)}_{max(YEAR_LIST)}.csv'

In [3]:
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

### Load shapefile

In [4]:
# Read the MENA ADM2 boundaries into a GeoDataFrame
shp_path = dir_shp + 'MENA_ADM2.shp'
MENA_shp = gpd.read_file(shp_path)

# Display the coordinate reference system of the boundaries
MENA_shp.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

### Load CDI drought rasters

In [14]:
# Build one table with the mean CDI value of every polygon in MENA_shp for
# each (year, month) raster: one row per polygon, keyed by ID_ADM, and one
# column per period named '<VAR>mean_<YYYY><MM>'.

# Seed the output table with the polygon identifiers.
df_full = pd.DataFrame(MENA_shp['ID_ADM'])

# Zonal means per period, collected here and joined to df_full once after the
# loop. zonal_stats returns one entry per polygon in MENA_shp row order, so
# the values are attached by position rather than by repeated key merges
# (which would multiply rows if ID_ADM ever contained duplicates).
monthly_means = {}

for YEAR in YEAR_LIST:
    for MONTH in MONTH_LIST:
        period = str(YEAR) + str(MONTH)

        with rasterio.open(dir_in + 'FINAL_CDI_' + period + '.tif') as raster:
            array = raster.read(1)
            affine = raster.transform
            nodata = raster.nodata
            # Read the CRS while the dataset is still open
            raster_crs = raster.crs

        # Put the polygons in the raster's CRS before overlaying them
        MENA_shp.to_crs(raster_crs, inplace=True)

        # Extract raster by polygon.
        # NOTE: zonal_stats has no `interpolate` option (that argument belongs
        # to rasterstats.point_query and was silently ignored here), so it is
        # no longer passed; the computed means are unchanged.
        dict_temp = zonal_stats(MENA_shp, array, affine=affine, nodata=nodata,
                                all_touched=True,
                                stats=['mean'])

        monthly_means[VAR + 'mean_' + period] = [zone['mean'] for zone in dict_temp]

# Attach all period columns in one step; index=df_full.index keeps the
# positional pairing between polygons and their statistics.
df_stats = pd.DataFrame(monthly_means, index=df_full.index)
df_full = pd.concat([df_full, df_stats], axis=1).reset_index(drop=True)

In [27]:
# Number of polygons with no CDI mean for July 2001
df_full['CDI_mean_200107'].isna().sum()

84

In [26]:
# Average of the polygon-level CDI means for January 2001
df_full['CDI_mean_200101'].mean()

0.47302776937436913

In [16]:
# Display the assembled table (ID_ADM plus one CDI mean column per period)
df_full

Unnamed: 0,ID_ADM,CDI_mean_200101,CDI_mean_200102,CDI_mean_200103,CDI_mean_200104,CDI_mean_200105,CDI_mean_200106,CDI_mean_200107,CDI_mean_200108,CDI_mean_200109,...,CDI_mean_202003,CDI_mean_202004,CDI_mean_202005,CDI_mean_202006,CDI_mean_202007,CDI_mean_202008,CDI_mean_202009,CDI_mean_202010,CDI_mean_202011,CDI_mean_202012
0,37631,0.000000,0.026000,0.000000,0.204000,0.460000,0.102000,0.000000,0.660000,0.280000,...,0.248000,0.768000,0.740000,0.706000,0.544000,0.310000,0.054000,0.000000,0.172000,0.162000
1,37632,0.000000,0.000000,0.037500,0.692500,0.878750,0.107500,0.037500,0.828750,0.070000,...,0.497500,0.632500,0.737500,0.381429,0.112500,0.575000,0.278750,0.085000,0.114286,0.234286
2,37633,0.034167,0.039444,0.015278,0.412778,0.646667,0.058889,0.219444,0.605000,0.166667,...,0.446389,0.803429,0.805000,0.724857,0.351944,0.258611,0.063143,0.054444,0.221429,0.306286
3,37634,0.000000,0.007500,0.020833,0.780000,0.894167,0.116667,0.033333,0.717500,0.185000,...,0.375000,0.636667,0.768333,0.485000,0.177500,0.425833,0.196364,0.078333,0.089167,0.211667
4,37635,0.328708,0.116172,0.027416,0.082153,0.161388,0.296459,0.250526,0.512488,0.438278,...,0.467039,0.461550,0.738267,0.518878,0.445545,0.203062,0.062660,0.180478,0.190386,0.611731
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2932,15561,0.581667,0.210833,0.241667,0.330000,0.350000,0.165000,0.394167,0.341667,0.110000,...,0.930000,0.721818,0.516667,0.193636,0.364167,0.507500,0.475000,0.314545,0.871818,0.740000
2933,15562,0.243481,0.146630,0.234741,0.187148,0.396852,0.107296,0.201296,0.301704,0.152259,...,0.993333,0.951547,0.134535,0.427233,0.748359,0.376667,0.338717,0.172659,0.745500,0.684144
2934,15563,0.396111,0.318889,0.370556,0.519444,0.501667,0.443889,0.445556,0.440000,0.420000,...,0.968333,0.790000,0.915625,0.961667,0.957333,0.803889,0.598333,0.401667,0.647647,0.714375
2935,15564,0.562833,0.164667,0.516500,0.685333,0.735667,0.447333,0.292333,0.313667,0.282333,...,0.957719,0.956607,0.332679,0.615556,0.769636,0.833158,0.702982,0.372727,0.495283,0.412143


In [17]:
# Write the table to the output directory without the row index
out_path = dir_out + FILE_OUT
df_full.to_csv(out_path, index=False)