# MENA Analysis: Drought
## 2001 - 2020 (this run processes 2012 - 2020; see `YEAR_LIST` below)

### Load packages and setup

In [5]:
import geopandas as gpd
import pandas as pd
import numpy as np
from rasterstats import zonal_stats
import rasterio
import rioxarray
import xarray

In [15]:
# Input and output locations on the M: drive
dir_shp = 'M:/MENA/GEO/Boundaries/'
dir_in = 'M:/MENA/GEO/Hazards/PDSI/raw/'
dir_out = 'M:/MENA/GEO/Hazards/PDSI/final/'

# Variable prefix, the years/months to process, and the output file name.
# Months are kept as unpadded strings because they are concatenated directly
# into the raster file names and the output column names.
VAR = 'PDSI_'
YEAR_LIST = list(range(2012, 2021))
MONTH_LIST = [str(month) for month in range(1, 13)]
FILE_OUT = f'ADM2_{VAR}{min(YEAR_LIST)}_{max(YEAR_LIST)}.csv'

In [16]:
# Render matplotlib figures inline in the notebook output.
# NOTE(review): no plotting calls are visible in this notebook -- confirm this is still needed.
%matplotlib inline

### Load shapefile

In [17]:
# Read the MENA ADM2 polygons that define the zones for the zonal statistics
shp_path = dir_shp + 'MENA_ADM2.shp'
MENA_shp = gpd.read_file(shp_path)

# Show the layer's coordinate reference system
MENA_shp.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

### Load PDSI drought rasters

In [18]:
# Build a wide table with one row per polygon in MENA_shp and one column of
# polygon-mean raster values per (year, month), named
# VAR + 'mean_' + <year><month> (month is NOT zero-padded, e.g. PDSI_mean_20127).
#
# NOTE(review): the resulting means are in the hundreds, which suggests the
# rasters store scaled values (PDSI x 100?) -- confirm before interpreting.

zones = None        # MENA_shp reprojected to the raster CRS (MENA_shp itself is left untouched)
zones_crs = None    # raster CRS that `zones` is currently expressed in
monthly_means = []  # one pd.Series per raster, aligned on MENA_shp.index

for YEAR in YEAR_LIST:
    for MONTH in MONTH_LIST:
        raster_path = dir_in + 'PDSI_' + str(YEAR) + '_' + str(MONTH) + '.tif'
        with rasterio.open(raster_path) as raster:
            # Read everything needed while the dataset is still open
            array = raster.read(1)
            affine = raster.transform
            nodata = raster.nodata
            raster_crs = raster.crs

        # Reproject only when the raster CRS changes, and into a separate
        # object, instead of mutating MENA_shp in place on every iteration.
        if zones is None or raster_crs != zones_crs:
            zones = MENA_shp.to_crs(raster_crs)
            zones_crs = raster_crs

        # Mean of every cell touched by each polygon. The former
        # `interpolate='bilinear'` argument was dropped: it is a point_query
        # option that zonal_stats silently ignores, so results are unchanged.
        dict_temp = zonal_stats(zones, array, affine=affine, nodata=nodata,
                                all_touched=True, stats=['mean'])

        # zonal_stats returns one dict per feature in input order, so the
        # values line up positionally with the rows of MENA_shp.
        monthly_means.append(
            pd.Series([zone['mean'] for zone in dict_temp],
                      index=MENA_shp.index,
                      dtype='float64',
                      name=VAR + 'mean_' + str(YEAR) + str(MONTH))
        )

# Assemble once by index instead of merging on ID_ADM inside the loop; this is
# faster and cannot multiply rows if ID_ADM were ever duplicated.
df_full = pd.concat([MENA_shp[['ID_ADM']]] + monthly_means, axis=1)



In [19]:
# Number of rows with a missing mean for year 2012, month 7
df_full['PDSI_mean_20127'].isna().sum()

0

In [20]:
# Average of the polygon means for year 2012, month 7 (quick sanity check)
df_full['PDSI_mean_20127'].mean()

-109.5742306742259

In [21]:
# Display the assembled table: ID_ADM plus one PDSI_mean_<year><month> column per raster
df_full

Unnamed: 0,ID_ADM,PDSI_mean_20121,PDSI_mean_20122,PDSI_mean_20123,PDSI_mean_20124,PDSI_mean_20125,PDSI_mean_20126,PDSI_mean_20127,PDSI_mean_20128,PDSI_mean_20129,...,PDSI_mean_20203,PDSI_mean_20204,PDSI_mean_20205,PDSI_mean_20206,PDSI_mean_20207,PDSI_mean_20208,PDSI_mean_20209,PDSI_mean_202010,PDSI_mean_202011,PDSI_mean_202012
0,37631,-282.227746,-259.456647,-252.793064,-184.684393,-180.750289,-172.062428,-166.458960,-161.708671,-136.797688,...,311.561850,360.469364,318.019653,270.173410,236.810405,213.264740,216.868208,-109.178035,-228.193064,-275.234682
1,37632,-139.687038,-165.497782,-152.934943,-127.448497,-158.853622,-188.978314,-225.520453,-242.547560,-184.090192,...,381.646131,316.898472,260.521932,123.903401,73.923608,78.399211,78.756037,-5.516511,-89.547068,-123.510103
2,37633,-166.455939,-229.742606,-153.038651,-103.959786,-121.180692,-112.852248,-106.197403,-131.086619,-141.746393,...,-242.804701,-130.592450,-67.238218,-66.397692,-60.712371,-73.212972,-33.195660,-136.916747,-160.877615,-216.379358
3,37634,-135.859401,-152.582640,-113.770105,-93.715474,-130.694676,-164.511925,-204.097892,-238.727121,-177.308930,...,335.694676,271.988907,219.836106,105.998336,27.986412,32.966445,39.311703,-64.984193,-158.101220,-177.413200
4,37635,34.950932,29.210600,23.157346,23.475086,7.759946,15.987938,8.494586,176.129774,300.461127,...,391.148371,342.916599,296.884893,220.764938,413.144486,277.651789,195.800054,154.666531,108.071671,75.112118
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2932,15561,-270.919423,-227.882316,-228.392070,-261.151399,-276.845844,-251.325700,-229.175148,-208.780746,-189.334394,...,418.388041,359.091815,299.820823,266.015903,236.067642,208.392282,182.838846,133.186811,333.799194,174.879983
2933,15562,-145.136989,-229.782168,-256.949942,-260.475857,-249.279701,-222.748541,-201.750438,-179.915211,-168.886404,...,602.858500,528.414651,458.922260,412.471395,372.160833,335.104916,297.869582,225.486901,351.548933,194.682057
2934,15563,-75.683707,-85.524255,-86.795084,-108.579943,-109.182075,-91.249075,-89.066130,-80.130085,-53.878616,...,-40.586469,-42.492930,-19.512943,-0.193387,-5.779856,23.249946,25.237329,24.729824,0.849250,-5.684142
2935,15564,-379.249041,-386.587654,-400.009731,-441.147098,-431.496332,-388.154342,-351.062253,-318.283544,-288.911318,...,640.215182,551.340823,498.796359,446.166713,399.265770,356.118212,313.909415,265.098781,257.604690,106.077386


In [22]:
# Write the wide table to the output directory without the row index.
# The path is built from dir_out, VAR and YEAR_LIST so it follows the configured
# period; with the current settings it resolves to
# M:/MENA/GEO/Hazards/PDSI/final/PDSI_2012_2020.csv (same file as before).
# FILE_OUT is deliberately not used here: it carries an 'ADM2_' prefix and would
# change the existing output file name.
path_out = dir_out + VAR + str(min(YEAR_LIST)) + '_' + str(max(YEAR_LIST)) + '.csv'
df_full.to_csv(path_out, index=False)
