# MENA Analysis: Population
## 2000–2020

### Load packages and setup

In [2]:
import geopandas as gpd
import pandas as pd
import numpy as np
from rasterstats import zonal_stats
import rasterio

In [4]:
# Input / output locations (each ends with a trailing slash so file names can
# be appended directly).
# NOTE(review): these are absolute, machine-specific paths; they must be edited
# before the notebook can run elsewhere.
dir_shp = '/Users/ashitakarl/Library/CloudStorage/Dropbox/WB/MENA_WorldBank/Boundaries/'
dir_in = '/Users/ashitakarl/Desktop/Project/WB/MENA/POP/'
dir_out = '/Users/ashitakarl/Library/CloudStorage/Dropbox/WB/MENA_WorldBank/Index/ADM2/'

# Years to process, and the variable tag used as a prefix in column/file names
YEAR_LIST = [2000, 2005, 2010, 2015, 2020]
VAR = 'POP_'

# Output file name pattern: ADM2_<VAR><first year>_<last year>.csv
FILE_OUT = f'ADM2_{VAR}{min(YEAR_LIST)}_{max(YEAR_LIST)}.csv'

In [3]:
# Render matplotlib figures inline in the notebook output.
# NOTE(review): no plotting calls are visible in this notebook, so this magic
# may be unnecessary — confirm before removing.
%matplotlib inline

### Load shapefile

In [5]:
# Read the ADM2 boundary polygons from the boundaries directory
MENA_shp = gpd.read_file(f'{dir_shp}MENA_ADM2.shp')

# Show the layer's coordinate reference system as the cell output
MENA_shp.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

### Load population rasters and compute zonal statistics

In [6]:
# Build one wide table: one row per polygon in MENA_shp, plus one group of
# zonal-statistics columns (named <VAR><stat>_<YEAR>) for every year.
#
# NOTE(review): only min/mean/max/median/std/count are requested; if total
# population per polygon is needed, a 'sum' statistic is not computed here —
# confirm whether it should be added.

# First frame holds the polygon identifiers; the index is reset so that it
# lines up positionally with the per-year statistics frames below.
year_frames = [pd.DataFrame(MENA_shp['ID_ADM']).reset_index(drop = True)]

for YEAR in YEAR_LIST:
    with rasterio.open(dir_in + 'ppp_' + str(YEAR) +
                       '_1km_Aggregated.tif') as raster:
        array = raster.read(1)
        affine = raster.transform
        nodata = raster.nodata
        # Read the CRS while the dataset is still open instead of touching
        # the closed handle after the `with` block.
        raster_crs = raster.crs

    # Reproject into a new object so MENA_shp itself is not mutated on every
    # iteration (re-running the cell then always starts from the same input).
    shp_in_raster_crs = MENA_shp.to_crs(raster_crs)

    # Extract raster by polygon.
    # `interpolate` is a point_query option and is not used by zonal_stats,
    # so it is not passed here. all_touched = True is kept as before.
    dict_temp = zonal_stats(shp_in_raster_crs, array, affine = affine,
                            nodata = nodata, all_touched = True,
                            stats = ['min', 'mean', 'max', 'median', 'std', 'count'])

    # zonal_stats returns one dict per polygon, in the order of the input
    # features, so the resulting rows line up with the ID frame by position.
    df_temp = pd.DataFrame(dict_temp)
    df_temp.columns = [VAR + str(col) + '_' + str(YEAR) for col in df_temp.columns]
    year_frames.append(df_temp)

# Join by position in a single step. This gives the same table as merging on
# ID_ADM when the IDs are unique, and cannot duplicate rows when they are not.
df_full = pd.concat(year_frames, axis = 1)

In [6]:
# Display the combined table (pandas truncates the rendering of large frames)
df_full

Unnamed: 0,ID_ADM,POP_min_2000,POP_max_2000,POP_mean_2000,POP_count_2000,POP_std_2000,POP_median_2000,POP_min_2005,POP_max_2005,POP_mean_2005,...,POP_mean_2015,POP_count_2015,POP_std_2015,POP_median_2015,POP_min_2020,POP_max_2020,POP_mean_2020,POP_count_2020,POP_std_2020,POP_median_2020
0,37631,0.414223,1611.324951,53.821983,912,158.143397,7.515827,0.340664,2850.819092,66.351357,...,96.326052,912,385.436366,14.456112,1.142250,5308.097168,117.628384,912,510.062055,13.603415
1,37632,0.128915,81.391754,3.697675,2182,9.303128,1.610895,0.131175,156.971405,4.301199,...,5.751155,2182,10.372151,4.167399,0.285351,404.456116,6.710963,2182,15.111817,4.849154
2,37633,0.047294,108.840691,0.557358,18906,3.634108,0.129715,0.045138,171.696426,0.603160,...,0.726833,18906,7.258346,0.191492,0.070510,597.460510,0.793500,18906,9.391222,0.212453
3,37634,0.131578,60.229355,3.874770,3992,5.993184,2.416677,0.128437,214.600418,4.727891,...,6.574974,3992,11.217599,4.517100,0.287954,396.395599,7.873841,3992,16.290413,5.320157
4,37635,0.007514,16.079077,0.067431,152922,0.173688,0.044444,0.012542,58.140713,0.089490,...,0.157888,152922,0.223000,0.112827,0.072692,68.637611,0.209709,152922,0.416619,0.153840
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2932,15561,0.000000,17505.392578,529.190182,5103,744.618805,395.443451,0.000000,21774.845703,583.171909,...,701.717323,5103,1097.165810,457.145874,0.000000,29981.365234,773.620272,5103,1190.304164,507.658264
2933,15562,0.000000,4037.879883,1.454047,212627,30.792179,0.000000,0.000000,4824.119141,1.679864,...,2.238031,212627,49.834419,0.000000,0.000000,7011.243164,2.591070,212627,55.706399,0.000000
2934,15563,0.000000,10726.921875,619.618315,4974,1050.751202,0.000000,0.000000,9870.050781,623.913701,...,785.391938,4974,1792.438802,0.000000,0.000000,53477.222656,1053.944713,4974,3292.439112,0.000000
2935,15564,0.000000,3056.863770,7.395430,37065,60.618743,0.694808,0.000000,3697.207764,8.384602,...,10.693783,37065,88.476504,1.033680,0.000000,5327.790527,12.004474,37065,95.227526,1.139799


In [14]:
# Smallest value in the POP_median_2000 column across all rows
df_full['POP_median_2000'].min()

0.0

In [15]:
# Write the combined table to the output directory, omitting the index column
df_full.to_csv(f'{dir_out}{FILE_OUT}', index = False)