# MENA Analysis: Landcover
## 2001 - 2020

### Load packages and setup

In [1]:
import os
import warnings
import matplotlib.pyplot as plt
import numpy as np
from shapely.geometry import mapping, box
import geopandas as gpd
import pandas as pd
from rasterstats import zonal_stats
from rasterio.merge import merge
import glob
import rasterio

In [2]:
# Define directories
# Each directory can be overridden with an environment variable so the notebook can
# run on another machine without editing this cell; the fallbacks are the original
# local paths.
def _dir_setting(env_name, default):
    """Return the directory from `env_name` (or `default`) with a trailing separator."""
    # Later cells build file names by plain string concatenation
    # (e.g. dir_shp + 'MENA_ADM2.shp'), so every directory must end with a separator.
    # An empty environment value is treated the same as an unset one.
    return os.path.join(os.environ.get(env_name) or default, '')

dir_shp = _dir_setting('MENA_DIR_SHP',
                       '/Users/ashitakarl/Library/CloudStorage/Dropbox/WB/MENA_WorldBank/Boundaries/')
dir_in = _dir_setting('MENA_DIR_IN',
                      '/Users/ashitakarl/Desktop/Project/WB/MENA/MODIS/')
dir_out = _dir_setting('MENA_DIR_OUT',
                       '/Users/ashitakarl/Library/CloudStorage/Dropbox/WB/MENA_WorldBank/Index/ADM2/')

# Define output FILE
# VAR is the prefix given to every extracted column; YEAR_LIST holds the years whose
# '<YEAR>_Merged.tif' raster is read by the extraction cell.
VAR = 'LAND_'
YEAR_LIST = [2001, 2005, 2010, 2015, 2020]
# Output name encodes the variable and the first/last year, e.g. ADM2_LAND_2001_2020.csv
FILE_OUT = 'ADM2_' + VAR + str(min(YEAR_LIST)) + '_' + str(max(YEAR_LIST)) + '.csv'

In [5]:
# Render matplotlib figures inside the notebook output (IPython line magic)
%matplotlib inline

### Load shapefile

In [3]:
# Read the MENA ADM2 boundary layer from the boundaries directory
shp_path = dir_shp + 'MENA_ADM2.shp'
MENA_shp = gpd.read_file(shp_path)

# Display the coordinate reference system of the loaded layer
MENA_shp.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

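### Load and process rasters

No raster-processing code appears in this section; the cell below reads the per-year `<YEAR>_Merged.tif` files directly from `dir_in`, so those files must already exist.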

### Load back merged rasters and extract values

In [21]:
# Build one wide table of per-polygon land-cover pixel counts: one row per ID_ADM,
# one group of columns per year in YEAR_LIST (suffix '_<YEAR>').
df_land = pd.DataFrame(MENA_shp['ID_ADM'])

# Class-count columns used by the aggregates below. They are created as 0 when a
# class never occurs in a given year, so the sums cannot fail on a missing column.
# Names are built from VAR so they always match the prefix applied to the stats.
FOREST_COLS = [VAR + str(code) for code in (1, 2, 3, 4, 5)]
COL_LIST = FOREST_COLS + [VAR + '12', VAR + '14', VAR + '16']

# Weights applied to the raw pixel counts.
# NOTE(review): these factors are undocumented in the notebook; presumably they are
# assumed cover fractions for the respective land-cover classes - confirm the source.
WEIGHT_HIGH = 0.8
WEIGHT_LOW = 0.5

# Working copy of the polygons; it is reprojected by reassignment so that MENA_shp
# itself is never modified and this cell gives the same result when re-run.
zones = MENA_shp

for YEAR in YEAR_LIST:

    # Read everything needed from the raster while the dataset is still open
    with rasterio.open(dir_in + str(YEAR) + '_Merged.tif') as raster:
        array = raster.read(1)
        affine = raster.transform
        nodata = raster.nodata
        raster_crs = raster.crs

    # Put the polygons in the raster's CRS (returns a new object, no in-place change)
    zones = zones.to_crs(raster_crs)

    # Extract raster by polygon: with categorical=True each result dict also holds a
    # pixel count per raster value, next to the requested 'mean' and 'count'
    dict_temp = zonal_stats(zones, array, affine = affine, nodata = nodata,
                            stats = ['mean', 'count'], categorical = True)
    land_temp = pd.DataFrame(dict_temp).add_prefix(VAR)

    for COL in COL_LIST:
        if COL not in land_temp.columns:
            land_temp[COL] = 0
    # A class absent from a polygon is missing from its dict -> NaN -> count of 0
    land_temp = land_temp.fillna(0)

    land_temp['Forest'] = land_temp[FOREST_COLS].sum(axis = 1) * WEIGHT_HIGH
    land_temp['Cropland'] = (land_temp[VAR + '12'] * WEIGHT_HIGH +
                             land_temp[VAR + '14'] * WEIGHT_LOW)
    land_temp['Barren'] = land_temp[VAR + '16'] * WEIGHT_HIGH
    land_temp = land_temp.add_suffix('_' + str(YEAR))

    # zonal_stats returns one record per input feature, in order, so the IDs are
    # attached by position; this avoids depending on MENA_shp having a 0..n-1 index
    land_temp.insert(0, 'ID_ADM', MENA_shp['ID_ADM'].to_numpy())

    df_land = df_land.merge(land_temp, how = 'left', on = 'ID_ADM')

# Export
# Written to the current working directory (not dir_out) as an intermediate copy
df_land.to_csv('df_land.csv', index = False)

In [22]:
# Replace any remaining missing values with 0, then show the table
df_land = df_land.fillna(0)
df_land

Unnamed: 0,ID_ADM,LAND_7_2001,LAND_10_2001,LAND_13_2001,LAND_16_2001,LAND_mean_2001,LAND_count_2001,LAND_12_2001,LAND_9_2001,LAND_14_2001,...,LAND_5_2020,LAND_11_2020,LAND_2_2020,LAND_6_2020,LAND_4_2020,LAND_15_2020,LAND_3_2020,Forest_2020,Cropland_2020,Barren_2020
0,37631,29.0,21.0,71.0,3101.0,15.813780,3222,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,2493.6
1,37632,0.0,0.0,0.0,7766.0,16.000000,7766,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,6212.8
2,37633,65.0,19.0,0.0,66064.0,15.989433,66148,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,52860.0
3,37634,0.0,0.0,0.0,14228.0,16.000000,14228,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,11382.4
4,37635,0.0,0.0,16.0,559508.0,15.999914,559524,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,447606.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2932,15561,0.0,16.0,602.0,37.0,12.036153,17426,15601.0,593.0,338.0,...,0.0,45.0,0.0,0.0,0.0,0.0,0,0.0,12722.4,17.6
2933,15562,357.0,440.0,160.0,735911.0,15.990369,737602,301.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,366.4,588634.4
2934,15563,561.0,592.0,536.0,13778.0,14.921830,17897,2229.0,30.0,0.0,...,0.0,16.0,0.0,0.0,0.0,0.0,0,0.0,1989.8,10348.0
2935,15564,0.0,0.0,82.0,127085.0,16.002349,127713,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,101601.6


In [23]:
# Keep the polygon ID, the pixel count of the earliest year, and the three weighted
# aggregates for every year. The column names are derived from VAR and YEAR_LIST so
# this selection stays in step with the extraction cell instead of repeating the
# years by hand.
AGG_NAMES = ['Forest', 'Cropland', 'Barren']
cols_out = ['ID_ADM', VAR + 'count_' + str(min(YEAR_LIST))]
cols_out += [name + '_' + str(year) for year in YEAR_LIST for name in AGG_NAMES]
df_out = df_land[cols_out]

In [24]:
# Display the selected output columns for a visual check before export
df_out

Unnamed: 0,ID_ADM,LAND_count_2001,Forest_2001,Cropland_2001,Barren_2001,Forest_2005,Cropland_2005,Barren_2005,Forest_2010,Cropland_2010,Barren_2010,Forest_2015,Cropland_2015,Barren_2015,Forest_2020,Cropland_2020,Barren_2020
0,37631,3222,0.0,0.0,2480.8,0.0,0.0,2490.4,0.0,0.0,2486.4,0.0,0.0,2488.0,0.0,0.0,2493.6
1,37632,7766,0.0,0.0,6212.8,0.0,0.0,6212.8,0.0,0.0,6212.8,0.0,0.0,6212.8,0.0,0.0,6212.8
2,37633,66148,0.0,0.0,52851.2,0.0,0.0,52840.0,0.0,0.0,52860.8,0.0,0.0,52869.6,0.0,0.0,52860.0
3,37634,14228,0.0,0.0,11382.4,0.0,0.0,11382.4,0.0,0.0,11382.4,0.0,0.0,11382.4,0.0,0.0,11382.4
4,37635,559524,0.0,0.0,447606.4,0.0,0.0,447606.4,0.0,0.0,447606.4,0.0,0.0,447606.4,0.0,0.0,447606.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2932,15561,17426,0.0,12649.8,29.6,0.0,12655.3,24.8,0.0,12712.0,20.8,0.0,12719.1,20.0,0.0,12722.4,17.6
2933,15562,737602,0.0,240.8,588728.8,0.0,263.2,588545.6,0.0,313.6,588384.8,0.0,348.0,588242.4,0.0,366.4,588634.4
2934,15563,17897,0.0,1783.2,11022.4,0.0,1795.2,10932.8,0.0,1871.2,10766.4,0.0,1906.1,10524.8,0.0,1989.8,10348.0
2935,15564,127713,0.0,0.0,101668.0,0.0,0.0,101668.0,0.0,0.0,101663.2,0.0,0.0,101657.6,0.0,0.0,101601.6


In [25]:
# Write the selected columns to the output directory under the configured file name
out_path = dir_out + FILE_OUT
df_out.to_csv(out_path, index = False)

In [29]:
# Sanity check: largest weighted forest value across polygons for 2001
df_out['Forest_2001'].max()

4048.0