# MENA Analysis: Forest
## 2000-2020

### Load packages and setup

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
from rasterstats import zonal_stats
import rasterio
from rasterio.merge import merge
import os
import glob

In [2]:
# Define directories
# Every default below is the original author's local path. To run the notebook on
# another machine, set the matching environment variable (e.g. MENA_DIR_IN)
# instead of editing this cell.
def resolve_dir(env_name, default):
    """Return the directory configured in `env_name`, falling back to `default`.

    The result always ends with a path separator, because the rest of the
    notebook builds file paths with plain `dir_x + 'file'` concatenation.
    An unset or empty environment variable selects `default`.
    """
    # os.path.join(path, '') appends a separator only when one is missing
    return os.path.join(os.environ.get(env_name) or default, '')

dir_shp = resolve_dir('MENA_DIR_SHP', '/Users/ashitakarl/Library/CloudStorage/Dropbox/WB/MENA_WorldBank/Boundaries/')
dir_base = resolve_dir('MENA_DIR_BASE', '/Users/ashitakarl/Desktop/Project/WB/MENA/Hansen/base/')
dir_loss = resolve_dir('MENA_DIR_LOSS', '/Users/ashitakarl/Desktop/Project/WB/MENA/Hansen/loss/')
dir_in = resolve_dir('MENA_DIR_IN', '/Users/ashitakarl/Desktop/Project/WB/MENA/Hansen/')
dir_out = resolve_dir('MENA_DIR_OUT', '/Users/ashitakarl/Library/CloudStorage/Dropbox/WB/MENA_WorldBank/Index/')

# Define output FILE
# The file name is built as ADM2_<VAR><first year>_<last year>.csv
VAR = 'LOSS_'
YEAR_LIST = [2000, 2005, 2010, 2015, 2020]
FILE_OUT = 'ADM2_' + VAR + str(min(YEAR_LIST)) + '_' + str(max(YEAR_LIST)) + '.csv'

In [3]:
# Render matplotlib figures inline in the notebook output.
# NOTE(review): no plotting call is visible in this notebook view - confirm this magic is still needed.
%matplotlib inline

### Load shapefile

In [4]:
# Read the ADM2 boundary polygons from the boundaries directory
MENA_shp = gpd.read_file(f'{dir_shp}MENA_ADM2.shp')

# Show the layer's coordinate reference system as the cell output
MENA_shp.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

### Load and process base layers

### Load and process loss layers

### Extract base raster

In [5]:
# Zonal statistics of the base raster for every polygon in MENA_shp.
# Result: df_base with one row per polygon and columns
# ID_ADM, BASE_mean_<YEAR>, BASE_count_<YEAR>; also written to df_base.csv.
df_base = pd.DataFrame(MENA_shp['ID_ADM'])

for YEAR in [2000]:
    with rasterio.open(dir_in + 'base_full.tif') as raster:
        array = raster.read(1)
        affine = raster.transform
        nodata = raster.nodata
        # Capture the CRS while the dataset is still open rather than reading
        # an attribute of an already-closed dataset afterwards.
        raster_crs = raster.crs

    # Put the polygons in the raster's CRS so they overlay the pixel grid.
    # This reprojects MENA_shp itself (in place), as the original cell did.
    MENA_shp.to_crs(raster_crs, inplace = True)

    # Extract raster by polygon
    dict_temp = zonal_stats(MENA_shp, array, affine = affine, nodata = nodata,
                          stats = ['mean', 'count'])
    base_temp = pd.DataFrame(dict_temp)
    base_temp = base_temp.add_suffix('_' + str(YEAR))
    base_temp = base_temp.add_prefix('BASE_')
    # base_temp has a fresh 0..n-1 index, so attach the IDs by position;
    # inserting the Series directly would align on MENA_shp's index labels and
    # silently misplace IDs if that index is not the default one.
    # NOTE(review): relies on zonal_stats returning one record per polygon in input order - confirm.
    base_temp.insert(0, 'ID_ADM', MENA_shp['ID_ADM'].to_numpy())

    df_base = df_base.merge(base_temp, how = 'left', on = 'ID_ADM')

# Export (relative path: written to the notebook's current working directory)
df_base.to_csv('df_base.csv', index = False)





















































































































In [8]:
# Display the per-polygon base statistics (ID_ADM, BASE_mean_2000, BASE_count_2000)
df_base

Unnamed: 0,ID_ADM,BASE_mean_2000,BASE_count_2000
0,37631,0.000000,1014395
1,37632,0.000002,2421916
2,37633,0.000002,21012777
3,37634,0.000000,4437265
4,37635,0.000000,169916730
...,...,...,...
2932,15561,18.378461,5671558
2933,15562,0.000109,236153161
2934,15563,1.042405,5535369
2935,15564,0.000001,41186955


In [11]:
# Per-polygon pixel counts for every value in the loss raster.
# With categorical = True each distinct raster value becomes its own column,
# so after prefixing the frame holds LOSS_<value> next to LOSS_mean / LOSS_count.
# NOTE: the raster also yields a LOSS_21 column (presumably loss year 2021 -
# confirm against the Hansen version used), which lies outside the 2000-2020
# study window.
df_loss = pd.DataFrame(MENA_shp['ID_ADM'])

with rasterio.open(dir_in + 'loss_full.tif') as raster:
    array = raster.read(1)
    affine = raster.transform
    nodata = raster.nodata
    # Capture the CRS while the dataset is still open rather than reading
    # an attribute of an already-closed dataset afterwards.
    raster_crs = raster.crs

# Put the polygons in the raster's CRS so they overlay the pixel grid.
# This reprojects MENA_shp itself (in place), as the original cell did.
MENA_shp.to_crs(raster_crs, inplace = True)

# Extract raster by polygon
dict_temp = zonal_stats(MENA_shp, array, affine = affine, nodata = nodata,
                        stats = ['mean', 'count'], categorical = True)
loss_temp = pd.DataFrame(dict_temp)
loss_temp = loss_temp.add_prefix('LOSS_')
# loss_temp has a fresh 0..n-1 index, so attach the IDs by position;
# inserting the Series directly would align on MENA_shp's index labels and
# silently misplace IDs if that index is not the default one.
# NOTE(review): relies on zonal_stats returning one record per polygon in input order - confirm.
loss_temp.insert(0, 'ID_ADM', MENA_shp['ID_ADM'].to_numpy())

df_loss = df_loss.merge(loss_temp, how = 'left', on = 'ID_ADM')



























































































































In [14]:
# Display the per-polygon loss counts (one LOSS_<value> column per raster value, plus LOSS_mean / LOSS_count)
df_loss

Unnamed: 0,ID_ADM,LOSS_0,LOSS_mean,LOSS_count,LOSS_1,LOSS_2,LOSS_3,LOSS_4,LOSS_6,LOSS_7,...,LOSS_13,LOSS_14,LOSS_15,LOSS_16,LOSS_17,LOSS_18,LOSS_20,LOSS_21,LOSS_5,LOSS_19
0,37631,1014395,0.000000,1014395,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,37632,2421916,0.000000,2421916,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,37633,21012777,0.000000,21012777,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,37634,4437265,0.000000,4437265,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,37635,169916730,0.000000,169916730,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2932,15561,5656903,0.029746,5671558,897.0,195.0,333.0,708.0,182.0,316.0,...,1583.0,1722.0,249.0,220.0,555.0,996.0,179.0,394.0,350.0,155.0
2933,15562,236153112,0.000002,236153161,1.0,0.0,0.0,0.0,1.0,2.0,...,34.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2934,15563,5535023,0.000806,5535369,0.0,0.0,3.0,0.0,4.0,2.0,...,158.0,120.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
2935,15564,41186831,0.000039,41186955,0.0,0.0,0.0,0.0,0.0,0.0,...,84.0,21.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Replace every missing value with 0 before exporting
# (presumably the gaps are loss classes that never occur inside a polygon - confirm)
df_loss = df_loss.fillna(0)

# Export (relative path: written to the notebook's current working directory)
df_loss.to_csv('df_loss.csv', index = False)

In [16]:
# Collapse the per-value loss counts into four 5-value windows.
# Value k is treated as loss in year 2000 + k, matching the column names below
# (NOTE(review): confirm the raster's value encoding).
# NOTE(review): LOSS_21 is in none of the windows and is not part of LOSS_0,
# so those pixels drop out of df_full - confirm this is intended.
loss_periods = {
    'LOSS_2001_2005': range(1, 6),
    'LOSS_2006_2010': range(6, 11),
    'LOSS_2011_2015': range(11, 16),
    'LOSS_2016_2020': range(16, 21),
}

for period_col, codes in loss_periods.items():
    code_cols = ['LOSS_' + str(code) for code in codes]
    # reindex adds any loss value that never occurs in the region as a zero
    # column instead of raising KeyError. sum(axis = 1) skips NaN, so the totals
    # are correct even if the fillna(0) cell has not been run yet.
    df_loss[period_col] = df_loss.reindex(columns = code_cols, fill_value = 0).sum(axis = 1)

# Keep the no-loss count and the period totals, then attach the base statistics
df_full = df_loss[['ID_ADM', 'LOSS_0'] + list(loss_periods)]
df_full = df_full.merge(df_base, on = 'ID_ADM', how = 'left')

In [21]:
# Fix the column order for reporting and relabel the zero-valued (no loss) class.
# Re-running this cell fails because LOSS_0 no longer exists after the rename.
df_full = df_full.loc[:, ['ID_ADM', 'LOSS_0',
                          'LOSS_2001_2005', 'LOSS_2006_2010', 'LOSS_2011_2015', 'LOSS_2016_2020',
                          'BASE_mean_2000', 'BASE_count_2000']].rename(columns = {'LOSS_0': 'No_LOSS'})

In [22]:
# Display the combined loss / base table (pixel counts per polygon)
df_full

Unnamed: 0,ID_ADM,No_LOSS,LOSS_2001_2005,LOSS_2006_2010,LOSS_2011_2015,LOSS_2016_2020,BASE_mean_2000,BASE_count_2000
0,37631,1014395,0.0,0.0,0.0,0.0,0.000000,1014395
1,37632,2421916,0.0,0.0,0.0,0.0,0.000002,2421916
2,37633,21012777,0.0,0.0,0.0,0.0,0.000002,21012777
3,37634,4437265,0.0,0.0,0.0,0.0,0.000000,4437265
4,37635,169916730,0.0,0.0,0.0,0.0,0.000000,169916730
...,...,...,...,...,...,...,...,...
2932,15561,5656903,2483.0,1386.0,8287.0,2105.0,18.378461,5671558
2933,15562,236153112,1.0,5.0,43.0,0.0,0.000109,236153161
2934,15563,5535023,5.0,10.0,331.0,0.0,1.042405,5535369
2935,15564,41186831,0.0,0.0,124.0,0.0,0.000001,41186955


In [25]:
# Convert pixel counts to square kilometres: count * 900 m^2 per pixel / 1,000,000 m^2 per km^2.
# NOTE(review): 900 m^2 presumes 30 m x 30 m pixels. If the rasters are on a
# geographic (degree) grid, the true pixel area shrinks with latitude - confirm.
sqkm_from_count = {
    'No_Loss_sqkm_2001_2020': 'No_LOSS',
    'Loss_sqkm_2001_2005': 'LOSS_2001_2005',
    'Loss_sqkm_2006_2010': 'LOSS_2006_2010',
    'Loss_sqkm_2011_2015': 'LOSS_2011_2015',
    'Loss_sqkm_2016_2020': 'LOSS_2016_2020',
}

df_out = df_full.copy(deep = True)

# Base layer: pixel count times mean value / 100 gives the covered pixel-equivalents
# (presumably the mean is percent tree cover - confirm)
df_out['Forest_sqkm_2000'] = (df_out['BASE_count_2000'] * df_out['BASE_mean_2000'] / 100) * 900 / 1000000

for sqkm_col, count_col in sqkm_from_count.items():
    df_out[sqkm_col] = (df_out[count_col]) * 900 / 1000000

# Keep the ID and the area columns only, in reporting order
df_out = df_out[['ID_ADM', 'Forest_sqkm_2000', *sqkm_from_count]]

In [26]:
# Display the final per-polygon areas in square kilometres
df_out

Unnamed: 0,ID_ADM,Forest_sqkm_2000,No_Loss_sqkm_2001_2020,Loss_sqkm_2001_2005,Loss_sqkm_2006_2010,Loss_sqkm_2011_2015,Loss_sqkm_2016_2020
0,37631,0.000000,912.9555,0.0000,0.0000,0.0000,0.0000
1,37632,0.000036,2179.7244,0.0000,0.0000,0.0000,0.0000
2,37633,0.000297,18911.4993,0.0000,0.0000,0.0000,0.0000
3,37634,0.000000,3993.5385,0.0000,0.0000,0.0000,0.0000
4,37635,0.000000,152925.0570,0.0000,0.0000,0.0000,0.0000
...,...,...,...,...,...,...,...
2932,15561,938.110545,5091.2127,2.2347,1.2474,7.4583,1.8945
2933,15562,0.232317,212537.8008,0.0009,0.0045,0.0387,0.0000
2934,15563,51.930882,4981.5207,0.0045,0.0090,0.2979,0.0000
2935,15564,0.000486,37068.1479,0.0000,0.0000,0.1116,0.0000


In [27]:
# Write the final table to the output directory under the configured file name
df_out.to_csv(f'{dir_out}{FILE_OUT}', index = False)