# MENA Analysis: CO2 Concentration
## 2015 - 2020

### Load packages and setup

In [2]:
import geopandas as gpd
import pandas as pd
import numpy as np
from rasterstats import zonal_stats
import rasterio

In [3]:
# Variable prefix and the years to process.
# Only the years listed in YEAR_LIST are read by the extraction loop.
# NOTE(review): the notebook title reads "2015 - 2020"; if every year in that
# span is intended, this should be list(range(2015, 2021)) — confirm.
VAR = 'CO2_'
YEAR_LIST = [2015, 2020]

# Output file name pattern: ADM2_<VAR><first year>_<last year>.csv
FILE_OUT = f'ADM2_{VAR}{min(YEAR_LIST)}_{max(YEAR_LIST)}.csv'

# Input / output directories (absolute, machine-specific paths)
dir_shp = '/Users/ashitakarl/Library/CloudStorage/Dropbox/WB/MENA_WorldBank/Boundaries/'
dir_in = '/Users/ashitakarl/Desktop/Project/WB/MENA/oco2_data/'
dir_out = '/Users/ashitakarl/Library/CloudStorage/Dropbox/WB/MENA_WorldBank/Index/ADM2/'

In [4]:
# Line magic: render matplotlib figures inline in the notebook output.
# NOTE(review): no plotting call is visible in the cells shown here — confirm
# this magic is still needed.
%matplotlib inline

### Load shapefile

In [5]:
# Read the ADM2 boundary polygons into a GeoDataFrame
shapefile_path = dir_shp + 'MENA_ADM2.shp'
MENA_shp = gpd.read_file(shapefile_path)

# Show the coordinate reference system of the boundaries
MENA_shp.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

### Load CO2 (XCO2) rasters and extract zonal statistics

In [6]:
# Build one table of per-polygon CO2 statistics, with one set of columns per
# year in YEAR_LIST. Columns are named <VAR><stat>_<YEAR>, e.g. CO2_mean_2015.
#
# Changes relative to the previous version of this cell:
#   * the raster CRS is captured while the dataset is still open;
#   * MENA_shp is no longer re-projected in place, so re-running the cell (or
#     any other cell that uses MENA_shp) always starts from the same geometry;
#   * the `interpolate='bilinear'` keyword was dropped: it is a point_query
#     option and has no effect on zonal_stats;
#   * yearly tables are aligned by row position and concatenated once, instead
#     of being merged on ID_ADM inside the loop (a merge would multiply rows if
#     any ID_ADM value were repeated).
yearly_frames = [pd.DataFrame(MENA_shp['ID_ADM'])]

for YEAR in YEAR_LIST:
    raster_path = dir_in + 'xco2_' + str(YEAR) + '_linear.tif'
    with rasterio.open(raster_path) as raster:
        array = raster.read(1)        # first band as a 2-D array
        affine = raster.transform     # pixel -> map coordinate transform
        nodata = raster.nodata
        raster_crs = raster.crs       # read before the dataset is closed

    # Re-project a copy of the polygons into the raster's CRS.
    shp_in_raster_crs = MENA_shp.to_crs(raster_crs)

    # Extract raster by polygon; all_touched=True counts every pixel the
    # polygon touches, not only pixels whose centre falls inside it.
    dict_temp = zonal_stats(shp_in_raster_crs, array, affine=affine,
                            nodata=nodata, all_touched=True,
                            stats=['min', 'mean', 'max', 'median', 'std', 'count'])

    # zonal_stats returns one dict per polygon in input order, so the rows can
    # be labelled with the shapefile's own index for a positional join.
    df_temp = pd.DataFrame(dict_temp, index=MENA_shp.index)
    df_temp = df_temp.add_suffix('_' + str(YEAR)).add_prefix(VAR)
    yearly_frames.append(df_temp)

# Single column-wise concatenation: ID_ADM followed by each year's statistics.
df_full = pd.concat(yearly_frames, axis=1)





























































In [12]:
# Number of polygons whose 2015 median value is missing
df_full['CO2_median_2015'].isna().sum()

0

In [15]:
# Average of the per-polygon 2015 median values (quick sanity check)
df_full['CO2_median_2015'].mean()

399.8965326822409

In [7]:
# Display the combined zonal-statistics table (pandas truncates the middle rows).
df_full

Unnamed: 0,ID_ADM,CO2_min_2015,CO2_max_2015,CO2_mean_2015,CO2_count_2015,CO2_std_2015,CO2_median_2015,CO2_min_2020,CO2_max_2020,CO2_mean_2020,CO2_count_2020,CO2_std_2020,CO2_median_2020
0,37631,399.607960,401.358550,400.399209,15,0.428803,400.466887,409.163792,414.569980,411.808040,15,1.468331,411.502822
1,37632,398.298823,400.135250,399.067373,30,0.476779,398.958232,409.627150,414.687820,412.038940,30,1.321080,412.115414
2,37633,396.497040,402.828550,399.624463,164,1.230059,399.816454,411.252160,416.116805,412.707501,164,1.154179,412.215215
3,37634,398.298823,399.331870,398.804309,44,0.247429,398.770686,409.504947,413.156740,410.843125,44,0.783237,410.767444
4,37635,397.834565,401.409114,399.242787,1166,0.543887,399.176802,408.650600,416.629880,411.987189,1166,0.971735,411.830835
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2932,15561,397.557237,403.184900,399.627698,60,1.296823,399.577296,409.300400,415.623524,412.041846,60,1.779249,411.703950
2933,15562,395.179840,403.802830,399.108872,1593,1.277502,399.050333,407.153750,416.193300,411.728523,1593,1.674308,411.773575
2934,15563,397.440965,404.915400,400.148452,67,1.350926,400.179754,410.582541,416.046059,413.403369,67,1.316005,413.657800
2935,15564,396.418676,404.678170,399.726487,308,1.549688,399.625266,408.293980,416.747500,412.694218,308,1.557666,412.800804


In [16]:
# Write the combined table to the output directory, without the row index
output_path = dir_out + FILE_OUT
df_full.to_csv(output_path, index=False)