# MENA Analysis: CO2 Concentration
## 2015 - 2020

### Load packages and setup

In [76]:
import geopandas as gpd
import pandas as pd
import numpy as np
from rasterstats import zonal_stats
import rasterio
import netCDF4 as nc
from affine import Affine
import xarray as xr
from rasterio.crs import CRS
import regionmask
import os

In [77]:
# Define directories (absolute paths on the M: drive)
# dir_shp: folder holding the boundary shapefile that is read below
dir_shp = 'M:/MENA/GEO/Boundaries/'
# dir_in: folder scanned for the monthly .nc4 input files
dir_in = 'M:/MENA/GEO/Hazards/CO2/raw/monthly/'
# dir_out: folder the final CSV is written to
dir_out = 'M:/MENA/GEO/Hazards/CO2/final/'

# Define run parameters
# NOTE(review): FILE_OUT, which the final to_csv cell uses, is not defined in
# this cell -- confirm where it is supposed to be set.
# VAR is presumably the prefix of the output column names -- TODO confirm
VAR = 'CO2_'
# Years and months (months as strings) used to build the input file names
YEAR_LIST = [2015]
MONTH_LIST = ['1','2']

In [78]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline

### Load shapefile

In [79]:
# Read the MENA_ADM2 boundary polygons into a GeoDataFrame
shp_name = 'MENA_ADM2.shp'
MENA_shp = gpd.read_file(dir_shp + shp_name)

# Show the coordinate reference system of the boundaries
MENA_shp.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [80]:
# Function to mask a NetCDF file with a set of polygons and return the masked data:
def extract_values(file_path, shape_gdf):
    """Open a NetCDF file and blank out every grid cell outside the given polygons.

    Parameters
    ----------
    file_path : str or path-like
        Path of the .nc4 file to open with xarray.
    shape_gdf : geopandas.GeoDataFrame
        Polygons used to build the mask. They are assumed to be in the same
        coordinate system as the dataset grid -- TODO confirm.

    Returns
    -------
    xarray.Dataset
        The dataset, loaded into memory, with all cells that fall outside
        every polygon replaced by NaN.
    """
    # The context manager releases the file handle once the data is in memory
    with xr.open_dataset(file_path) as ds:
        # regionmask labels each grid cell with the number of the polygon that
        # contains it and leaves NaN where no polygon applies
        mask = regionmask.mask_geopandas(shape_gdf, ds)

        # `where` needs a boolean condition. Passing the numeric mask directly
        # would keep the NaN cells (NaN is truthy when cast to bool) and drop
        # the cells of region number 0, so test explicitly for "has a region".
        inside_any_shape = mask.notnull()

        # Load before the file is closed so the result stays usable afterwards
        masked_ds = ds.where(inside_any_shape).load()

    # Return the masked dataset
    return masked_ds


In [81]:
# Collect the path of every .nc4 file found in the input directory
files = []
for fname in os.listdir(dir_in):
    if fname.endswith('.nc4'):
        files.append(os.path.join(dir_in, fname))

# Mask each file with the MENA boundaries, processing the paths in sorted order
all_data = [extract_values(nc_path, MENA_shp) for nc_path in sorted(files)]

In [None]:
# Join the per-file masked datasets into one dataset along the 'time' dimension
combined_ds = xr.concat(objs=all_data, dim='time')


In [None]:
# Save the combined dataset to a new .nc4 file in the output directory
# (the previous '/path/to/...' target was a template placeholder, not a real location)
combined_ds.to_netcdf(dir_out + 'combined_data.nc4')


In [68]:
# Zonal statistics: mean XCO2 per MENA_shp polygon for every year/month in the config lists

all_stats = []

for YEAR in YEAR_LIST:
    for MONTH in MONTH_LIST:
        # Zero-pad the month so that months 10-12 also give a valid YYYYMM stamp
        # (a hard-coded "0" prefix only works for single-digit months)
        file_path = f"{dir_in}oco2_GEOS_L3CO2_month_{YEAR}{int(MONTH):02d}_B10206Ar.nc4"

        # The context manager closes the file even if a later step raises
        with xr.open_dataset(file_path) as ds:
            # Slice the array to make it 2D (first entry of the leading axis);
            # the remaining axes are assumed to be (lat, lon) -- TODO confirm
            arr = ds['XCO2'].values[0, :, :]

            # Plain float arrays: the transform must be built from numbers,
            # not from xarray DataArray objects
            lon = np.asarray(ds['lon'].values, dtype=float)
            lat = np.asarray(ds['lat'].values, dtype=float)

            # CRS declared by the file, falling back to EPSG:4326. It is not passed
            # to zonal_stats; the raster is assumed to share the CRS of MENA_shp.
            ds_crs = CRS.from_string(ds.attrs.get('crs', "EPSG:4326"))

        # from_origin describes a north-up raster (row 0 = northernmost row,
        # column 0 = westernmost column), so reorder the array to match when the
        # file stores its coordinates the other way round
        if lat[0] < lat[-1]:
            arr = arr[::-1, :]
        if lon[0] > lon[-1]:
            arr = arr[:, ::-1]

        # Get the affine transform of the data. lon/lat are treated as cell
        # centres (TODO confirm), whereas from_origin expects the outer corner of
        # the upper-left cell, hence the half-cell shift.
        xres = float(abs(lon[1] - lon[0]))
        yres = float(abs(lat[1] - lat[0]))
        west = float(lon.min()) - xres / 2
        north = float(lat.max()) + yres / 2
        transform = rasterio.transform.from_origin(west, north, xres, yres)

        # Calculate statistics (one dict per polygon of MENA_shp)
        stats = zonal_stats(MENA_shp, arr, affine=transform, stats=["mean"])

        all_stats.append({
            'year': YEAR,
            'month': MONTH,
            'statistics': stats
        })

df = pd.DataFrame(all_stats)
print(df)


   year month                                         statistics
0  2015     1  [{'mean': None, 'median': None, 'max': None, '...
1  2015     2  [{'mean': None, 'median': None, 'max': None, '...


### Load CO2 rasters

In [12]:
# Number of missing values in the 2015 median column
df_full['CO2_median_2015'].isna().sum()

0

In [15]:
# Average of the 2015 median column over all rows
df_full['CO2_median_2015'].mean()

399.8965326822409

In [7]:
# Display the results table (pandas truncates the rendered rows)
df_full

Unnamed: 0,ID_ADM,CO2_min_2015,CO2_max_2015,CO2_mean_2015,CO2_count_2015,CO2_std_2015,CO2_median_2015,CO2_min_2020,CO2_max_2020,CO2_mean_2020,CO2_count_2020,CO2_std_2020,CO2_median_2020
0,37631,399.607960,401.358550,400.399209,15,0.428803,400.466887,409.163792,414.569980,411.808040,15,1.468331,411.502822
1,37632,398.298823,400.135250,399.067373,30,0.476779,398.958232,409.627150,414.687820,412.038940,30,1.321080,412.115414
2,37633,396.497040,402.828550,399.624463,164,1.230059,399.816454,411.252160,416.116805,412.707501,164,1.154179,412.215215
3,37634,398.298823,399.331870,398.804309,44,0.247429,398.770686,409.504947,413.156740,410.843125,44,0.783237,410.767444
4,37635,397.834565,401.409114,399.242787,1166,0.543887,399.176802,408.650600,416.629880,411.987189,1166,0.971735,411.830835
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2932,15561,397.557237,403.184900,399.627698,60,1.296823,399.577296,409.300400,415.623524,412.041846,60,1.779249,411.703950
2933,15562,395.179840,403.802830,399.108872,1593,1.277502,399.050333,407.153750,416.193300,411.728523,1593,1.674308,411.773575
2934,15563,397.440965,404.915400,400.148452,67,1.350926,400.179754,410.582541,416.046059,413.403369,67,1.316005,413.657800
2935,15564,396.418676,404.678170,399.726487,308,1.549688,399.625266,408.293980,416.747500,412.694218,308,1.557666,412.800804


In [16]:
# Write the results table to the output directory without the index column
out_path = dir_out + FILE_OUT
df_full.to_csv(out_path, index=False)