In [1]:
import pandas as pd
import numpy as np
import xarray as xr
import rioxarray as rio
from scipy.stats import mode
from utils import *

# Estimate the net land sink globally and regionally from observational data

## 1. Introduction

## 2. Load data

### 2.1. Load data from the Global Carbon Project

In [2]:
# All three tables come from the same workbook, so open it once and parse each sheet from the
# shared handle instead of re-opening and re-parsing the file for every `read_excel` call.
GCB_workbook_path = '../data/carbon_cycle/GCB/Global_Carbon_Budget_2023v1.1.xlsx'

with pd.ExcelFile(GCB_workbook_path) as GCB_workbook:
    # global budget terms, scaled by 1e15 on load
    # NOTE(review): presumably GtC -> gC, matching the conversions used later in this notebook
    GCB_data = pd.read_excel(GCB_workbook,sheet_name='Global Carbon Budget',skiprows=21,index_col=0)*1e15

    # ocean sink sheet (left unscaled here; scaled where it is used)
    ocean_sink = pd.read_excel(GCB_workbook,sheet_name='Ocean Sink',skiprows=30,index_col=0)

    # cement carbonation sink sheet (left unscaled here)
    cement = pd.read_excel(GCB_workbook,sheet_name='Cement Carbonation Sink',skiprows=9,index_col=0)

### 2.2. Inversions

In [3]:
# open the gridded GCP inversion ensemble
GCB_inversions = xr.open_dataset('../data/carbon_cycle/atmospheric_inversions/GCP2023_inversions_1x1_version1_1_20240124.nc')

# tag the dataset with a geographic CRS (written in place), then switch the spatial
# dimension names from longitude/latitude to x/y
GCB_inversions.rio.write_crs("EPSG:4326",inplace=True)
GCB_inversions = GCB_inversions.rename({'longitude':'x','latitude':'y'})

# each ensemble member's name is stored as a sequence of characters: join every row into a
# single string and use the resulting names as the 'ensemble_member' coordinate
ensemble_member_name_chars = GCB_inversions['ensemble_member_name'].values
inversion_names = np.array([
    ''.join(ensemble_member_name_chars[i])
    for i in range(GCB_inversions.sizes['ensemble_member'])
])
GCB_inversions = GCB_inversions.assign_coords(ensemble_member=inversion_names)

# keep the grid cell area at hand for area-weighted sums
cell_area = GCB_inversions['cell_area']

### 2.3. Regions

In [4]:
# read the country polygons and keep each row's index label in an explicit 'id' column
countries_data = (
    gpd.read_file('../data/country_data/country_data_w_RECCAP_Pan_FAO.shp')
    .assign(id=lambda countries: countries.index)
)

# merge the countries into one geometry per value of the 'RECCAP reg' column
reccap_regions = countries_data.dissolve(by='RECCAP reg')

## 3. Define functions for the analysis

In [5]:
def analyze_gridded_inversions(region:gpd.GeoDataFrame,flux:xr.DataArray,cell_area:xr.DataArray,interp=False) -> pd.DataFrame:
    '''
    Aggregate an area-weighted inversion flux field over a set of regions.

    The flux is multiplied by the grid cell area and handed to
    `raster_vector_zonal_stats` with the 'sum' statistic; the result is
    unstacked and relabelled with region, ensemble member and time labels.

    Parameters:
    region: gpd.GeoDataFrame
        Regions to sum over; its index provides the first level of the output row index
    flux: xr.DataArray
        Land-atmosphere flux from inversion; must carry 'ensemble_member' and 'time' coordinates
    cell_area: xr.DataArray
        Area of each grid cell
    interp: bool
        Interpolate region boundaries (forwarded unchanged to `raster_vector_zonal_stats`)

    Returns:
    pd.DataFrame
        Regional sums, rows indexed by (region, ensemble member), one column per time step
    '''

    # flux per unit area times area gives a per-cell total that can be summed
    area_weighted_flux = flux*cell_area

    # zonal sum over every region, reshaped into a table
    regional_sums = raster_vector_zonal_stats(
        region.reset_index(),
        area_weighted_flux,
        'sum',
        interp=interp,
    ).unstack()

    # label the rows with every (region, ensemble member) pair and the columns with the time steps
    # NOTE(review): assumes the unstacked rows come back region-major, then ensemble member — confirm against utils
    member_labels = flux['ensemble_member'].values
    regional_sums.index = pd.MultiIndex.from_product([region.index,member_labels])
    regional_sums.columns = flux['time'].values

    return regional_sums

## 4. Run analysis

### 4.1. Global

In [6]:
# collect the terms that define the land sink as a residual (fossil fuels, atmospheric growth,
# ocean sink, and cement carbonation sink) from 1990 onwards, under short column names
land_sink = GCB_data.loc[
    1990:,
    ['fossil emissions excluding carbonation','atmospheric growth','ocean sink','cement carbonation sink'],
].rename(columns={
    'fossil emissions excluding carbonation':'FF',
    'atmospheric growth':'AGR',
    'ocean sink':'ocean',
    'cement carbonation sink':'cement',
})

# for cement, take the average CV between the two data sources in GCB from 1990:
# mean across-source standard deviation divided by the overall mean of both sources
cement_sources = cement[['Cao','Huang']]
cement_CV = cement_sources.std(axis=1).loc[1990:].mean()/cement_sources.loc[1990:].mean().mean()

# standard deviation of every term, aligned on the same years as the land sink
land_sink_std = pd.DataFrame(
    {
        # fossil fuel emission uncertainty is 5% (1 sigma)
        'FF_std': land_sink['FF']*0.05,
        # From Friedlingstein et al. 2023
        # We estimate the uncertainty in the decadally averaged growth rate after 1980 at 0.02 Gt C yr−1 based on the calibration and the annual growth rate uncertainty but stretched over a 10-year interval.
        'AGR_std': 0.02e15,
        # take the uncertainty in the ocean sink from the GCB
        'ocean_std': ocean_sink.loc[1990:,'1 sigma uncertainty']*1e15,
        # scale the cement term by the CV computed above
        'cement_std': land_sink['cement']*cement_CV,
    },
    index=land_sink.index,
)

# put the estimates and their standard deviations side by side
final_land_sink_estimate = pd.concat([land_sink,land_sink_std],axis=1)

# save the results
final_land_sink_estimate.to_csv('../results/05_estimate_land_sink/global.csv')

### 4.2. Regional


In [7]:
# get the NEE: the inversion variable 'land_flux_only_fossil_cement_adjusted'
# NOTE(review): presumably the land flux after the fossil/cement adjustment — confirm against the dataset metadata
GCB_NEE = GCB_inversions['land_flux_only_fossil_cement_adjusted']

# get the fossil fuel emissions field used alongside the inversions
FF_inversions = GCB_inversions['fossil_fuel_emissions']

def analyze_dataset(ds):
    '''
    Turn a gridded inversion field into annual regional totals.

    Relies on the notebook-level `reccap_regions` and `cell_area`.

    Parameters:
    ds: xr.DataArray
        Gridded field with a 'time' dimension (plus whatever
        `analyze_gridded_inversions` requires of its `flux` argument)

    Returns:
    pd.DataFrame
        Annual sums per (region, inversion), scaled by 1e15, with exact zeros replaced by NaN
    '''

    # average within each calendar year and label the time axis by the year itself
    annual = ds.resample(time='YE').mean()
    annual['time'] = annual['time'].dt.year

    # calculate regional sums and convert GtC to gC
    regional = analyze_gridded_inversions(reccap_regions,annual,cell_area,interp=True)*1e15

    # blank out exact zeros
    regional = regional.mask(regional==0)

    # name the two row-index levels
    return regional.rename_axis(index=['region','inversion'])

# regional NEE: analyze, then save
NEE_regional = analyze_dataset(GCB_NEE)
NEE_regional.to_csv('../results/05_estimate_land_sink/NEE_regional.csv')

# regional fossil fuel emissions: analyze, then save
FF_regional = analyze_dataset(FF_inversions)
FF_regional.to_csv('../results/05_estimate_land_sink/FF_regional.csv')

### 4.3. Lateral fluxes

We rely on the corrected land-to-ocean flux map that accompanies the inversion models.

In [8]:
# the lateral river fluxes are taken from the matching variable in the GCB inversion data
lateral_fluxes = GCB_inversions['river_adjustment_land_scaled']

# sum the area-weighted river fluxes over each region and convert from GtC to gC
region_river_fluxes = raster_vector_zonal_stats(
    reccap_regions.reset_index(),
    lateral_fluxes*cell_area,
    'sum',
    interp=True,
)*1e15

# label the rows with the region names and give the result its name
region_river_fluxes.index = reccap_regions.index
region_river_fluxes.name = 'river_flux'

# save the results
region_river_fluxes.to_csv('../results/05_estimate_land_sink/river_fluxes.csv')