In [1]:
import os
from glob import glob

import numpy as np
import pandas as pd

# Harmonize data sources in time

## 1. Introduction

We convert carbon stock data into carbon stock changes per year. We do this both for the living biomass observational data and for the DGVM cVeg data.

For the living biomass observation data, we have two aspects of the data that we need to harmonize in time:

1. Some data sources do not have annual data. For these sources, we linearly interpolate the missing years between the available data points.

2. Besnard et al. report the data in units of biomass change per year, whereas all other sources report stocks per time period. We convert all stock data into stock changes per year.



## 2. Run analysis

### 2.1. Biomass observations

In [2]:
# load data
# glob returns matches in arbitrary order; sort them so that the row order of
# everything built from this list is reproducible between runs and machines
files = sorted(glob('../results/03_aggregate_regions/*regions.csv'))

# extract file names
# os.path.basename strips the directory using the platform's own separator rules
# (splitting on '/' by hand fails for Windows-style paths); the name is then cut
# at the first '.'
file_names = np.array([os.path.basename(x).split('.')[0] for x in files])

# extract the data source from the file name (the text before the first '_')
sources = [x.split('_')[0] for x in file_names]

# change LVODmin and LVODmax to LVOD
sources = ['LVOD' if x in ['LVODmin','LVODmax'] else x for x in sources]

# extract the method from the file name (the name without its last '_'-separated token)
# NOTE(review): `method` is not used below -- the 'method' column of file_metadata
# is filled with the full file name instead. This looks unintended, but switching
# would change the index labels written to the harmonized CSV; confirm first.
method = ['_'.join(x.split('_')[:-1]) for x in file_names]

# create a file metadata dataframe with one row per input file
file_metadata = pd.DataFrame({'file': files, 'source': sources, 'method': file_names})

In [3]:
def process_file(fn):
    """Read one CSV file and tag every row with the file it came from.

    Parameters
    ----------
    fn : str
        Path passed to ``pd.read_csv``. The first two columns of the file are
        used as the row index; every remaining column header must be
        convertible to a number.

    Returns
    -------
    pandas.DataFrame
        The file contents with integer column labels and a three-level row
        index named ``('file', 'RECCAP reg', 'landcover')``, where the
        ``'file'`` level holds ``fn`` on every row.
    """
    # the two leading columns become the (region, landcover) row index
    table = pd.read_csv(fn, index_col=[0, 1])

    # headers are read as text; go through float so labels such as '2000.0' also parse
    table.columns = table.columns.astype(float).astype(int)

    # rebuild the row index with the file name prepended as an extra outer level
    region_values = table.index.get_level_values(0)
    landcover_values = table.index.get_level_values(1)
    table.index = pd.MultiIndex.from_arrays(
        [[fn] * len(table), region_values, landcover_values],
        names=['file', 'RECCAP reg', 'landcover'],
    )

    return table

# process the files and stack them into one table indexed by (file, region, landcover)
merged_data = pd.concat([process_file(fn) for fn in file_metadata['file']])

# merge the data with the metadata to add source to each row
merged_data = (
    merged_data.reset_index()
    .merge(file_metadata, on='file')
    .set_index(['source','method','RECCAP reg','landcover'])
    .drop(columns='file')
)

# sort the year columns in ascending order and insert any year that is absent from
# every source: the differencing below only yields a change *per year* if
# neighbouring columns are exactly one year apart
years = merged_data.columns.sort_values()
if len(years) > 0:
    years = range(int(years[0]), int(years[-1]) + 1)
merged_data = merged_data.reindex(columns=years)

# interpolate missing years (only gaps between two valid values are filled;
# nothing is extrapolated before the first or after the last valid value of a row)
merged_data = merged_data.interpolate(axis=1,limit_area='inside')

# get the Besnard et al. data; these rows are already reported as change per year,
# so they are set aside before differencing.
# The column axis is given explicitly so the slicer cannot be misread as a (row, column) pair
besnard_rows = pd.IndexSlice['besnard',:,:]
besnard_data = merged_data.loc[besnard_rows, :]

# apply a time derivative to all the data (value minus the value of the previous year)
merged_data = merged_data.diff(axis=1)

# reset the Besnard et al. values, undoing the differencing for those rows
merged_data.loc[besnard_rows, :] = besnard_data.values

# remove blank columns
# the column sum skips NaN, so this drops all-NaN columns as well as columns whose values sum to exactly zero
merged_data = merged_data.loc[:,merged_data.sum() !=0]

# rename the indices
merged_data.index.names = ['source','method','region','landcover']

# save the results (create the output folder first so the cell also runs on a fresh checkout)
os.makedirs('../results/04_temporal_harmonization', exist_ok=True)
merged_data.to_csv('../results/04_temporal_harmonization/harmonized_biomass_data.csv')

### 2.2. DGVM cVeg data

In [4]:
# load data; the first two columns of the file form the row index
trendy_cVeg_regional = pd.read_csv('../results/03_aggregate_regions/regional_DGVM_cVeg.csv',index_col=[0,1])

# differentiate in time: every column becomes its difference to the column before it.
# The first column has no predecessor and is all-NaN after diff, so dropna removes it;
# note that dropna(axis=1) also removes any other column that contains a NaN
trendy_cVeg_regional = trendy_cVeg_regional.diff(axis=1).dropna(axis=1)

# save the results (create the output folder first so the cell also runs on a fresh checkout)
os.makedirs('../results/04_temporal_harmonization', exist_ok=True)
trendy_cVeg_regional.to_csv('../results/04_temporal_harmonization/harmonized_DGVM_cVeg.csv')