In [1]:
import pandas as pd
from glob import glob
import numpy as np

In [2]:
# Discover the per-region aggregate CSVs. glob() returns paths in arbitrary,
# filesystem-dependent order, so sort them to keep the row order of the
# metadata table reproducible across runs and machines.
files = sorted(glob('../results/03_aggregate_regions/*regions.csv'))

# base name of each file: last '/'-separated component, cut at the first '.'
file_names = np.array([path.split('/')[-1].split('.')[0] for path in files])

# the data source is the first '_'-separated token of the file name;
# the LVODmin / LVODmax variants are both attributed to the single source 'LVOD'
sources = [name.split('_')[0] for name in file_names]
sources = ['LVOD' if src in ('LVODmin', 'LVODmax') else src for src in sources]

# the method is the file name with its last '_'-separated token removed
method = ['_'.join(name.split('_')[:-1]) for name in file_names]

# One row per file. Building the frame from a column dict avoids the round trip
# through a transposed 2-D numpy string array.
# NOTE(review): the 'method' column is filled with the full file name
# (`file_names`), not with the `method` list computed above, which is otherwise
# unused in this cell. Left as is because downstream merges key on this column -
# confirm which of the two is intended.
file_metadata = pd.DataFrame({'file': files, 'source': sources, 'method': file_names})

In [25]:
# Quick look at the first file whose source is 'pan': select the rows whose
# first index level is 'Other' and average each of them across all columns.
(
    pd.read_csv(
        file_metadata.loc[file_metadata['source'].eq('pan'), 'file'].values[0],
        index_col=[0, 1],
    )
    .loc['Other']
    .mean(axis=1)
)

landcover
forest    1.738889e+15
dtype: float64

In [29]:
def process_file(fn):
    """Return the average per-unit change between the first and last column of a file.

    The CSV at `fn` is read with its first two columns as the row index. The
    remaining column labels are converted to integers by way of float, so
    labels such as '2000.0' are accepted (presumably these are years).

    Parameters
    ----------
    fn : str
        Path of the CSV file to load.

    Returns
    -------
    pandas.Series
        (last column - first column) / (last label - first label) for every
        row, indexed by a ('file', 'region', 'landcover') MultiIndex whose
        'file' level holds `fn`. The Series is unnamed when the first and last
        column labels differ.
    """
    table = pd.read_csv(fn, index_col=[0, 1])

    # column labels arrive as strings; go through float so '2000.0' parses too
    table.columns = table.columns.astype(float).astype(int)

    # rebuild the row index with the file path prepended as the outermost level
    keys = table.index.to_frame(index=False, name=['region', 'landcover'])
    keys.insert(0, 'file', fn)
    table.index = pd.MultiIndex.from_frame(keys)

    # total change over the covered span, divided by the length of that span
    first_label, last_label = table.columns[0], table.columns[-1]
    change = table.iloc[:, -1] - table.iloc[:, 0]
    return change / (last_label - first_label)

# compute the per-row change for every file listed in the metadata table
merged_data = pd.concat([process_file(fn) for fn in file_metadata['file']])

# merge the data with the metadata to add source to each row; the unnamed
# Series returned by process_file becomes column 0 after reset_index()
merged_data = (
    merged_data.reset_index()
    .merge(file_metadata, on='file')
    .set_index(['source', 'method', 'region', 'landcover'])
    .drop(columns='file')
)

# attach the across-column mean of the stage-4 table, aligned on the row index
stage_4 = pd.read_csv('../results/04_temporal_harmonization/harmonized_biomass_data.csv', index_col=[0, 1, 2, 3])
mean_stage_4 = stage_4.mean(axis=1)
mean_stage_4.name = 'mean_stage_4'
merged_data = merged_data.merge(mean_stage_4, left_index=True, right_index=True, how='left')

# only besnard should be here
# A row is a mismatch when the stage-4 mean and the change computed above differ
# by more than 1 in absolute value. Rows whose first index level (the source) is
# 'besnard' are allowed to mismatch. They are excluded with a boolean mask rather
# than .drop(index='besnard'), which raises KeyError whenever no besnard row is
# among the mismatches.
# NOTE(review): rows without a stage-4 match have NaN in 'mean_stage_4' after the
# left merge; NaN > 1 is False, so such rows pass this check silently - confirm
# that is acceptable.
mismatch = np.abs(merged_data['mean_stage_4'] - merged_data[0]) > 1
is_besnard = merged_data.index.get_level_values(0) == 'besnard'
assert not (mismatch & ~is_besnard).any(), "FAIL"