In [1]:
import xarray as xr
import rioxarray as rio
import geopandas as gpd
import pandas as pd

# Import the project's `utils` helper module.
# `utils` is not an installed package, so the directory `<parent of the current
# working directory>/code/` is appended to `sys.path` before importing it.
# NOTE: the path is derived from os.getcwd(), so it depends on the directory the
# kernel was started in; this assumes utils.py lives in that `code/` folder —
# TODO confirm against the repository layout.
import os
import sys
from pathlib import Path
sys.path.append(str(Path(os.getcwd()).parent) + "/code/")
import utils

# Test `calc_area`

In [2]:
# Build mock data for the test: a raster with dimensions of 18 by 36 filled with
# ones. The first layer of the Xu et al. 2021 file is only used as a template grid.
xu_path = '../data/biomass/xu_et_al_2021/test10a_cd_ab_pred_corr_2000_2019_v2.tif'
xu_data = rio.open_rasterio(xu_path)[0, :, :]
coarse_template = xu_data.rio.reproject(xu_data.rio.crs, shape=[18, 36])
uniform_data = xr.ones_like(coarse_template)

# Reference surface area map (band 1, with the `band` coordinate dropped)
surf_area = rio.open_rasterio('../results/00_preprocessing/land_surface_area.nc').sel(band=1).drop_vars('band')

# Global totals: the area returned by `calc_area` for the mock grid versus the
# total of the reference surface area map
reference_total = surf_area.sum()
computed_total = utils.calc_area(uniform_data).sum()

# The two totals must agree to within a relative error of 1e-3
relative_error = abs((computed_total - reference_total) / reference_total)
assert relative_error < 1e-3, "FAILED"

# Test `raster_vector_zonal_stats`

Test by calculating the area of each continent and comparing it to the reference values from Wikipedia.

In [4]:
# load country data and dissolve the country polygons into one row per continent
countries_data = gpd.read_file('../data/country_data/country_data_w_RECCAP_Pan_FAO.shp')
countries_data['id'] = countries_data.index
continents = countries_data.dissolve('CONTINENT')

# load surface area file
surf_area = rio.open_rasterio('../results/00_preprocessing/land_surface_area.nc').sel(band=1).drop_vars('band')

# calculate the land area (incl. rivers) for the surface; pixels where the
# surface area map is not positive are masked out
# NOTE(review): the `calc_area` test cell calls `utils.calc_area`, whereas this
# cell calls `utils.calc_pixel_area` — confirm both helpers exist in utils.
sa = utils.calc_pixel_area(surf_area).where(surf_area > 0)

# use raster_vector_zonal_stats to sum the pixel areas within each continent;
# the result is assigned positionally (`.values`) because the index was reset
continents['area'] = utils.raster_vector_zonal_stats(continents.reset_index(), sa, 'sum', interp=True).values

# define the reference data from wikipedia (each figure is scaled by 1e6)
# https://en.wikipedia.org/wiki/Continent
ref_sa = pd.Series(
    [30365000e6, 44614000e6, 10000000e6, 24230000e6, 8510926e6, 17814000e6, 14200000e6],
    index=['Africa', 'Asia', 'Europe', 'North America', 'Oceania', 'South America', 'Antarctica'],
)

# absolute difference between the calculated and the reference areas; the two
# series are aligned on continent name, so a name that appears in only one of
# them produces NaN
abs_diff = continents['area'].sub(ref_sa, axis=0)

# sum the entries for Asia and Europe to get Eurasia, in both series
eurasia_parts = ['Asia', 'Europe']
abs_diff['Eurasia'] = abs_diff.loc[eurasia_parts].sum()
abs_diff = abs_diff.drop(eurasia_parts)
ref_sa['Eurasia'] = ref_sa.loc[eurasia_parts].sum()
ref_sa = ref_sa.drop(eurasia_parts)

# calculate the relative difference
diff = abs_diff / ref_sa

# guard against a vacuous pass: if no continent name matched the reference
# index (or every area is NaN), `fillna(0)` below would turn every entry into 0
# and the tolerance check would succeed without comparing anything
assert diff.notna().any(), "no continent could be compared against the reference data"

# make sure the differences are less than 15%; NaN entries (no comparison
# available for that label) are treated as 0 and therefore skipped
assert (diff.abs().fillna(0) < 0.15).all(), "FAIL"

# Test `down_sample`

In [38]:
# Load the surface area map (band 1, `band` coordinate dropped) as the test input
surf_area = rio.open_rasterio('../results/00_preprocessing/land_surface_area.nc').sel(band=1).drop_vars('band')

# Down-sample the map by a factor of 10 along x and 5 along y, using the 'sum' statistic
sa_ds = utils.down_sample(surf_area, x_factor=10, y_factor=5, stat='sum')

# Down-sampling with 'sum' should leave the grand total unchanged: compare the
# totals before and after, allowing a relative deviation of 1e-4
total_before = surf_area.sum()
total_after = sa_ds.sum()
assert abs(total_after - total_before) / total_before < 1e-4, "FAILED"