In [7]:
"""
Using conservative water balance method to calculate the Bedrock Water Deficit 
Author: Xionghui Xu
Date: July 10, 2025
"""
import os
import subprocess
import numpy as np
import xarray as xr
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell

from tqdm import tqdm, trange
import netCDF4 as nc
import subprocess
import shutil
import math
from joblib import Parallel, delayed

# Display the value of every top-level expression statement in a cell, not only
# the last one (this is why the module docstring above is echoed as cell output).
InteractiveShell.ast_node_interactivity = "all"

'\nUsing conservative water balance method to calculate the Bedrock Water Deficit \nAuthor: Xionghui Xu\nDate: July 10, 2025\n'

In [8]:
#### Configuration for the water balance method
# Every directory setting keeps its trailing "/" because later cells build file
# names by plain string concatenation, e.g. f'{data_path}diff.nc4'.
path = 'data/'
data_path = 'data/run/'
mask_path = 'data/run/masking_criteria/'
shp_path = 'data/Shp/'
fig_path = 'fig/'

# Name of the target grid: the remap steps read f'{data_path}{resolution}.txt'.
resolution = "500"
# Presumably [lon_min, lon_max, lat_min, lat_max] in degrees; not used by the
# cells of this notebook -- TODO confirm.
region = [-180, 180, -60, 90]

# Calculate the Bedrock Water Storage Deficit

## 1. Data Preprocessing
## !!! Due to limited data storage, the data required for the data preprocessing process is not provided, so the following data preprocessing code cannot be used and is for reference only.

In [3]:
def run_command(command: str) -> str:
    """Run a shell command and return its captured standard output.

    The command is executed through the shell (``shell=True``), so it may
    contain pipes, options and several arguments in a single string.

    Args:
        command: Complete command line to execute.

    Returns:
        Everything the command wrote to stdout, decoded as text.

    Raises:
        RuntimeError: If the command exits with a non-zero status. The message
            carries the command line, the exit status and the captured stderr,
            so the cause is visible in the traceback whatever tool was invoked
            (cdo, gdalwarp, ln, ...).
    """
    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(
            f"Command failed with exit status {result.returncode}: {command}\n"
            f"{result.stderr.strip()}"
        )
    return result.stdout

In [None]:
"""
Calculate the masking criteria include: 
1. mask area without adequate water (sum ET>P from 2003 to 2020)
2. mask area without woody vegetation (IGBP=1~9)
3. mask area without shallow bedrock (DTB<150cm)
the masking criteria need to calculate after the Dbedrock calculation.
"""
# def mask():
#     def mask_adequate_water() -> None:
#         diff_file = os.path.join(path, 'diff', 'diff.nc4')
#         dir_path = os.path.join(path, 'masking_criteria')
#         os.makedirs(dir_path, exist_ok=True)
#         output_sum = os.path.join(dir_path, 'diff_sum.nc4')
#         output_interp = os.path.join(dir_path, 'diff_sum_interp.nc4')
#         mask_file = os.path.join(dir_path, 'mask_adequate_water.nc4')
#         run_command(f"cdo -f nc4 -P 48 timsum {diff_file} {output_sum}")
#         run_command(f"gdalwarp -multi -wo NUM_THREADS=48 -ot Float32 -of netCDF -co FORMAT=NC4 -r bilinear -ts 86400 43200 -overwrite {output_sum} {output_interp}")
#         run_command(f"cdo -f nc4 -z zip -setrtoc2,0,inf,nan,1 {output_interp} {mask_file}")
#         print("mask_adequate_water completed")

#     def mask_woody_veg() -> None:
#         dir_path = os.path.join(path, 'masking_criteria')
#         os.makedirs(dir_path, exist_ok=True)
#         igbp_file = os.path.join(dir_path, 'global_igbp_15s_2020.nc')
#         mask_file = os.path.join(dir_path, 'mask_woody_veg.nc4')
#         run_command(f"cdo -f nc4 -z zip -setrtoc2,1,9,1,nan {igbp_file} {mask_file}")
#         print("mask_woody_veg completed")

#     def mask_shallow_bedrock() -> None:
#         dir_path = os.path.join(path, 'masking_criteria')
#         os.makedirs(dir_path, exist_ok=True)
#         dtb_file = os.path.join(dir_path, 'average_soil_and_sedimentary-deposit_thickness_remap_cm.nc')
#         output_interp = os.path.join(dir_path, 'dtb_interp.nc4')
#         mask_file = os.path.join(dir_path, 'mask_shallow_bedrock.nc4')
#         run_command(f"gdalwarp -multi -wo NUM_THREADS=48 -ot Float32 -of netCDF -co FORMAT=NC4 -r bilinear -ts 86400 43200 -overwrite {dtb_file} {output_interp}")
#         run_command(f"cdo -f nc4 -z zip -setrtoc2,0,150,1,nan {output_interp} {mask_file}")
#         print("mask_shallow_bedrock completed")
    
#     mask_adequate_water()
#     mask_woody_veg()
#     mask_shallow_bedrock()

In [None]:
"""
Calculate the Ssoil:
1. calculate the DTB stratification (0~5~15~30~60~100~150 cm) to align the SAWS stratification (0~5~15~30~60~100~200 cm)
2. use the vertical stratification (DTB and SAWS) to calculate the Ssoil
""" 
# def Ssoil() -> None:
#     layer = [0, 5, 15, 30, 60, 100, 150]
#     dir_path = os.path.join(path, 'Ssoil')
#     os.makedirs(dir_path, exist_ok=True)
#     saws_path = os.path.join(dir_path, 'SAWS_Kosugi')
#     ssoil_file = os.path.join(dir_path, f'Ssoil.nc4')

#     def DTB_layer() -> None:
#         dtb_file = os.path.join(dir_path, 'average_soil_and_sedimentary-deposit_thickness_remap_cm.nc')
#         image = xr.open_dataset(dtb_file)
#         s = image['Band1']
#         for i in range(len(layer)-1):
#             dtb_layer_file = os.path.join(dir_path, f'DTB_layer{i+1}.nc')
#             delta_s = s - layer[i]
#             delta_s = np.where(delta_s>(layer[i+1]-layer[i]), (layer[i+1]-layer[i]), delta_s)
#             delta_s = np.where(delta_s<0, 0, delta_s)
            
#             shutil.copyfile(f'{saws_path}/saws{i+1}.nc', dtb_layer_file)
#             with nc.Dataset(dtb_layer_file, 'a') as file:
#                 s_var = file.variables['Band1']
#                 s_var[:,:] = delta_s      
#         print("DTB_layer completed")

#     DTB_layer()
#     for i in range(len(layer)-1):
#         dtb_layer_file = os.path.join(dir_path, f'DTB_layer{i+1}.nc')
#         saws_layer_file = os.path.join(dir_path, f'saws{i+1}.nc')
#         ssoil_layer_file = os.path.join(dir_path, f'Ssoil_layer{i+1}.nc4')
#         run_command(f'ln -sf {saws_path}/saws{i+1}.nc {saws_layer_file}')
#         run_command(f'cdo -f nc4 -z zip -mulc,10 -mul {saws_layer_file} {dtb_layer_file} {ssoil_layer_file}')
#     filelist = [f'{dir_path}/Ssoil_layer{i+1}.nc4' for i in range(len(layer)-1)]
#     filelistname = ' '.join(filelist)
#     run_command(f'cdo -f nc4 -z zip -enssum {filelistname} {ssoil_file}')
#     print("Ssoil completed")

In [None]:
"""
Calculate some other variables include:
1. SnowCover: convert the snow cover fraction (%) to a binary snow-free flag (>=10 -> 0, <10 -> 1)
2. IGBP: sel the time
3. Koppen: translate the tif to nc4, and remaplaf from 1km to 500m
4. area: calculate the area for 500m and 0p1
5. DTB: calculate some DTB for different sources
"""
# def other():
#     def SnowCover():
#         # py_file = os.path.join('/stu01/xuxh22/Bedrock/preprocess/', 'pre_SC.py')
#         # run_command(f'python {py_file}')
#         dir_path = os.path.join(path, 'SC')
#         os.makedirs(dir_path, exist_ok=True)
#         sc_0p05_file = os.path.join(dir_path, 'SnowCover_0p05.nc')
#         sc_0p1_file = os.path.join(dir_path, 'SnowCover_0p1.nc4')
#         sc_0p1_mask_file = os.path.join(dir_path, 'SnowCover_0p1_mask.nc4')
#         # The snow cover fraction should be kept at the same resolution as diff_3.nc of Sr and Dr Data, from 0.05° to 0.1°
#         run_command(f"gdalwarp -multi -wo NUM_THREADS=48 -ot Float32 -of netCDF -co FORMAT=NC4 -r bilinear -ts 3600 1800 -overwrite {sc_0p05_file} {sc_0p1_file}")
#         run_command(f'cdo -f nc4 -z zip -setrtoc2,10,100,0,1 {sc_0p1_file} {sc_0p1_mask_file}')
#         print("SnowCover completed")

#     def IGBP():
#         dir_path = os.path.join(path, 'IGBP')
#         os.makedirs(dir_path, exist_ok=True)
#         origin_file = os.path.join(dir_path, 'global_igbp_15s_2020.nc')
#         igbp_file = os.path.join(dir_path, 'IGBP.nc4')
#         run_command(f'cdo -f nc4 -z zip -seltimestep,1 {origin_file} {igbp_file}')
#         print("IGBP completed")

#     def Koppen():
#         dir_path = os.path.join(path, 'Koppen')
#         os.makedirs(dir_path, exist_ok=True)
#         origin_tif_file = os.path.join(dir_path, 'Beck_KG_V1_present_0p0083.tif')
#         origin_nc4_file = os.path.join(dir_path, 'Beck_KG_V1_present_0p0083.nc4')
#         remap_file = os.path.join(path, '500.txt')
#         koppen_file = os.path.join(dir_path, 'Koppen.nc4')
#         run_command(f"gdal_translate -of netCDF -co FORMAT=NC4 -a_srs EPSG:4326 {origin_tif_file} {origin_nc4_file}")
#         run_command(f"cdo -f nc4 -z zip -b I32 -P 48 --no_remap_weights remaplaf,{remap_file} {origin_nc4_file} {koppen_file}")
#         print("Koppen completed")

#     def area():
#         dir_path = os.path.join(path, 'Area')
#         os.makedirs(dir_path, exist_ok=True)
#         data_file = os.path.join(path, 'diff', 'diff.nc')
#         area_file = os.path.join(dir_path, 'Area.nc')
#         # area_file = os.path.join(dir_path, 'Area_0p1.nc')

#         def count_area(lat1,lat2):
#             lat1,lat2 = map(math.radians,[lat1,lat2])
#             r = 6.37122e6
#             dlon = 0.00416666688397527
#             # dlon = 0.1
#             dlon_rad = math.radians(dlon)
#             area = abs(r**2 * dlon_rad * (math.sin(lat2)-math.sin(lat1)))
#             # print(area)
#             return area

#         data = xr.open_dataset(data_file)
#         lat = data['lat']
#         lon = data['lon']
#         inc = 0.00416666688397527
#         # inc = 0.1
#         lat1 = lat-inc/2
#         lat2 = lat+inc/2    
#         grid1,grid2 = np.meshgrid(lon, lat)
#         area = np.zeros_like(grid1)
#         result = Parallel(n_jobs=12)(delayed(count_area)(lat1[i], lat2[i]) for i in range(len(lat)))
#         for i in range(len(lat)):
#             area[i, :] = result[i]
#             print(area[i,0])
#         print(f'The total area of the earth: {np.sum(area):.3f} $m^2$')
                
#         output_ds = xr.Dataset({'area': (('lat', 'lon'), area)},
#                             coords={'lat': data['lat'], 'lon': data['lon']})
#         print(area)
#         print(f'The total area of the earth: {np.sum(area)/1e12:.3f} million $km^2$')
#         output_ds.to_netcdf(area_file)
#         print("Area completed")

#     def DTB():
#         # Iowa measured data, the data is given by Shangguan et al.
#         dir_path = os.path.join(path, 'DTB', 'DTB_Iowa')
#         os.makedirs(dir_path, exist_ok=True)
#         tif_file = os.path.join(dir_path, 'Iowa.tif')
#         nc_file = os.path.join(dir_path, 'Iowa.nc4')
#         run_command(f'gdal_translate -of netCDF -co FORMAT=NC4 -a_srs EPSG:4326 {tif_file} {nc_file}')
        
#         # Send the processed Soilgrids data cp over
#         dir_path = os.path.join(path, 'DTB', 'DTB_Shangguan')
#         os.makedirs(dir_path, exist_ok=True)
#         tif_file = os.path.join(dir_path, 'BDTICM_M_250m_ll.tif')
#         nc_file = os.path.join(dir_path, 'DTB_Shangguan.nc4')
#         run_command(f"gdalwarp -multi -wo NUM_THREADS=48 -ot Float32 -of netCDF -co FORMAT=NC4 -r bilinear -ts 86400 43200 -t_srs EPSG:4326 -te -180 -90 180 90 -overwrite {tif_file} {nc_file}")
        
#         # gNATSGO bedrock data exported by GEE, 2 of which contains the Iowa region
#         dir_path = os.path.join(path, 'DTB', 'DTB_gNATSGO')
#         os.makedirs(dir_path, exist_ok=True)
#         gNATSGO_file = os.path.join(dir_path, 'DTB_gNATSGO.nc4')
#         for i in range(8):
#             tif_file = os.path.join(dir_path, f'Bedrock_US_gNATSGO_90m-{i+1}.tif')
#             nc_file = os.path.join(dir_path, f'Bedrock_US_gNATSGO_90m-{i+1}.nc4')
#             run_command(f'gdal_translate -of netCDF -co FORMAT=NC4 -a_srs EPSG:4326 {tif_file} {nc_file}')
#         filelist = [f'{dir_path}/Bedrock_US_gNATSGO_90m-{i}.nc4' for i in range(5, 9)] + [f'{dir_path}/Bedrock_US_gNATSGO_90m-{i}.nc4' for i in range(1, 5)]
#         filelistname = ' '.join(filelist)
#         run_command(f'cdo -f nc4 -z zip -collgrid {filelistname} {gNATSGO_file}')
    
#     SnowCover()
#     IGBP()
#     Koppen()
#     area()
#     DTB()

## 2. Rolling calculate the first temporary Bedrock Water Storage Deficit and Root-zone Water Storage Deficit

In [None]:
# Open the three inputs of the water balance. The datasets stay open because the
# loops below read one time slice at a time with .isel(time=i).

# Water deficit forcing: ensemble mean ET - ensemble mean P, 8-day, 0p1, 2003.01.01-2020.12.31
ds = xr.open_dataset(f'{data_path}diff.nc4') # ensemble mean ET - ensemble mean P, 8-day, 0p1, 2003.01.01-2020.12.31
current_diff = ds['et']
# Snow flag used as a multiplier on the forcing (0 cancels a period, 1 keeps it).
# NOTE(review): the reference preprocessing uses `setrtoc2,10,100,0,1`, which also maps
# a cover of exactly 10 to 0 -- confirm which boundary applies to this file.
ds2 = xr.open_dataset(f'{data_path}SnowCover.nc4') # snowcover>10 -> 0, snowcover<=10 -> 1, 8-day, 0p1,  2003.01.01-2020.12.31
snowf = ds2['snowf']
# Threshold the cumulative deficit is compared against to detect bedrock water use.
ds3 = xr.open_dataset(f'{data_path}Ssoil_0p1.nc4') # soil water storage capacity, 0p1
ssoil = ds3['Band1']

In [20]:
# Annual time axis: one timestamp per year start, 2003 through 2020 (18 entries)
years = pd.date_range(start='2003-01-01', end='2020-01-01', freq='YS')

time_attrs = {'standard_name': 'time', 'axis': 'T'}
time_array = xr.DataArray(years, dims=['time'], coords={'time': years}, name='time', attrs=time_attrs)

print(time_array)

# Shape of a single time slice of the forcing, and the number of time steps in the record
shape = current_diff.isel(time=0).shape
time_len = len(ds['time']) # 828

<xarray.DataArray 'time' (time: 18)>
array(['2003-01-01T00:00:00.000000000', '2004-01-01T00:00:00.000000000',
       '2005-01-01T00:00:00.000000000', '2006-01-01T00:00:00.000000000',
       '2007-01-01T00:00:00.000000000', '2008-01-01T00:00:00.000000000',
       '2009-01-01T00:00:00.000000000', '2010-01-01T00:00:00.000000000',
       '2011-01-01T00:00:00.000000000', '2012-01-01T00:00:00.000000000',
       '2013-01-01T00:00:00.000000000', '2014-01-01T00:00:00.000000000',
       '2015-01-01T00:00:00.000000000', '2016-01-01T00:00:00.000000000',
       '2017-01-01T00:00:00.000000000', '2018-01-01T00:00:00.000000000',
       '2019-01-01T00:00:00.000000000', '2020-01-01T00:00:00.000000000'],
      dtype='datetime64[ns]')
Coordinates:
  * time     (time) datetime64[ns] 2003-01-01 2004-01-01 ... 2020-01-01
Attributes:
    standard_name:  time
    axis:           T


### 2.1 Annual Water Storage Deficit Method
### Calculate the Dr (the annual root-zone water storage deficit), Dbedrock (the annual bedrock water storage deficit), D_time (the time state: frequency, duration, initial day)

In [None]:
# Allocate the result arrays, all starting at zero.

# Per-year fields: one slice of the forcing grid for each of the 18 years (2003-2020)
annual_shape = (18, *shape)
dr = np.zeros(annual_shape)
dbedrock = np.zeros(annual_shape)
use_dbedrock_frequency_per_year = np.zeros(annual_shape)
use_dbedrock_duration_per_year = np.zeros(annual_shape)
use_dbedrock_first_day = np.zeros(annual_shape)

# Totals accumulated over the whole record
use_dbedrock_sum_frequency = np.zeros(shape)
use_dbedrock_sum_duration = np.zeros(shape)

# Statistics per bedrock-use event
use_dbedrock_mean_duration_per_use = np.zeros(shape)
use_dbedrock_max_duration_per_use = np.zeros(shape)

In [None]:
# Calculate the Dbedrock,Y and related time statistics
#
# For every year the cumulative water deficit (cwd) restarts from zero and is
# advanced through the 46 eight-day periods of that year:
#   * delta >= 0 -> the deficit grows by delta
#   * otherwise  -> the deficit is reset to 0 (this includes NaN deltas)
# Bedrock water is considered "in use" while cwd exceeds the soil storage ssoil.
for j in range(18):
    print(f"year is {j+2003}")

    # Only the last (46th) period of a year depends on the calendar: it starts on
    # day 361 and is cut at the year end, i.e. day 365, or 366 in a leap year.
    is_leap_year = (j + 2003) % 4 == 0 and ((j + 2003) % 100 != 0 or (j + 2003) % 400 == 0)
    last_period_offset = 5 if is_leap_year else 4

    # Yearly restart of the cumulative deficit and of the running event length
    current_cwd = np.zeros(shape)
    use_dbedrock_duration_per_use = np.zeros(shape)

    # Loop through time indices for the current year (46 8-days periods per year)
    for i in range(0+46*j, 46+46*j):
        print(f"Processing time index: {i}")
        # First and last day of year covered by this period
        day_stt = 8*(i-46*j)+1
        day_end = day_stt + (last_period_offset if (i+1) % 46 == 0 else 7)
        day_duration = day_end-day_stt+1
        print(f"the period {i-46*j+1:3} day from {day_stt:4} to {day_end:4}")
        print(f"the period {i-46*j+1:3} day is {day_duration:1}")

        # Forcing of this period; the snow flag (0 or 1) cancels flagged periods
        current_delta_tn = current_diff.isel(time=i).values * snowf.isel(time=i).values
        last_cwd = current_cwd
        current_cwd = np.where(current_delta_tn >= 0, current_cwd + current_delta_tn, 0)
        # Dr is the largest cumulative deficit reached within the year
        dr[j,:,:] = np.maximum(dr[j,:,:], current_cwd)

        # in_bedrock: the deficit currently exceeds the soil storage.
        # event_starts: it did not exceed it in the previous period, so a new
        # bedrock-use event begins here.
        in_bedrock = current_cwd > ssoil
        event_starts = in_bedrock & (last_cwd <= ssoil)

        use_dbedrock_frequency_per_year[j,:,:] = np.where(event_starts, use_dbedrock_frequency_per_year[j,:,:]+1, use_dbedrock_frequency_per_year[j,:,:])
        use_dbedrock_duration_per_year[j,:,:] = np.where(in_bedrock, use_dbedrock_duration_per_year[j,:,:]+day_duration, use_dbedrock_duration_per_year[j,:,:])
        # 0 is the "not used yet this year" sentinel; day_stt is always >= 1
        use_dbedrock_first_day[j,:,:] = np.where(in_bedrock & (use_dbedrock_first_day[j,:,:] == 0), day_stt, use_dbedrock_first_day[j,:,:])

        use_dbedrock_sum_frequency = np.where(event_starts, use_dbedrock_sum_frequency+1, use_dbedrock_sum_frequency)
        use_dbedrock_sum_duration = np.where(in_bedrock, use_dbedrock_sum_duration + day_duration, use_dbedrock_sum_duration)

        # Length of the event in progress (back to 0 as soon as it ends) and the record maximum
        use_dbedrock_duration_per_use = np.where(in_bedrock, use_dbedrock_duration_per_use + day_duration, 0)
        use_dbedrock_max_duration_per_use = np.where(use_dbedrock_duration_per_use>use_dbedrock_max_duration_per_use, use_dbedrock_duration_per_use, use_dbedrock_max_duration_per_use)

    # Part of the annual deficit that the soil storage cannot cover
    dbedrock[j,:,:] = np.where(dr[j,:,:] > ssoil, dr[j,:,:] - ssoil, 0)

# Mean event length = total duration / number of events, 0 where no event occurred.
# `out` supplies the zeros explicitly: with `where=` alone the skipped elements of
# the result would be uninitialised memory.
use_dbedrock_mean_duration_per_use = np.divide(
    use_dbedrock_sum_duration,
    use_dbedrock_sum_frequency,
    out=np.zeros_like(use_dbedrock_sum_duration, dtype=float),
    where=use_dbedrock_sum_frequency > 0,
)

In [None]:
# Define coordinates upfront
coords = {'lat': ds['lat'], 'lon': ds['lon']}
time_coords = {'time': time_array, 'lat': ds['lat'], 'lon': ds['lon']}

# Define datasets and their metadata.
#   name        -> variable name inside the NetCDF file
#   file_prefix -> output file stem; it must match the names read back by the
#                  post-processing functions (e.g. dp_duration_per_use reads
#                  D_duration_per_use_max_tmp1.nc4 and D_duration_per_use_mean_tmp1.nc4)
#   years       -> present only for arrays that are written as one file per year
#   coords      -> optional override of the default lat/lon coordinates
datasets = [
    # Annual data
    {'name': 'Dr', 'data': dr, 'dims': ('lat', 'lon'), 'years': range(2003, 2021), 'file_prefix': 'Dr'},
    {'name': 'Dbedrock', 'data': dbedrock, 'dims': ('lat', 'lon'), 'years': range(2003, 2021), 'file_prefix': 'Dbedrock'},
    # Aggregate data
    {'name': 'Duration', 'data': use_dbedrock_max_duration_per_use, 'dims': ('lat', 'lon'), 'file_prefix': 'D_duration_per_use_max'},
    {'name': 'Duration', 'data': use_dbedrock_mean_duration_per_use, 'dims': ('lat', 'lon'), 'file_prefix': 'D_duration_per_use_mean'},
    {'name': 'Frequency', 'data': use_dbedrock_sum_frequency, 'dims': ('lat', 'lon'), 'file_prefix': 'D_sum_frequency'},
    {'name': 'Duration', 'data': use_dbedrock_sum_duration, 'dims': ('lat', 'lon'), 'file_prefix': 'D_sum_duration'},
    # Per-year data
    {'name': 'Frequency', 'data': use_dbedrock_frequency_per_year, 'dims': ('time', 'lat', 'lon'), 'file_prefix': 'D_frequency_per_year', 'coords': time_coords},
    {'name': 'Duration', 'data': use_dbedrock_duration_per_year, 'dims': ('time', 'lat', 'lon'), 'file_prefix': 'D_duration_per_year', 'coords': time_coords},
    {'name': 'First_Day', 'data': use_dbedrock_first_day, 'dims': ('time', 'lat', 'lon'), 'file_prefix': 'D_first_day', 'coords': time_coords},
]

# The empty last component makes the path end with a separator. Every user of
# D_time_path builds file names as f'{D_time_path}{name}', so without it the
# files would be written next to the directory as "D_time<name>" instead of
# inside it.
D_time_path = os.path.join(data_path, 'D_time', '')
os.makedirs(D_time_path, exist_ok=True)

for ds_info in datasets:
    # Read outside the try block so the error message below can always name the dataset
    file_prefix = ds_info['file_prefix']
    try:
        data = ds_info['data']
        name = ds_info['name']
        dims = ds_info['dims']
        ds_coords = ds_info.get('coords', coords)  # Default to lat/lon coordinates

        if 'years' in ds_info:  # Process annual data: one file per year
            for i, year in enumerate(ds_info['years']):
                output_ds = xr.Dataset({name: (dims, data[i, :, :])},
                                      coords=ds_coords)
                output_ds.to_netcdf(f'{D_time_path}{file_prefix}_{year}_tmp1.nc4')
        else:  # Process aggregate data: a single file
            output_ds = xr.Dataset({name: (dims, data)},
                                  coords=ds_coords)
            output_ds.to_netcdf(f'{D_time_path}{file_prefix}_tmp1.nc4')
    except Exception as e:
        # Best effort: report the failure and continue with the remaining datasets
        print(f"Error saving {file_prefix}: {e}")

### 2.2 Non-Reset Cumulative Water Storage Capacity Method 
### Calculate the Sr (the root-zone water storage), Sbedrock (the bedrock water storage)

In [None]:
# Start the record-long water balance from three independent all-zero fields
current_cwd, sr, sbedrock = (np.zeros(shape) for _ in range(3))

In [None]:
# Calculate the water balance from 2003 to 2020
# Unlike the annual method, the cumulative deficit is carried across year
# boundaries; it only drops back to zero when the forcing is not >= 0.
for i in range(time_len):
    print(f"Processing time index: {i}")

    # 46 periods per year -> index of the year this period, and the next one, fall into
    j = i//46
    next_j = (i+1)//46
    year = j+2003

    # Day counters running from 1 at the start of the record: 8 days per period,
    # minus 3 days per completed year (46*8 = 368), plus one day for every leap
    # year already completed (2004, 2008, ...). They are only used for logging.
    day_stt = 8*i+1 - 3*j + (j+2)//4
    day_end = 8*(i+1) - 3*next_j + (next_j+2)//4
    day_duration = day_end-day_stt+1

    print(year)
    print(f"the period {i+1:3} day from {day_stt:4} to {day_end:4}")
    print(f"the period {i+1:3} day is {day_duration:1}")

    # Calculate current delta_tn, cwd and sr
    current_delta_tn = current_diff.isel(time=i).values * snowf.isel(time=i).values
    last_cwd = current_cwd
    deficit_continues = current_delta_tn >= 0
    current_cwd = np.where(deficit_continues, current_cwd + current_delta_tn, 0)
    # Sr is the largest cumulative deficit seen so far
    sr = np.maximum(sr, current_cwd)

# Share of Sr that exceeds the soil storage
sbedrock = np.where(sr > ssoil, sr - ssoil, 0)

In [None]:
# Write Sr and Sbedrock to their first temporary NetCDF files, on the grid of the forcing data
grid_coords = {'lat': ds['lat'], 'lon': ds['lon']}

output_ds = xr.Dataset({'Sr': (('lat', 'lon'), sr)}, coords=grid_coords)
output_ds.to_netcdf(f'{data_path}Sr_tmp1.nc4')

output_ds1 = xr.Dataset({'Sbedrock': (('lat', 'lon'), sbedrock)}, coords=grid_coords)
output_ds1.to_netcdf(f'{data_path}Sbedrock_tmp1.nc4')

## 3. Data Postprocessing

### 3.1 Calculate the last masking criteria: max(Dbedrock) > 0

In [None]:
def dp_Dbedrock_max():
    """Build the "max(Dbedrock) > 0" mask from the annual Dbedrock files.

    Steps (each one is a cdo call; a failing call raises RuntimeError through
    run_command instead of being silently ignored):
      1. ensmax:   cell-wise maximum of the 2003-2020 Dbedrock_<year>_tmp1.nc4 files
                   -> Dbedrock_max_tmp1.nc4
      2. remapbil: bilinear remap onto the grid described by
                   f'{data_path}{resolution}.txt' -> Dbedrock_max_tmp2.nc4
      3. setrtoc2: values in [-inf, 0.1] become NaN, all other values become 1
                   -> mask_Dbedrock_gt_0.nc4 in mask_path
    """
    yearly_files = ' '.join(f'{D_time_path}Dbedrock_{year}_tmp1.nc4' for year in range(2003, 2021))
    max_file = f'{data_path}Dbedrock_max_tmp1.nc4'
    remapped_file = f'{data_path}Dbedrock_max_tmp2.nc4'

    ensmax_command = f'cdo -O -ensmax {yearly_files} {max_file}'
    print(ensmax_command)
    run_command(ensmax_command)

    run_command(f"cdo -b F32 -P 48 --no_remap_weights remapbil,{data_path}{resolution}.txt {max_file} {remapped_file}")
    run_command(f"cdo -O -f nc4 -z zip -setrtoc2,-inf,1e-1,nan,1 {remapped_file} {mask_path}mask_Dbedrock_gt_0.nc4")

def mask_combine():
    """Multiply the four criterion masks together, one `cdo -O mul` call at a time.

    Starting from mask_adequate_water.nc4, each step multiplies the running
    product with the next mask and stores the result in mask_path:
    mask_combine_12.nc4, mask_combine_123.nc4 and finally mask_combine_all.nc4.
    """
    running_product = f'{mask_path}mask_adequate_water.nc4'
    steps = [
        ('mask_woody_veg', 'mask_combine_12'),
        ('mask_shallow_bedrock', 'mask_combine_123'),
        ('mask_Dbedrock_gt_0', 'mask_combine_all'),
    ]
    for next_mask, combined in steps:
        target = f'{mask_path}{combined}.nc4'
        run_command(f"cdo -O mul {running_product} {mask_path}{next_mask}.nc4 {target}")
        running_product = target

# Order matters: dp_Dbedrock_max() writes mask_Dbedrock_gt_0.nc4, which
# mask_combine() multiplies with the other three masks.
dp_Dbedrock_max()
mask_combine()

### 3.2 Postprocess for Dbedrock, Dr, and D time state 

In [None]:
# Set cdo function to use parallel operations 
def cdo_mul(filename1, filename2, filename3):
    """Run `cdo mul filename1 filename2 filename3` through the shell.

    The two input files are multiplied by cdo and the product is written to
    filename3. A non-zero exit status raises subprocess.CalledProcessError.
    """
    command = f"cdo mul {filename1} {filename2} {filename3}"
    subprocess.run(command, shell=True, check=True)

In [None]:
def _remap_and_mask_annual(prefix):
    """Remap and mask the 18 annual files (2003-2020) of one variable.

    For every year, `<prefix>_<year>_tmp1.nc4` in D_time_path is bilinearly
    remapped onto the grid described by f'{data_path}{resolution}.txt'
    (-> `_tmp2.nc4`). The remapped files are then multiplied with
    mask_combine_all.nc4, five cdo processes at a time (-> `<prefix>_<year>.nc4`).

    NOTE(review): the original comment called this a "0p1 to 0p1" remap that is
    only done for formatting consistency, while `resolution` is set to "500" in
    the configuration cell -- confirm which grid the description file holds.

    Raises:
        subprocess.CalledProcessError: if a cdo remap call fails.
    """
    for j in range(18):
        print(f"year is {j+2003}")
        subprocess.run(f"cdo -f nc4 -z zip -b F32 -P 48 --no_remap_weights -remapbil,{data_path}{resolution}.txt {D_time_path}{prefix}_{2003+j}_tmp1.nc4 {D_time_path}{prefix}_{2003+j}_tmp2.nc4", shell=True, check=True)

    Parallel(n_jobs=5)(delayed(cdo_mul)(f"{D_time_path}{prefix}_{2003+j}_tmp2.nc4", f"{mask_path}mask_combine_all.nc4", f"{D_time_path}{prefix}_{2003+j}.nc4") for j in tqdm(range(18)))

def dp_Dr():
    """Post-process the annual root-zone deficit files (Dr_<year>_tmp1.nc4 -> Dr_<year>.nc4)."""
    _remap_and_mask_annual('Dr')

def dp_Dbedrock():
    """Post-process the annual bedrock deficit files (Dbedrock_<year>_tmp1.nc4 -> Dbedrock_<year>.nc4)."""
    _remap_and_mask_annual('Dbedrock')

dp_Dr()
dp_Dbedrock()

In [None]:
def dp_Dbedrock_Frequency():
    """Classify each cell by the sign pattern of its 18 annual Dbedrock values.

    Every year is coded 2 (Dbedrock > 0), 1 (Dbedrock < 0) or 0 (zero or NaN)
    and the codes are multiplied over 2003-2020. The product is then mapped to:

        1 -> positive in all 18 years (product == 2**18)
        2 -> positive in some years, negative in all the others
        3 -> negative in every year (product == 1)
        4 -> at least one zero/NaN year (product == 0) where mask_combine_12 is 1
        0 -> at least one zero/NaN year elsewhere

    The classes are written into a copy of Dbedrock_2003.nc4 (variable
    'Dbedrock') saved as Dbedrock_Frequency_tmp1.nc4, which is finally
    multiplied with mask_combine_all.nc4 to give Dbedrock_Frequency.nc4.

    Raises:
        RuntimeError: if the final cdo call fails.
    """
    years = range(2003, 2021)
    all_positive = 2 ** len(years)  # 262144: code 2 in every one of the 18 years

    s1 = None
    for year in years:
        # The context manager closes the yearly file as soon as it has been coded
        with xr.open_dataset(f'{D_time_path}Dbedrock_{year}_tmp2.nc4') as image:
            s = image['Dbedrock']
            print(f'year: {year}, min: {s.min().values}, max: {s.max().values}')
            s = np.where(s > 0, 2, np.where(s < 0, 1, 0))
        print(f'year: {year}, min: {s.min()}, max: {s.max()}')

        # Running product of the yearly codes
        s1 = s if s1 is None else s*s1
        print(s1.min(),s1.max(),np.mean(s1))
    print('end do')

    # Order matters: the mixed class is assigned first, then 1 -> 3, and only
    # then is the "all positive" product renamed to class 1.
    s1 = np.where((s1 >= 2) & (s1 < all_positive), 2, s1)
    s1 = np.where((s1 == 1), 3, s1)
    s1 = np.where(s1 == all_positive, 1, s1)

    with xr.open_dataset(f'{mask_path}mask_combine_12.nc4') as mask:
        s2 = mask['Band1']
        print(s1.min(),s1.max())
        s1 = np.where((s1==0) & (s2 == 1), 4, s1)

    print(s1.min(),s1.max())
    # Reuse the 2003 file as a template so grid and metadata are inherited
    shutil.copyfile(f'{D_time_path}Dbedrock_2003.nc4', f'{data_path}Dbedrock_Frequency_tmp1.nc4')

    with nc.Dataset(f'{data_path}Dbedrock_Frequency_tmp1.nc4', 'a') as file:
        s_var = file.variables['Dbedrock']
        s_var[:,:] = s1

    run_command(f'cdo mul {data_path}Dbedrock_Frequency_tmp1.nc4 {mask_path}mask_combine_all.nc4 {data_path}Dbedrock_Frequency.nc4')

dp_Dbedrock_Frequency()

In [None]:
def _dtime_file(stem):
    """Return the path of the D_time file called `stem` (D_time_path prefix, .nc4 suffix)."""
    return f'{D_time_path}{stem}.nc4'

def _set_zero_to_nan(src, dst):
    """Replace values in [-0.5, 0.5] (the zeros of the count/day fields) by NaN."""
    run_command(f'cdo -setrtoc,-0.5,0.5,nan {_dtime_file(src)} {_dtime_file(dst)}')

def _time_statistic(operator, src, dst):
    """Apply a cdo time statistic (timmean, timmax, timmin) to `src`."""
    run_command(f'cdo {operator} {_dtime_file(src)} {_dtime_file(dst)}')

def _remap_and_mask(src, remapped, final):
    """Remap `src` bilinearly to the target grid, then multiply it with mask_combine_all.nc4."""
    run_command(f'cdo -f nc4 -z zip -b F32 -P 48 --no_remap_weights -remapbil,{data_path}{resolution}.txt {_dtime_file(src)} {_dtime_file(remapped)}')
    run_command(f"cdo mul {_dtime_file(remapped)} {mask_path}mask_combine_all.nc4 {_dtime_file(final)}")

def _dp_aggregate(stem):
    """Post-process a single-field file: <stem>_tmp1 -> _tmp2 (0 to NaN) -> _tmp3 (remap) -> <stem> (mask)."""
    _set_zero_to_nan(f'{stem}_tmp1', f'{stem}_tmp2')
    _remap_and_mask(f'{stem}_tmp2', f'{stem}_tmp3', stem)

def _dp_per_year(stem):
    """Post-process a per-year file: 0 to NaN, then time mean and time max, each remapped and masked."""
    without_zeros = f'{stem}_set0_to_nan_tmp1'
    _set_zero_to_nan(f'{stem}_tmp1', without_zeros)
    for stat in ('mean', 'max'):
        _time_statistic(f'tim{stat}', without_zeros, f'{stem}_{stat}_tmp1')
    for stat in ('mean', 'max'):
        _remap_and_mask(f'{stem}_{stat}_tmp1', f'{stem}_{stat}_tmp2', f'{stem}_{stat}')

# All functions below read and write files in D_time_path and raise RuntimeError
# (through run_command) as soon as one cdo step fails, so a broken intermediate
# file is never passed on to the next step.

def dp_frequency_per_year():
    """D_frequency_per_year_tmp1.nc4 -> D_frequency_per_year_mean.nc4 and D_frequency_per_year_max.nc4."""
    _dp_per_year('D_frequency_per_year')


def dp_sum_frequency():
    """D_sum_frequency_tmp1.nc4 -> D_sum_frequency.nc4."""
    _dp_aggregate('D_sum_frequency')

def dp_duration_per_year():
    """D_duration_per_year_tmp1.nc4 -> D_duration_per_year_mean.nc4 and D_duration_per_year_max.nc4."""
    _dp_per_year('D_duration_per_year')

def dp_duration_per_use():
    """D_duration_per_use_max_tmp1.nc4 / D_duration_per_use_mean_tmp1.nc4 -> D_duration_per_use_max.nc4 / D_duration_per_use_mean.nc4."""
    _dp_aggregate('D_duration_per_use_max')
    _dp_aggregate('D_duration_per_use_mean')

def dp_sum_duration():
    """D_sum_duration_tmp1.nc4 -> D_sum_duration.nc4."""
    _dp_aggregate('D_sum_duration')

def dp_first_day():
    """D_first_day_tmp1.nc4 -> D_first_day_min.nc4 (earliest first day) and D_first_day_max_sub_min.nc4 (latest minus earliest)."""
    without_zeros = 'D_first_day_set0_to_nan_tmp1'
    _set_zero_to_nan('D_first_day_tmp1', without_zeros)
    _time_statistic('timmax', without_zeros, 'D_first_day_max_tmp1')
    _time_statistic('timmin', without_zeros, 'D_first_day_min_tmp1')
    run_command(f"cdo sub {_dtime_file('D_first_day_max_tmp1')} {_dtime_file('D_first_day_min_tmp1')} {_dtime_file('D_first_day_max_sub_min_tmp1')}")

    _remap_and_mask('D_first_day_min_tmp1', 'D_first_day_min_tmp2', 'D_first_day_min')
    _remap_and_mask('D_first_day_max_sub_min_tmp1', 'D_first_day_max_sub_min_tmp2', 'D_first_day_max_sub_min')

dp_frequency_per_year()
dp_sum_frequency()
dp_duration_per_year()
dp_duration_per_use()
dp_sum_duration()
dp_first_day()

### 3.3 Postprocess for Sbedrock and Sr

In [None]:
# Shared prefix of the bilinear remap call; input and output file are appended by the caller
remap_command = f'cdo -f nc4 -z zip -b F32 -P 48 --no_remap_weights -remapbil,{data_path}{resolution}.txt'

def _remap_and_mask_storage(name):
    """Remap `<name>_tmp1.nc4` in data_path to the target grid and mask it.

    `<name>_tmp1.nc4` -> (remap) `<name>_tmp2.nc4` -> (multiply with
    mask_combine_all.nc4) `<name>.nc4`. The completion message is printed only
    when both cdo calls succeeded.

    Raises:
        RuntimeError: if one of the cdo calls fails.
    """
    run_command(f"{remap_command} {data_path}{name}_tmp1.nc4 {data_path}{name}_tmp2.nc4")
    run_command(f"cdo mul {data_path}{name}_tmp2.nc4 {mask_path}mask_combine_all.nc4 {data_path}{name}.nc4")
    print(f'The {name} has finished')

def dp_Sbedrock():
    """Post-process the bedrock water storage (Sbedrock_tmp1.nc4 -> Sbedrock.nc4)."""
    _remap_and_mask_storage('Sbedrock')

def dp_Sr():
    """Post-process the root-zone water storage (Sr_tmp1.nc4 -> Sr.nc4)."""
    _remap_and_mask_storage('Sr')

dp_Sbedrock()
dp_Sr()