# The temperature quantiles

In [1]:
import sys
from pathlib import Path

import numpy as np
import xarray as xr

import dask
from dask.diagnostics import ProgressBar

sys.path.insert(0, '../..')
from util import DATA_DIR

In [4]:
# NOTE(review): MAX_YEAR is not referenced by any other cell visible in this
# notebook — presumably the last data year; confirm it is still needed.
MAX_YEAR = 2020
# First and last year of the reference period; both bounds are treated as
# inclusive when the input files are selected.
REFERENCE_YEAR_START = 1986
REFERENCE_YEAR_END = 2005

# Folder that is searched recursively for the input '*.nc' files.
TEMPERATURES_FOLDER = DATA_DIR / "era5_0.25deg/daily_temperature_summary"


In [5]:
def year_from_filename(name):
    """Return the year encoded in a file name as an ``int``.

    The name is split on underscores and the year is read from the last four
    characters of the third-from-last piece.

    Raises ``IndexError`` when the name has fewer than three
    underscore-separated pieces, and ``ValueError`` when those four characters
    do not form an integer.
    """
    pieces = name.split('_')
    year_token = pieces[-3]
    return int(year_token[-4:])


In [6]:
# Folder in which the quantile NetCDF files produced by this notebook are written.
CLIMATOLOGY_QUANTILES_FOLDER = DATA_DIR / "era5_0.25deg/quantiles"

In [5]:
# Make sure the output folder exists before anything is written to it.
# parents=True creates missing intermediate folders and exist_ok=True turns the
# call into a no-op when the folder is already there, so the cell is safe to
# re-run (a bare mkdir() raises FileExistsError on the second run).
CLIMATOLOGY_QUANTILES_FOLDER.mkdir(parents=True, exist_ok=True)

In [7]:
# Quantile levels, given as fractions, that are computed along the time dimension
# and also encoded (as percentages) in the output file name.
# Alternative kept for reference — a 0.01-step grid from 0 up to (not including) 1:
# QUANTILES = np.arange(0, 1, 0.01)
QUANTILES = [0.05,0.95]

## Tmax

In [7]:
# Label inserted into the output file name. The dataset variable itself is
# selected separately (as 't_max') when the files are opened.
t_var = 'tmax'

In [8]:
# Collect, in sorted path order, every '*.nc' file found recursively under
# TEMPERATURES_FOLDER whose file-name year falls inside the reference period
# (both bounds included).
file_list = sorted(
    nc_path
    for nc_path in TEMPERATURES_FOLDER.rglob('*.nc')
    if REFERENCE_YEAR_START <= year_from_filename(nc_path.name) <= REFERENCE_YEAR_END
)

In [9]:
# Open all selected files lazily as one dataset, chunked 100 x 100 along
# latitude/longitude, and keep only the 't_max' variable. The time axis is then
# merged into a single chunk — presumably so the quantile taken along 'time'
# later sees each complete time series within one chunk.
# (A dask.config.set(**{'array.slicing.split_large_chunks': False}) wrapper was
# tried here previously and is left disabled.)
daily_temperatures = (
    xr.open_mfdataset(
        file_list,
        combine='by_coords',
        chunks={'latitude': 100, 'longitude': 100},
    )['t_max']
    .chunk({'time': -1})
)

In [10]:
# Display the array summary (size, shape, chunking) as a sanity check before computing.
daily_temperatures

Unnamed: 0,Array,Chunk
Bytes,28.16 GiB,277.75 MiB
Shape,"(7281, 721, 1440)","(7281, 100, 100)"
Count,4940 Tasks,120 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 28.16 GiB 277.75 MiB Shape (7281, 721, 1440) (7281, 100, 100) Count 4940 Tasks 120 Chunks Type float32 numpy.ndarray",1440  721  7281,

Unnamed: 0,Array,Chunk
Bytes,28.16 GiB,277.75 MiB
Shape,"(7281, 721, 1440)","(7281, 100, 100)"
Count,4940 Tasks,120 Chunks
Type,float32,numpy.ndarray


In [11]:
# Output file for the quantile climatology. The name encodes the variable
# label, the quantile levels as whole percentages, and the reference period.
# - The period comes from REFERENCE_YEAR_START / REFERENCE_YEAR_END rather than
#   a hard-coded "1986-2005", so the name cannot drift from the files selected.
# - Percentages are rounded instead of truncated: 100 * 0.29 evaluates to
#   28.999999999999996, which int() alone would turn into 28.
CLIMATOLOGY_QUANTILES = (
    CLIMATOLOGY_QUANTILES_FOLDER
    / (f'daily_{t_var}_quantiles_'
       f'{"_".join(str(int(round(100 * q))) for q in QUANTILES)}'
       f'_{REFERENCE_YEAR_START}-{REFERENCE_YEAR_END}.nc')
)

In [12]:
# Define the requested quantiles along the 'time' dimension. The actual
# evaluation is triggered by the .compute() call in the following cell.
daily_quantiles = daily_temperatures.quantile(QUANTILES, dim='time')

In [13]:
# Evaluate the quantiles with dask's 'processes' scheduler while showing a
# progress bar, then write the in-memory result to the NetCDF output file.
with dask.config.set(scheduler='processes'):
    with ProgressBar():
        daily_quantiles = daily_quantiles.compute()
        daily_quantiles.to_netcdf(CLIMATOLOGY_QUANTILES)

[########################################] | 100% Completed | 50min  6.9s


In [14]:
# NOTE(review): the preceding cell already calls to_netcdf on this same path
# after computing daily_quantiles, so this looks like a redundant second write —
# confirm and remove if so.
with ProgressBar():
    daily_quantiles.to_netcdf(CLIMATOLOGY_QUANTILES)

## Tmin

In [15]:
# Label inserted into the output file name. The dataset variable itself is
# selected separately (as 't_min') when the files are opened.
t_var = 'tmin'

In [16]:
# Collect, in sorted path order, every '*.nc' file found recursively under
# TEMPERATURES_FOLDER whose file-name year falls inside the reference period
# (both bounds included).
file_list = sorted(
    nc_path
    for nc_path in TEMPERATURES_FOLDER.rglob('*.nc')
    if REFERENCE_YEAR_START <= year_from_filename(nc_path.name) <= REFERENCE_YEAR_END
)

In [17]:
# Open all selected files lazily as one dataset, chunked 100 x 100 along
# latitude/longitude, and keep only the 't_min' variable. The time axis is then
# merged into a single chunk — presumably so the quantile taken along 'time'
# later sees each complete time series within one chunk.
# (A dask.config.set(**{'array.slicing.split_large_chunks': False}) wrapper was
# tried here previously and is left disabled.)
daily_temperatures = (
    xr.open_mfdataset(
        file_list,
        combine='by_coords',
        chunks={'latitude': 100, 'longitude': 100},
    )['t_min']
    .chunk({'time': -1})
)

In [18]:
# Output file for the quantile climatology. The name encodes the variable
# label, the quantile levels as whole percentages, and the reference period.
# - The period comes from REFERENCE_YEAR_START / REFERENCE_YEAR_END rather than
#   a hard-coded "1986-2005", so the name cannot drift from the files selected.
# - Percentages are rounded instead of truncated: 100 * 0.29 evaluates to
#   28.999999999999996, which int() alone would turn into 28.
CLIMATOLOGY_QUANTILES = (
    CLIMATOLOGY_QUANTILES_FOLDER
    / (f'daily_{t_var}_quantiles_'
       f'{"_".join(str(int(round(100 * q))) for q in QUANTILES)}'
       f'_{REFERENCE_YEAR_START}-{REFERENCE_YEAR_END}.nc')
)

In [19]:
# Define the requested quantiles along the 'time' dimension, evaluate them with
# dask's 'processes' scheduler while showing a progress bar, and write the
# in-memory result to the NetCDF output file.
daily_quantiles = daily_temperatures.quantile(QUANTILES, dim='time')

with dask.config.set(scheduler='processes'):
    with ProgressBar():
        daily_quantiles = daily_quantiles.compute()
        daily_quantiles.to_netcdf(CLIMATOLOGY_QUANTILES)

[########################################] | 100% Completed | 51min  2.8s
