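# The temperature quantiles

Computes, for every grid cell, the quantiles over time of the daily maximum, minimum and mean temperature from the `era5_0.25deg` daily temperature summaries during the reference period, and saves one NetCDF file per variable.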

In [3]:
import os
import sys

# Put the directory two levels above the working directory at the front of
# the import path (once), ahead of the project-local import that follows.
project_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if sys.path.count(project_path) == 0:
    sys.path.insert(0, project_path)
    
from source.config import DATA_SRC, WEATHER_SRC

from pathlib import Path
import numpy as np
import xarray as xr

import dask
from dask.diagnostics import ProgressBar



In [2]:
# Presumably the last year of available data; not referenced anywhere else
# in this notebook -- TODO confirm whether it is still needed.
MAX_YEAR = 2020
# Reference period used to select input files; both bounds are inclusive
# in the file filters below.
REFERENCE_YEAR_START = 1986
REFERENCE_YEAR_END = 2005

# Folder that is searched recursively for the daily temperature '*.nc' files.
TEMPERATURES_FOLDER = WEATHER_SRC / 'era5_0.25deg' / 'daily_temperature_summary'


In [3]:
def year_from_filename(name):
    """Return the year encoded in a data file name.

    The name is split on underscores and the year is read from the last
    four characters of the third-from-last piece.

    Parameters
    ----------
    name : str
        File name to parse (the callers pass ``Path.name``).

    Returns
    -------
    int
        The parsed year.

    Raises
    ------
    IndexError
        If the name has fewer than three underscore-separated pieces.
    ValueError
        If the selected characters do not form an integer.
    """
    pieces = name.split('_')
    year_piece = pieces[-3]
    year_digits = year_piece[-4:]
    return int(year_digits)


In [4]:
# Output folder for the quantile NetCDF files. It is created right away
# (including missing parents, and without error when it already exists) so
# that the `to_netcdf` calls further down do not fail on a fresh run.
CLIMATOLOGY_QUANTILES_FOLDER = WEATHER_SRC / 'era5_0.25deg/quantiles'
CLIMATOLOGY_QUANTILES_FOLDER.mkdir(parents=True, exist_ok=True)

In [5]:
# CLIMATOLOGY_QUANTILES_FOLDER.mkdir()

In [6]:
# Quantile levels, as fractions, passed to `.quantile(..., dim='time')` below.
# Alternative kept for reference -- one level per percent from 0.00 to 0.99:
# QUANTILES = np.arange(0, 1, 0.01)
QUANTILES = [0.95]

## Tmax

In [7]:
# Label used in the output file name (the dataset variable read below is 't_max').
t_var = 'tmax'

In [8]:
# All '*.nc' files under the temperature folder whose year falls inside the
# reference period (both bounds inclusive), in sorted path order.
file_list = sorted(
    nc_path
    for nc_path in TEMPERATURES_FOLDER.rglob('*.nc')
    if REFERENCE_YEAR_START <= year_from_filename(nc_path.name) <= REFERENCE_YEAR_END
)

In [9]:
# Open the selected files as one dataset tiled 100 x 100 in latitude/longitude,
# pick the daily maximum, and merge the time axis into a single chunk so each
# tile holds its full time series for the quantile over 'time'.
# (A wrapper `with dask.config.set(**{'array.slicing.split_large_chunks': False}):`
# was tried here and is left disabled.)
daily_temperatures = (
    xr.open_mfdataset(
        file_list,
        combine='by_coords',
        chunks={'latitude': 100, 'longitude': 100},
    )['t_max']
    .chunk({'time': -1})
)

In [10]:
# Show the array summary (size, shape, chunking, dtype) before the long computation.
daily_temperatures

Unnamed: 0,Array,Chunk
Bytes,28.16 GiB,277.75 MiB
Shape,"(7281, 721, 1440)","(7281, 100, 100)"
Count,4940 Tasks,120 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 28.16 GiB 277.75 MiB Shape (7281, 721, 1440) (7281, 100, 100) Count 4940 Tasks 120 Chunks Type float32 numpy.ndarray",1440  721  7281,

Unnamed: 0,Array,Chunk
Bytes,28.16 GiB,277.75 MiB
Shape,"(7281, 721, 1440)","(7281, 100, 100)"
Count,4940 Tasks,120 Chunks
Type,float32,numpy.ndarray


In [11]:
# Check which variable label is currently set before building the output path.
t_var

'tmax'

In [12]:
# Output path for the quantile file of the current variable.
# The percent labels are rounded, not truncated: int(100 * q) cuts off float
# noise (100 * 0.29 == 28.999999999999996 would be labelled 28).
# The period suffix follows the configured reference years instead of a
# hard-coded literal, so it cannot drift from the file filter.
quantile_labels = '_'.join(str(int(round(100 * q))) for q in QUANTILES)
CLIMATOLOGY_QUANTILES = (
    CLIMATOLOGY_QUANTILES_FOLDER
    / f'daily_{t_var}_quantiles_{quantile_labels}_{REFERENCE_YEAR_START}-{REFERENCE_YEAR_END}.nc'
)

In [13]:
# Quantiles along the time axis; the result is evaluated by the `.compute()`
# call in the next cell.
daily_quantiles = daily_temperatures.quantile(QUANTILES, dim='time')

In [14]:
# Evaluate the quantiles with dask's process-based scheduler, showing a
# progress bar, then store the computed result as NetCDF.
with dask.config.set(scheduler='processes'):
    with ProgressBar():
        daily_quantiles = daily_quantiles.compute()
        daily_quantiles.to_netcdf(CLIMATOLOGY_QUANTILES)

[########################################] | 100% Completed |  1hr  3min 58.9s


In [15]:
# NOTE(review): `daily_quantiles` was already written to this same path at the
# end of the previous cell, so this repeats that write -- confirm whether this
# cell is still needed.
with ProgressBar():
    daily_quantiles.to_netcdf(CLIMATOLOGY_QUANTILES)

## Tmin

In [16]:
# Label used in the output file name (the dataset variable read below is 't_min').
t_var = 'tmin'

In [17]:
# All '*.nc' files under the temperature folder whose year falls inside the
# reference period (both bounds inclusive), in sorted path order.
file_list = sorted(
    nc_path
    for nc_path in TEMPERATURES_FOLDER.rglob('*.nc')
    if REFERENCE_YEAR_START <= year_from_filename(nc_path.name) <= REFERENCE_YEAR_END
)

In [18]:
# Open the selected files as one dataset tiled 100 x 100 in latitude/longitude,
# pick the daily minimum, and merge the time axis into a single chunk so each
# tile holds its full time series for the quantile over 'time'.
# (A wrapper `with dask.config.set(**{'array.slicing.split_large_chunks': False}):`
# was tried here and is left disabled.)
daily_temperatures = (
    xr.open_mfdataset(
        file_list,
        combine='by_coords',
        chunks={'latitude': 100, 'longitude': 100},
    )['t_min']
    .chunk({'time': -1})
)

In [19]:
# Output path for the quantile file of the current variable.
# The percent labels are rounded, not truncated: int(100 * q) cuts off float
# noise (100 * 0.29 == 28.999999999999996 would be labelled 28).
# The period suffix follows the configured reference years instead of a
# hard-coded literal, so it cannot drift from the file filter.
quantile_labels = '_'.join(str(int(round(100 * q))) for q in QUANTILES)
CLIMATOLOGY_QUANTILES = (
    CLIMATOLOGY_QUANTILES_FOLDER
    / f'daily_{t_var}_quantiles_{quantile_labels}_{REFERENCE_YEAR_START}-{REFERENCE_YEAR_END}.nc'
)

In [20]:
# Quantiles of the daily minimum along the time axis.
daily_quantiles = daily_temperatures.quantile(q=QUANTILES, dim='time')

# Evaluate with dask's process-based scheduler, showing a progress bar,
# then store the computed result as NetCDF.
with dask.config.set(scheduler='processes'):
    with ProgressBar():
        daily_quantiles = daily_quantiles.compute()
        daily_quantiles.to_netcdf(CLIMATOLOGY_QUANTILES)

[########################################] | 100% Completed |  1hr  9min 13.0s


## Tmean

In [21]:
# Label used in the output file name (the dataset variable read below is 't_mean').
t_var = 'tmean'

In [22]:
# All '*.nc' files under the temperature folder whose year falls inside the
# reference period (both bounds inclusive), in sorted path order.
file_list = sorted(
    nc_path
    for nc_path in TEMPERATURES_FOLDER.rglob('*.nc')
    if REFERENCE_YEAR_START <= year_from_filename(nc_path.name) <= REFERENCE_YEAR_END
)

In [23]:
# Open the selected files as one dataset tiled 100 x 100 in latitude/longitude,
# pick the daily mean, and merge the time axis into a single chunk so each
# tile holds its full time series for the quantile over 'time'.
# (A wrapper `with dask.config.set(**{'array.slicing.split_large_chunks': False}):`
# was tried here and is left disabled.)
daily_temperatures = (
    xr.open_mfdataset(
        file_list,
        combine='by_coords',
        chunks={'latitude': 100, 'longitude': 100},
    )['t_mean']
    .chunk({'time': -1})
)

In [24]:
# Output path for the quantile file of the current variable.
# The percent labels are rounded, not truncated: int(100 * q) cuts off float
# noise (100 * 0.29 == 28.999999999999996 would be labelled 28).
# The period suffix follows the configured reference years instead of a
# hard-coded literal, so it cannot drift from the file filter.
quantile_labels = '_'.join(str(int(round(100 * q))) for q in QUANTILES)
CLIMATOLOGY_QUANTILES = (
    CLIMATOLOGY_QUANTILES_FOLDER
    / f'daily_{t_var}_quantiles_{quantile_labels}_{REFERENCE_YEAR_START}-{REFERENCE_YEAR_END}.nc'
)

In [None]:
# Quantiles of the daily mean along the time axis.
daily_quantiles = daily_temperatures.quantile(q=QUANTILES, dim='time')

# Evaluate with dask's process-based scheduler, showing a progress bar,
# then store the computed result as NetCDF.
with dask.config.set(scheduler='processes'):
    with ProgressBar():
        daily_quantiles = daily_quantiles.compute()
        daily_quantiles.to_netcdf(CLIMATOLOGY_QUANTILES)

[#################################       ] | 84% Completed |  1hr  1min 13.1s

In [None]:
# NOTE(review): same expression as at the top of the previous cell; running it
# rebinds `daily_quantiles` to a fresh (presumably not yet computed) quantile
# result -- confirm whether this cell is still needed.
daily_quantiles = daily_temperatures.quantile(QUANTILES, dim='time')

In [None]:
# NOTE(review): writes to the same path as the Tmean compute-and-write cell
# above. If `daily_quantiles` is the not-yet-computed result from the previous
# cell, this presumably runs the whole computation again -- confirm before running.
with ProgressBar():
    daily_quantiles.to_netcdf(CLIMATOLOGY_QUANTILES)

In [None]:
# Display the Tmean quantile result for a visual check.
daily_quantiles