In [1]:
from temds.datasources import worldclim
from temds.datasources import crujra
from temds import downscalers
from temds import corrections
from temds import tile

import ctypes
# Handle to the C library, loaded by its Linux soname "libc.so.6".
# libc.malloc_trim(0) is called later in this notebook right after gc.collect().
# NOTE(review): presumably this hands freed heap memory back to the OS between tiles - confirm.
libc = ctypes.CDLL("libc.so.6") # used for libc.malloc_trim(0)

# from affine import Affine



import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

import gc

from datetime import datetime


In [2]:
# Root of the on-disk data tree; the input and output paths used in this
# notebook are all built from it with joinpath().
# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
data_root = Path('/media/rwspicer/data/V3/tem/')

In [3]:
# First and last year handed to calculate_climate_baseline() in process_tile.
baseline_start_year, baseline_end_year = 1970, 2000

# Same value as the default of process_tile's `resolution` argument.
# NOTE(review): presumably the grid cell size in CRS units - confirm.
resolution = 4000

# (H, V) tile index pairs, matched against the 'H' and 'V' columns of the
# tile index: (7, 15), (7, 16), (8, 15), (8, 16).
tiles_HV = [(h, v) for h in (7, 8) for v in (15, 16)]

# Tile index read with geopandas; select_tile() filters it on its 'H' and 'V'
# columns and reads its .bounds.
tile_index_dataframe = gpd.read_file(data_root / '00-aoi/tile-index/')

def select_tile(c_idx, tile_index_gdf):
    """Return the bounds of the tile-index rows matching an (H, V) pair.

    Parameters
    ----------
    c_idx : sequence
        ``c_idx[0]`` is compared with the 'H' column and ``c_idx[1]`` with
        the 'V' column.
    tile_index_gdf : tile index table
        Must have 'H' and 'V' columns, and its row subsets must expose a
        ``.bounds`` attribute (as the frame returned by ``gpd.read_file``
        in this notebook does).

    Returns
    -------
    The ``.bounds`` of the rows where both columns match. It is empty when
    no row matches.
    """
    h_match = tile_index_gdf['H'].eq(c_idx[0])
    v_match = tile_index_gdf['V'].eq(c_idx[1])
    matching_rows = tile_index_gdf.loc[v_match & h_match]
    return matching_rows.bounds


def tile_bounds_as_tuple(bounds_df):
    """Return the first row of a bounds table as ``(minx, maxx, miny, maxy)``.

    Note the ordering: both x limits come first, then both y limits.

    Parameters
    ----------
    bounds_df : DataFrame
        Must have 'minx', 'maxx', 'miny' and 'maxy' columns. Only the first
        row is used.

    Returns
    -------
    tuple
        ``(minx, maxx, miny, maxy)`` taken from the first row.

    Raises
    ------
    IndexError
        If ``bounds_df`` has no rows.
    KeyError
        If one of the four columns is missing.
    """
    first_row = bounds_df[['minx', 'maxx', 'miny', 'maxy']].iloc[0]
    x_low, x_high, y_low, y_high = first_row.tolist()
    return x_low, x_high, y_low, y_high
    

# Show the (minx, maxx, miny, maxy) tuple of every tile listed in tiles_HV.
for idx_HV in tiles_HV:
    print(
        tile_bounds_as_tuple(
            select_tile(idx_HV, tile_index_dataframe)
        )
    )


(-1802000.0, -1402000.0, 2515000.0, 2915000.0)
(-1678000.0, -1402000.0, 2915000.0, 3315000.0)
(-1402000.0, -1002000.0, 2515000.0, 2915000.0)
(-1402000.0, -1002000.0, 2915000.0, 3315000.0)


In [4]:
# Notebook-level WorldClim object and the CRS of its dataset.
# process_tile() opens the same file again and derives its own working_crs,
# so it does not read these two names.
wc_arctic = worldclim.WorldClim(data_root / '02-arctic/worldclim/worldclim-arctic.nc')
working_crs = wc_arctic.dataset.rio.crs

In [5]:
import joblib
def _seconds_since(start):
    """Return the wall-clock seconds elapsed since the datetime ``start``."""
    return (datetime.now() - start).total_seconds()


def process_tile(
    tile_HV, tile_index, resolution=4000, buffer=20, parallel=False,
    n_jobs=24, n_years=120, check_year=1950, baseline_years=None,
):
    """Build one tile: import WorldClim and CRU-JRA, then downscale CRU-JRA.

    Steps, each timed and reported with ``print``:

    1. open the arctic WorldClim file and take its CRS as the working CRS;
    2. create the CRU-JRA annual time series (``in_memory=False``);
    3. create the tile from the bounds found in ``tile_index``;
    4. import WorldClim, then the CRU-JRA series, into the tile;
    5. check that the x/y coordinates of both imports are equal;
    6. compute the CRU baseline, the correction factors and the downscaled
       time series.

    Parameters
    ----------
    tile_HV : tuple
        (H, V) index pair; passed to ``select_tile`` and ``tile.Tile``.
    tile_index : tile index table
        Passed to ``select_tile`` to look up the tile bounds.
    resolution : int, default 4000
        Passed to ``tile.Tile``.
    buffer : int, default 20
        Passed to ``tile.Tile`` as ``buffer_px``.
    parallel : bool, default False
        When true, the CRU import and the downscaling run inside a
        ``joblib.parallel_config`` block with the "loky" backend.
    n_jobs : int, default 24
        Worker count used when ``parallel`` is true.
    n_years : int, default 120
        Divisor for the "s/year" figures in the timing messages; must be
        non-zero. NOTE(review): presumably the number of years in the
        CRU-JRA series - confirm.
    check_year : int, default 1950
        Year of the CRU-JRA series whose coordinates are compared with the
        WorldClim coordinates.
    baseline_years : tuple of (start, end), optional
        Years passed to ``calculate_climate_baseline``. When omitted, the
        notebook globals ``baseline_start_year`` and ``baseline_end_year``
        are used.

    Returns
    -------
    tile.Tile
        The tile after import, correction and downscaling.

    Raises
    ------
    TypeError
        If the x or y coordinates of the two imports differ. The exception
        type is kept from the original implementation.
    """
    def year_from_filename(fn):
        # The year is the 4th dot-separated field from the end of the name.
        return int(Path(fn).name.split('.')[-4])

    if baseline_years is None:
        # Fall back to the notebook-level configuration.
        baseline_years = (baseline_start_year, baseline_end_year)
    first_baseline_year, last_baseline_year = baseline_years

    files = data_root.joinpath('02-arctic/cru-jra-fixed/')

    start = datetime.now()
    wc_arctic = worldclim.WorldClim(
        data_root.joinpath('02-arctic/worldclim/worldclim-arctic.nc')
    )
    working_crs = wc_arctic.dataset.rio.crs
    total = _seconds_since(start)
    print(f'load wc arctic {total} seconds')

    start = datetime.now()
    cru_arctic_ts = crujra.AnnualTimeSeries(
        files,
        verbose=False,
        year_override_callback=year_from_filename,
        in_memory=False
    )
    total = _seconds_since(start)
    print(f'load arctic cru {total} seconds, {total/n_years} s/year ')

    bounds = tile_bounds_as_tuple(select_tile(tile_HV, tile_index))
    mytile = tile.Tile(tile_HV, bounds, resolution, working_crs, buffer_px=buffer)

    start = datetime.now()
    mytile.import_normalized('worldclim', wc_arctic, clip_with='xarray')
    total = _seconds_since(start)
    print(f'import wc {total} seconds')
    # Drop the source object as soon as the tile has its own copy.
    del wc_arctic
    gc.collect()

    start = datetime.now()
    if parallel:
        with joblib.parallel_config(backend="loky", n_jobs=n_jobs, verbose=20):
            mytile.import_normalized('cru_AnnualTimeSeries', cru_arctic_ts, parallel=True)
    else:
        mytile.import_normalized('cru_AnnualTimeSeries', cru_arctic_ts, parallel=False)
    total = _seconds_since(start)
    print(f'import cru {total} seconds, {total/n_years} s/year ')
    del cru_arctic_ts
    gc.collect()

    libc.malloc_trim(0)

    # Both imports must share the same grid before they are combined.
    cru_check = mytile.data['cru_AnnualTimeSeries'][check_year].dataset
    if not (cru_check.x == mytile.data['worldclim'].x).all():
        raise TypeError('x misaligned')
    if not (cru_check.y == mytile.data['worldclim'].y).all():
        raise TypeError('y misaligned')

    start = datetime.now()
    mytile.calculate_climate_baseline(
        first_baseline_year, last_baseline_year,
        'cru_baseline', 'cru_AnnualTimeSeries'
    )
    total = _seconds_since(start)
    print(f'baseline {total} seconds')

    # Correction-factor setup: one entry per output factor.
    variables_cf = {
        'tmax': {'function': 'temperature', 'reference': 'tmax', 'baseline': 'tmax', 'name': 'tmax'},
        'tmin': {'function': 'temperature', 'reference': 'tmin', 'baseline': 'tmin', 'name': 'tmin'},
        'tavg': {'function': 'temperature', 'reference': 'tavg', 'baseline': 'tmp', 'name': 'tavg'},
        'prec': {'function': 'precipitation', 'reference': 'prec', 'baseline': 'pre', 'name': 'prec'},
        'vapo_pa': {
            'function': 'vapor-pressure',
            'reference': 'vapr',
            'baseline-pres': 'pres',
            'baseline-spfh': 'spfh',
            'name': 'vapo'
        },
        'nirr': {
            'function': 'radiation',
            'reference': 'srad',
            'baseline': 'dswrf',
            'name': 'nirr'
        },
        'ws': {
            'function': 'wind-speed',
            'reference': 'wind',
            'baseline-ugrd': 'ugrd',
            'baseline-vgrd': 'vgrd',
            'name': 'ws'
        }
    }
    start = datetime.now()
    mytile.toggle_verbose()
    try:
        mytile.calculate_correction_factors('cru_baseline', 'worldclim', variables_cf)
    finally:
        # Toggle back even when the step fails.
        mytile.toggle_verbose()
    total = _seconds_since(start)
    print(f'cc-factor {total} seconds')

    # Downscaling setup: 'correction_factor' values refer to the 'name'
    # entries of variables_cf above.
    variables_ds = {
        'tmax': {'function': 'temperature', 'temperature': 'tmax', 'correction_factor': 'tmax', 'name': 'tmax'},
        'tmin': {'function': 'temperature', 'temperature': 'tmin', 'correction_factor': 'tmin', 'name': 'tmin'},
        'tavg': {'function': 'temperature', 'temperature': 'tmp', 'correction_factor': 'tavg', 'name': 'tavg'},
        'prec': {'function': 'precipitation', 'precipitation': 'pre', 'correction_factor': 'prec', 'name': 'prec'},
        'vapo': {'function': 'vapor-pressure', 'name': 'vapo', 'pres': 'pres', 'spfh': 'spfh', 'correction_factor': 'vapo'},
        'nirr': {'function': 'radiation', 'name': 'nirr', 'dswrf': 'dswrf', 'correction_factor': 'nirr'},
        'wind': {'function': 'wind-speed', 'name': 'wind', 'ugrd': 'ugrd', 'vgrd': 'vgrd', 'correction_factor': 'ws'},
        'winddir': {'function': 'wind-direction', 'name': 'winddir', 'ugrd': 'ugrd', 'vgrd': 'vgrd'},
    }

    start = datetime.now()
    mytile.toggle_verbose()
    try:
        if parallel:
            with joblib.parallel_config(backend="loky", n_jobs=n_jobs, verbose=20):
                mytile.downscale_timeseries('downscaled_cru', 'cru_AnnualTimeSeries', 'correction_factors', variables_ds, True)
        else:
            mytile.downscale_timeseries('downscaled_cru', 'cru_AnnualTimeSeries', 'correction_factors', variables_ds, False)
    finally:
        mytile.toggle_verbose()
    total = _seconds_since(start)
    print(f'downscale {total} seconds, {total/n_years} s/year')

    gc.collect()
    return mytile

In [6]:
from osgeo import gdal

# Tiles to process, as (H, V) index pairs. An earlier run used the set
# (7,15), (7,16), (8,15), (8,16); that assignment was overwritten on the next
# line and has been dropped.
tiles_HV = [(11,19),(22,12), (0,8), (12,0) ] ## edge tiles
start = datetime.now()
processed_tiles = {}  # not populated in this cell; kept because the name existed before
gc.collect()
for idx_HV in tiles_HV:
    mytile = process_tile(
        idx_HV, tile_index_dataframe, parallel=True
    )
    try:
        mytile.toggle_verbose()
        mytile.save(
            data_root.joinpath('03-tiles-testing'),
            overwrite=True, use_zlib=False, clear_existing=True
        )
        mytile.toggle_verbose()
    finally:
        # Release the tile even when save() raises, so a failed tile does
        # not stay referenced by the notebook namespace.
        del mytile
        gc.collect()
        libc.malloc_trim(0)
# Total wall-clock seconds, and the average per tile.
total = (datetime.now()-start).total_seconds()
print(total, total/len(tiles_HV))

load wc arctic 0.067971 seconds
load arctic cru 0.008094 seconds, 6.745e-05 s/year 
import wc 1.23841 seconds
parallel


[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=24)]: Done   1 tasks      | elapsed:   20.5s
[Parallel(n_jobs=24)]: Done   2 tasks      | elapsed:   21.1s
[Parallel(n_jobs=24)]: Done   3 tasks      | elapsed:   21.6s
[Parallel(n_jobs=24)]: Done   4 tasks      | elapsed:   22.0s
[Parallel(n_jobs=24)]: Done   5 tasks      | elapsed:   22.5s
[Parallel(n_jobs=24)]: Done   6 tasks      | elapsed:   22.9s
[Parallel(n_jobs=24)]: Done   7 tasks      | elapsed:   23.4s
[Parallel(n_jobs=24)]: Done   8 tasks      | elapsed:   23.9s
[Parallel(n_jobs=24)]: Done   9 tasks      | elapsed:   24.4s
[Parallel(n_jobs=24)]: Done  10 tasks      | elapsed:   24.9s
[Parallel(n_jobs=24)]: Done  11 tasks      | elapsed:   25.4s
[Parallel(n_jobs=24)]: Done  12 tasks      | elapsed:   25.9s
[Parallel(n_jobs=24)]: Done  13 tasks      | elapsed:   26.4s
[Parallel(n_jobs=24)]: Done  14 tasks      | elapsed:   27.0s
[Parallel(n_jobs=24)]: Done  15 tasks      | elapsed:  

Checking Continuity (basic)
Data is continuous
Checking Continuity (advanced)
-- Checking 1901
-- Checking 1902
-- Checking 1903
-- Checking 1904
-- Checking 1905
-- Checking 1906
-- Checking 1907
-- Checking 1908
-- Checking 1909
-- Checking 1910
-- Checking 1911
-- Checking 1912
-- Checking 1913
-- Checking 1914
-- Checking 1915
-- Checking 1916
-- Checking 1917
-- Checking 1918
-- Checking 1919
-- Checking 1920
-- Checking 1921
-- Checking 1922
-- Checking 1923
-- Checking 1924
-- Checking 1925
-- Checking 1926
-- Checking 1927
-- Checking 1928
-- Checking 1929
-- Checking 1930
-- Checking 1931
-- Checking 1932
-- Checking 1933
-- Checking 1934
-- Checking 1935
-- Checking 1936
-- Checking 1937
-- Checking 1938
-- Checking 1939
-- Checking 1940
-- Checking 1941
-- Checking 1942
-- Checking 1943
-- Checking 1944
-- Checking 1945
-- Checking 1946
-- Checking 1947
-- Checking 1948
-- Checking 1949
-- Checking 1950
-- Checking 1951
-- Checking 1952
-- Checking 1953
-- Checking 1954
-- C

[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=24)]: Done   1 tasks      | elapsed:   26.6s
[Parallel(n_jobs=24)]: Done   2 tasks      | elapsed:   27.4s
[Parallel(n_jobs=24)]: Done   3 tasks      | elapsed:   28.2s
[Parallel(n_jobs=24)]: Done   4 tasks      | elapsed:   29.2s
[Parallel(n_jobs=24)]: Done   5 tasks      | elapsed:   30.1s
[Parallel(n_jobs=24)]: Done   6 tasks      | elapsed:   31.0s
[Parallel(n_jobs=24)]: Done   7 tasks      | elapsed:   31.9s
[Parallel(n_jobs=24)]: Done   8 tasks      | elapsed:   32.8s
[Parallel(n_jobs=24)]: Done   9 tasks      | elapsed:   33.7s
[Parallel(n_jobs=24)]: Done  10 tasks      | elapsed:   34.6s
[Parallel(n_jobs=24)]: Done  11 tasks      | elapsed:   35.5s
[Parallel(n_jobs=24)]: Done  12 tasks      | elapsed:   36.3s
[Parallel(n_jobs=24)]: Done  13 tasks      | elapsed:   37.2s
[Parallel(n_jobs=24)]: Done  14 tasks      | elapsed:   38.1s
[Parallel(n_jobs=24)]: Done  15 tasks      | elapsed:  

downscale 132.553865 seconds, 1.1046155416666668 s/year
Saving: worldclim
Saving: cru_AnnualTimeSeries
Saving: cru_baseline
Saving: correction_factors
Saving: downscaled_cru
load wc arctic 0.067771 seconds
load arctic cru 0.00875 seconds, 7.291666666666667e-05 s/year 
import wc 3.093394 seconds
parallel


[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=24)]: Done   1 tasks      | elapsed:   19.6s
[Parallel(n_jobs=24)]: Done   2 tasks      | elapsed:   20.2s
[Parallel(n_jobs=24)]: Done   3 tasks      | elapsed:   20.7s
[Parallel(n_jobs=24)]: Done   4 tasks      | elapsed:   21.2s
[Parallel(n_jobs=24)]: Done   5 tasks      | elapsed:   21.6s
[Parallel(n_jobs=24)]: Done   6 tasks      | elapsed:   22.1s
[Parallel(n_jobs=24)]: Done   7 tasks      | elapsed:   22.6s
[Parallel(n_jobs=24)]: Done   8 tasks      | elapsed:   23.2s
[Parallel(n_jobs=24)]: Done   9 tasks      | elapsed:   23.7s
[Parallel(n_jobs=24)]: Done  10 tasks      | elapsed:   24.2s
[Parallel(n_jobs=24)]: Done  11 tasks      | elapsed:   24.8s
[Parallel(n_jobs=24)]: Done  12 tasks      | elapsed:   25.3s
[Parallel(n_jobs=24)]: Done  13 tasks      | elapsed:   25.9s
[Parallel(n_jobs=24)]: Done  14 tasks      | elapsed:   26.4s
[Parallel(n_jobs=24)]: Done  15 tasks      | elapsed:  

Checking Continuity (basic)
Data is continuous
Checking Continuity (advanced)
-- Checking 1901
-- Checking 1902
-- Checking 1903
-- Checking 1904
-- Checking 1905
-- Checking 1906
-- Checking 1907
-- Checking 1908
-- Checking 1909
-- Checking 1910
-- Checking 1911
-- Checking 1912
-- Checking 1913
-- Checking 1914
-- Checking 1915
-- Checking 1916
-- Checking 1917
-- Checking 1918
-- Checking 1919
-- Checking 1920
-- Checking 1921
-- Checking 1922
-- Checking 1923
-- Checking 1924
-- Checking 1925
-- Checking 1926
-- Checking 1927
-- Checking 1928
-- Checking 1929
-- Checking 1930
-- Checking 1931
-- Checking 1932
-- Checking 1933
-- Checking 1934
-- Checking 1935
-- Checking 1936
-- Checking 1937
-- Checking 1938
-- Checking 1939
-- Checking 1940
-- Checking 1941
-- Checking 1942
-- Checking 1943
-- Checking 1944
-- Checking 1945
-- Checking 1946
-- Checking 1947
-- Checking 1948
-- Checking 1949
-- Checking 1950
-- Checking 1951
-- Checking 1952
-- Checking 1953
-- Checking 1954
-- C

[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=24)]: Done   1 tasks      | elapsed:   22.8s
[Parallel(n_jobs=24)]: Done   2 tasks      | elapsed:   23.7s
[Parallel(n_jobs=24)]: Done   3 tasks      | elapsed:   24.5s
[Parallel(n_jobs=24)]: Done   4 tasks      | elapsed:   25.3s
[Parallel(n_jobs=24)]: Done   5 tasks      | elapsed:   26.2s
[Parallel(n_jobs=24)]: Done   6 tasks      | elapsed:   27.2s
[Parallel(n_jobs=24)]: Done   7 tasks      | elapsed:   28.0s
[Parallel(n_jobs=24)]: Done   8 tasks      | elapsed:   29.0s
[Parallel(n_jobs=24)]: Done   9 tasks      | elapsed:   29.9s
[Parallel(n_jobs=24)]: Done  10 tasks      | elapsed:   30.8s
[Parallel(n_jobs=24)]: Done  11 tasks      | elapsed:   31.7s
[Parallel(n_jobs=24)]: Done  12 tasks      | elapsed:   32.7s
[Parallel(n_jobs=24)]: Done  13 tasks      | elapsed:   33.6s
[Parallel(n_jobs=24)]: Done  14 tasks      | elapsed:   34.5s
[Parallel(n_jobs=24)]: Done  15 tasks      | elapsed:  

downscale 132.953202 seconds, 1.10794335 s/year
Saving: worldclim
Saving: cru_AnnualTimeSeries


RuntimeError: NetCDF: HDF error

In [None]:
# Manual cleanup: run the garbage collector, then call malloc_trim(0) through
# the libc handle. As the last expression of the cell, the tuple of both
# return values is what the cell displays.
gc.collect(),libc.malloc_trim(0)

In [None]:
# Scratch note: 2604.244333 divided by 60; the variable name suggests a
# single-core run measured in minutes.
# NOTE(review): 2604.244333 is presumably that run's total wall-clock seconds - confirm.
min_single_core = 2604.244333/60
min_single_core
# Step timings copied from that run's output:
#   import cru 2400.089917 seconds, 20.000749308333333 s/year
#   downscale 166.991653 seconds, 1.3915971083333334 s/year

In [None]:
# Scratch note: 469.627201 divided by 60.
# NOTE(review): the name suggests minutes for a 24-worker run, with 469.627201
# being its total wall-clock seconds - confirm.
min24= 469.627201/60
min24
# Step timings copied from that run's output:
#   downscale 217.761886 seconds, 1.8146823833333334 s/year
#   import cru 217.77511 seconds, 1.8147925833333334 s/year

In [None]:
# Scratch note: 421.573156 divided by 60.
# NOTE(review): the name suggests minutes for a later 24-worker run, with
# 421.573156 being its total wall-clock seconds - confirm.
min24_new=421.573156/60
min24_new
# Step timings copied from that run's output:
#   downscale 223.979083 seconds, 1.8664923583333333 s/year
#   import cru 157.333171 seconds, 1.3111097583333333 s/year

In [None]:
# Scratch arithmetic: 601.4049515 divided by 60.
# NOTE(review): presumably a seconds-to-minutes conversion of another timing; the source run is not recorded here - confirm.
601.4049515/60