In [102]:
import s3fs
import xarray as xr
import numpy as np
import coiled

In [103]:
CITYLATLON = {}
with open('ghsl_500k.csv', 'r') as ifile:
    for line in ifile.readlines():
        items = [i.strip() for i in line.split(',')]
        CITYLATLON['city_{0}'.format(items[0])] = (float(items[2]), float(items[3]), int(items[0]))

In [104]:
PERCENTILE_STARTYEAR = 1980
PERCENTILE_ENDYEAR = 2019

In [105]:
ERA_TRANSFORMS = {'air_temperature_at_2_metres': lambda x: x - 273.5, 'precipitation_amount_1hour_Accumulation': lambda x: x * 1000}

In [106]:
def s3open(path):
    fs = s3fs.S3FileSystem(anon=True, default_fill_cache=False)
    return s3fs.S3Map(path, s3=fs)

In [107]:
MONTHS = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]

In [123]:
%%time
def returnperiod_value_daily(timeseries, rp):
    d = np.delete(timeseries, np.argwhere(timeseries <= 0.01).flatten())  # Only consider actual positive events
    vals, counts = np.unique(d, return_counts=True)
    freqs = counts / d.size
    cdf_y = np.cumsum(freqs)
    targetfreq = (PERCENTILE_ENDYEAR - PERCENTILE_STARTYEAR + 1) / rp
    return np.interp(1-targetfreq, vals, cdf_y)

def pr_rp100_onecity(latlonid):
    varname='precipitation_amount_1hour_Accumulation'
    FILE_PATTERN = 'era5-pds/zarr/{year}/{month}/data/{varname}.zarr/'
    files_mapper = [s3open(FILE_PATTERN.format(year=year, month=month, varname=varname)) for year in range(PERCENTILE_STARTYEAR,PERCENTILE_ENDYEAR+1) for month in MONTHS]
    pr_ds = xr.open_mfdataset(files_mapper, engine='zarr', 
           concat_dim=['time0', 'time1'][int(varname=='precipitation_amount_1hour_Accumulation')], combine='nested', 
           coords='minimal', compat='override', parallel=True)
    pr_dailysum = ERA_TRANSFORMS[varname](pr_ds.resample(time1='D').sum())
    ts = pr_dailysum[varname].sel(lat=latlonid[0], lon=latlonid[1], method='nearest')
    return returnperiod_value_daily(ts.to_numpy(), 100), latlonid[2]

cluster = coiled.Cluster(n_workers=25)
client = cluster.get_client()
for c in CITYLATLON:
    fut = client.submit(pr_rp100_onecity, (CITYLATLON[c][0], CITYLATLON[c][1], CITYLATLON[c][2]))
    with open('precip_rp100.txt', 'a') as ofile:
        res, loc_id = fut.result()
        print('{0}:{1}'.format(loc_id, res), end=' ')
        ofile.write('{0}\t{1}\n'.format(loc_id, res))


Output()

Output()

0:0.7080395857307249 1:0.46731551569506724 2:0.531118995544755 3:0.4753186709539121 4:0.5922691679273828 5:0.42760500816548713 6:0.46055580132716667 7:0.45676553564620664 8:0.5922691679273828 9:0.7494615563298489 10:0.7948804469273744 11:0.4381922198567834 12:0.5416796304073919 13:0.6350781341107872 14:0.692972027972028 15:0.778226728110599 16:0.30095253697383384 17:0.7609819905213271 18:0.7599119617224879 19:0.6631447513812155 20:0.708375347912525 21:0.5727315476190477 22:0.7735279742765274 23:0.666477093206951 24:0.34592404621757145 25:0.6561984911986589 26:0.7638815533980583 27:0.6322275960170697 28:0.6680068107400132 29:0.6511142469470828 30:0.6680068107400132 31:0.7735279742765274 32:0.30728170964181795 33:0.3674662287903667 34:0.42760500816548713 35:0.7652764199655765 36:0.5744983203605081 37:0.6511142469470828 38:0.7638815533980583 39:0.6322275960170697 40:0.7700850111856823 41:0.7788811287477954 42:0.5727315476190477 43:0.7178419354838709 44:0.596015248721525 45:0.4576084210526

KeyboardInterrupt: 

In [124]:
cluster.shutdown()