# `precip-dot` data test:
## Consistent estimates in combined data

Verify that the estimates in the combined data (the product of the warped deltas and the Atlas 14 data) are consistent.

### inputs

The path to the directory containing the "combined" data to test needs to be specified in the `UNDIFF_DIR` env var prior to running this notebook. Current value for `UNDIFF_DIR`:

In [1]:
!echo $UNDIFF_DIR

/workspace/Shared/Tech_Projects/DOT/project_data/wrf_pcpt/undiff


The data will be tested at all valid (non-NaN) grid cells.

In [14]:
def transform_wgs84(lon, lat):
    """
    reproject a lon / lat pair from CRS 4326 to CRS 3338 and 
    return the transformed coordinate pair
    """
    # always_xy=True is passed so both input and output use x, y ordering
    to_3338 = Transformer.from_crs(4326, 3338, always_xy=True)
    projected = to_3338.transform(lon, lat)
    return projected


def read_data(fp):
    """
    open the dataset at fp and return the values of its "pf" 
    variable as an array
    """
    # pull the values while the file is still open; the context manager
    # closes the dataset afterwards, even if the read raises
    with xr.open_dataset(fp) as ds:
        return ds["pf"].values


def test_table(arr):
    """
    check a 2-D array of pf estimates, arranged in the same manner 
    as the NOAA Atlas 14 tables, for consistency: values must be 
    strictly increasing along every row and down every column.
    returns True if so, False otherwise
    """
    # every value must exceed its left-hand neighbour (next interval is larger)
    across = arr[:, 1:] > arr[:, :-1]
    # every value must exceed the one above it (next duration is larger)
    down = arr[1:, :] > arr[:-1, :]
    return bool(np.all(across) and np.all(down))
   
    
def run_test(data_dir):
    """
    test the estimates at every valid (non-NaN) grid cell of the data 
    in data_dir for consistency, for each GCM and future period

    data_dir: directory containing the 
        "pcpt_<gcm>_sum_wrf_<duration>_<period>_undiff.nc" files

    prints progress and the overall PASS / FAIL result, and returns 
    True if every table tested is consistent, False otherwise
    """
    print("Beginning test of consistent estimates in final data.\n")
    # durations to read
    durations = [
        "60m",
        "2h",
        "3h",
        "6h",
        "12h",
        "24h",
        "2d",
        "3d",
        "4d",
        "7d",
        "10d",
        "20d",
        "30d",
        "45d",
        "60d",
    ]

    # template path
    data_fp = os.path.join(data_dir, "pcpt_{}_sum_wrf_{}_{}_undiff.nc")

    # test all locations for each future period
    gcms = ["GFDL-CM3", "NCAR-CCSM4"]
    periods = ["2020-2049", "2050-2079", "2080-2099"]
    results = []
    for gcm in gcms:
        for period in periods:
            # one file per duration for this gcm / period
            fps = [
                data_fp.format(gcm, duration, period) for duration in durations
            ]

            print("Working on data for {}, {}".format(gcm, period))
            print("reading data...")
            # read from each duration in parallel; the context manager
            # releases the workers even if a read raises
            with Pool(14) as pool:
                est_arrs = pool.map(read_data, fps)

            print("data read, preparing data...")
            # reordering of axes to get data aligned properly:
            # stacking the arrays read gives axes (duration, interval, row, col);
            # the two swaps reorder them to (row, col, duration, interval), and
            # the reshape flattens the grid so that each of the n entries is one
            # cell's table, arranged like the Atlas 14 tables
            read_shp = est_arrs[0].shape
            n = read_shp[1] * read_shp[2]
            r = len(est_arrs)
            c = read_shp[0]
            tables_arr = (
                np.array(est_arrs).swapaxes(1, 3).swapaxes(0, 2).reshape(n, r, c)
            )
            # subset to only valid (non-NaN) cells; validity is judged from the
            # first duration / first interval only, i.e. the NaN mask is assumed
            # to be the same for all durations
            valid_bool = ~np.isnan(tables_arr[:, 0, 0])
            valid_tables = tables_arr[valid_bool]  # saving tables_arr for possible troubleshooting later

            print("data prepared, testing data...")
            # test each cell's table in parallel
            with Pool(28) as pool:
                out = pool.map(test_table, valid_tables)

            results.append(np.all(out))
            print("{}, {} complete\n".format(gcm, period))

    final_result = bool(np.all(results))
    # print results
    if final_result:
        print("\nTest result: PASS")
        print("No inconsistencies in estimates found.\n")
    else:
        print("\nTest result: FAIL\n")

    # hand the outcome back so callers can act on it
    return final_result


In [13]:
import os, time, datetime
import numpy as np
import xarray as xr
from multiprocessing import Pool
from pyproj import Transformer

# directory of data to test; fail early with a clear message if it is not set
data_dir = os.getenv("UNDIFF_DIR")
if data_dir is None:
    raise RuntimeError("UNDIFF_DIR env var must be set before running this notebook")

tic = time.perf_counter()

test = run_test(data_dir)

print("Elapsed time: {} s\n".format(round(time.perf_counter() - tic, 1)))

# datetime.utcnow() is deprecated; ask for an aware UTC timestamp instead
utc_time = datetime.datetime.now(datetime.timezone.utc)
print("Completion time of previous test: {}".format(utc_time.strftime("%Y-%m-%d %H:%M:%S")))

Beginning test of consistent estimates in final data.

Working on data for GFDL-CM3, 2020-2049
reading data...
data read, preparing data...
data prepared, testing data...
GFDL-CM3, 2020-2049 complete

Working on data for GFDL-CM3, 2050-2079
reading data...
data read, preparing data...
data prepared, testing data...
GFDL-CM3, 2050-2079 complete

Working on data for GFDL-CM3, 2080-2099
reading data...
data read, preparing data...
data prepared, testing data...
GFDL-CM3, 2080-2099 complete

Working on data for NCAR-CCSM4, 2020-2049
reading data...
data read, preparing data...
data prepared, testing data...
NCAR-CCSM4, 2020-2049 complete

Working on data for NCAR-CCSM4, 2050-2079
reading data...
data read, preparing data...
data prepared, testing data...
NCAR-CCSM4, 2050-2079 complete

Working on data for NCAR-CCSM4, 2080-2099
reading data...
data read, preparing data...
data prepared, testing data...
NCAR-CCSM4, 2080-2099 complete


Test result: PASS
No inconsistencies in estimates found.

NameError: name 'failed' is not defined

'2020-10-07 19:45:05'

In [38]:
x = np.arange(16).reshape(2,2,2,2)
y = x.copy().astype("float")

In [23]:
x

array([[[[ 0,  1],
         [ 2,  3]],

        [[ 4,  5],
         [ 6,  7]]],


       [[[ 8,  9],
         [10, 11]],

        [[12, 13],
         [14, 15]]]])

In [22]:
x.reshape(4,2,2)

array([[[ 0,  1],
        [ 2,  3]],

       [[ 4,  5],
        [ 6,  7]],

       [[ 8,  9],
        [10, 11]],

       [[12, 13],
        [14, 15]]])

In [25]:
x.swapaxes(1,3).swapaxes(0,2)

array([[[[ 0,  4],
         [ 8, 12]],

        [[ 1,  5],
         [ 9, 13]]],


       [[[ 2,  6],
         [10, 14]],

        [[ 3,  7],
         [11, 15]]]])

In [21]:
idx = [0,1]
np.array([np.array([x[i,:,j,k] for i in idx]) for j in idx for k in idx])

array([[[ 0,  4],
        [ 8, 12]],

       [[ 1,  5],
        [ 9, 13]],

       [[ 2,  6],
        [10, 14]],

       [[ 3,  7],
        [11, 15]]])

In [46]:
# blank out one grid cell across the first two axes
y[:,:,0,1] = np.nan
# valid cells are the ones that are NOT NaN, so the NaN mask has to be negated
valid_idx = np.argwhere(~np.isnan(y[0,0,:,:]))

In [52]:
y.swapaxes(1,3).swapaxes(0,2).reshape(4,2,2)

array([[[ 0.,  4.],
        [ 8., 12.]],

       [[nan, nan],
        [nan, nan]],

       [[ 2.,  6.],
        [10., 14.]],

       [[ 3.,  7.],
        [11., 15.]]])

In [51]:
new_y = y.swapaxes(1,3).swapaxes(0,2).reshape(4,2,2)
new_y = new_y[~np.isnan(new_y[:,0,0])]
new_y

array([[[ 0.,  4.],
        [ 8., 12.]],

       [[ 2.,  6.],
        [10., 14.]],

       [[ 3.,  7.],
        [11., 15.]]])

In [None]:
new

In [1]:
import os, time
import numpy as np
import xarray as xr
from multiprocessing import Pool
from pyproj import Transformer



In [43]:
data_dir = os.getenv("COMBINED_DIR")
fp = os.path.join(data_dir, "pcpt_GFDL-CM3_sum_wrf_{}_2020-2049_combined.nc")
durations = [
    "60m",
    "2h",
    "3h",
    "6h",
    "12h",
    "24h",
    #"48h", add when available
    "3d",
    "4d",
    "7d",
    "10d",
    "20d",
    "30d",
    "45d",
    "60d",
]

# grid indices (along yc and xc) of the single cell to inspect
yi, xi = 2330, 5200

# look up the coordinates of that cell, closing the file once they are read
with xr.open_dataset(fp.format("3d")) as ds:
    xc = ds.xc.values[xi]
    yc = ds.yc.values[yi]

# collect the cell's pf estimates for every duration, one row per duration
arr = []
for duration in durations:
    with xr.open_dataset(fp.format(duration)) as ds:
        # positional indexing; alternative would be ds.sel(xc=xc, yc=yc).pf.values
        arr.append(ds.pf.values[:, yi, xi])

arr = np.round(np.array(arr), 2)

In [37]:
# the consistency check is defined as test_table in this notebook
test_table(arr)

False

In [57]:
!ls /workspace/Shared/Tech_Projects/DOT/project_data/wrf_pcpt/undiff

pcpt_GFDL-CM3_sum_wrf_10d_2020-2049_undiff.nc
pcpt_GFDL-CM3_sum_wrf_10d_2050-2079_undiff.nc
pcpt_GFDL-CM3_sum_wrf_10d_2080-2099_undiff.nc
pcpt_GFDL-CM3_sum_wrf_12h_2020-2049_undiff.nc
pcpt_GFDL-CM3_sum_wrf_12h_2050-2079_undiff.nc
pcpt_GFDL-CM3_sum_wrf_12h_2080-2099_undiff.nc
pcpt_GFDL-CM3_sum_wrf_20d_2020-2049_undiff.nc
pcpt_GFDL-CM3_sum_wrf_20d_2050-2079_undiff.nc
pcpt_GFDL-CM3_sum_wrf_20d_2080-2099_undiff.nc
pcpt_GFDL-CM3_sum_wrf_24h_2020-2049_undiff.nc
pcpt_GFDL-CM3_sum_wrf_24h_2050-2079_undiff.nc
pcpt_GFDL-CM3_sum_wrf_24h_2080-2099_undiff.nc
pcpt_GFDL-CM3_sum_wrf_2d_2020-2049_undiff.nc
pcpt_GFDL-CM3_sum_wrf_2d_2050-2079_undiff.nc
pcpt_GFDL-CM3_sum_wrf_2d_2080-2099_undiff.nc
pcpt_GFDL-CM3_sum_wrf_2h_2020-2049_undiff.nc
pcpt_GFDL-CM3_sum_wrf_2h_2050-2079_undiff.nc
pcpt_GFDL-CM3_sum_wrf_2h_2080-2099_undiff.nc
pcpt_GFDL-CM3_sum_wrf_30d_2020-2049_undiff.nc
pcpt_GFDL-CM3_sum_wrf_30d_2050-2079_undiff.nc
pcpt_GFDL-CM3_sum_wrf_30d_2080-2099_undiff.nc
pcpt_GFDL-CM3_sum_w

In [42]:
data_dir

'/workspace/Shared/Tech_Projects/DOT/project_data/wrf_pcpt/final-fudged/'

In [50]:
# open the "3d" file and show the pf values at grid index [2330, 5200];
# ds is not closed here, so it stays bound for further inspection
fp = os.path.join(data_dir, "pcpt_GFDL-CM3_sum_wrf_{}_2020-2049_combined.nc")
ds = xr.open_dataset(fp.format("3d"))
ds.pf.values[:,2330,5200]

array([13418.832, 16710.39 , 18118.146, 19099.64 , 19372.979, 19372.979,
       19566.709, 19762.375, 19959.998], dtype=float32)

In [56]:
ds.sel(xc=xc, yc=yc).pf.values

array([13418.832, 16710.39 , 18118.146, 19099.64 , 19372.979, 19372.979,
       19566.709, 19762.375, 19959.998], dtype=float32)

In [71]:
x = np.argwhere(np.isnan(ds.pf.values[0,:50,:50]))
[item for item in x]

[array([0, 0]),
 array([0, 1]),
 array([0, 2]),
 array([0, 3]),
 array([0, 4]),
 array([0, 5]),
 array([0, 6]),
 array([0, 7]),
 array([0, 8]),
 array([0, 9]),
 array([ 0, 10]),
 array([ 0, 11]),
 array([ 0, 12]),
 array([ 0, 13]),
 array([ 0, 14]),
 array([ 0, 15]),
 array([ 0, 16]),
 array([ 0, 17]),
 array([ 0, 18]),
 array([ 0, 19]),
 array([ 0, 20]),
 array([ 0, 21]),
 array([ 0, 22]),
 array([ 0, 23]),
 array([ 0, 24]),
 array([ 0, 25]),
 array([ 0, 26]),
 array([ 0, 27]),
 array([ 0, 28]),
 array([ 0, 29]),
 array([ 0, 30]),
 array([ 0, 31]),
 array([ 0, 32]),
 array([ 0, 33]),
 array([ 0, 34]),
 array([ 0, 35]),
 array([ 0, 36]),
 array([ 0, 37]),
 array([ 0, 38]),
 array([ 0, 39]),
 array([ 0, 40]),
 array([ 0, 41]),
 array([ 0, 42]),
 array([ 0, 43]),
 array([ 0, 44]),
 array([ 0, 45]),
 array([ 0, 46]),
 array([ 0, 47]),
 array([ 0, 48]),
 array([ 0, 49]),
 array([1, 0]),
 array([1, 1]),
 array([1, 2]),
 array([1, 3]),
 array([1, 4]),
 array([1, 5]),
 array([1, 6]),
 array([

In [57]:
yc

1262145.3748724933

In [44]:
arr

array([[   672.33,    807.26,    886.42,    978.48,   1042.  ,   1098.36,
          1154.25,   1222.42,   1271.21],
       [  1131.34,   1355.64,   1488.96,   1645.53,   1750.8 ,   1848.28,
          1943.18,   2059.36,   2142.18],
       [  1590.89,   1862.99,   2009.37,   2167.62,   2265.61,   2349.81,
          2428.1 ,   2516.7 ,   2576.11],
       [  2783.65,   3333.37,   3660.83,   4049.89,   4315.95,   4561.19,
          4803.2 ,   5100.97,   5314.66],
       [  4338.64,   5553.81,   6375.58,   7436.68,   8233.82,   9038.58,
          9863.7 ,  10952.35,  11774.64],
       [  7245.28,   9501.93,  10923.97,  12629.68,  13843.46,  15027.39,
         16187.21,  17650.01,  18706.97],
       [ 13418.83,  16710.39,  18118.15,  19099.64,  19372.98,  19372.98,
         19566.71,  19762.38,  19960.  ],
       [ 14386.88,  19259.15,  22449.34,  26402.76,  29318.66,  32253.14,
         35231.07,  39194.69,  42217.18],
       [ 19890.82,  25663.54,  28998.26,  32561.86,  34762.11,  36610.56

In [32]:
arr[:, 1:] > arr[:, :-1]

array([[ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True, False,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True]])

In [33]:
arr[1:, :] > arr[:-1, :]

array([[ True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True]])

In [20]:
def transform_3338(x, y):
    """
    reproject an x / y pair from CRS 3338 to CRS 4326 and 
    return the transformed coordinate pair
    """
    # always_xy=True is passed so both input and output use x, y ordering
    to_4326 = Transformer.from_crs(3338, 4326, always_xy=True)
    reprojected = to_4326.transform(x, y)
    return reprojected


In [47]:
wgs84_coords = transform_3338(xc, yc)
print(wgs84_coords)

(-147.8740021963931, 61.2046357347419)


In [49]:
print("EPSG: 3338:", xc, yc)
new_3338 = transform_wgs84(*wgs84_coords)
print(new_3338)

EPSG: 3338: 327646.30521136476 1262145.3748724933
(327646.3052113644, 1262145.374872495)


In [15]:
# NOTE(review): if fp still holds a "{}" placeholder, fp.format() with no
# argument raises IndexError; presumably fp pointed at a single file when this
# cell was run — confirm which duration was intended
ds = xr.open_dataset(fp.format())
# coordinates at xc index 4904 / yc index 291, then the pf values at that point
xc = ds.xc.values[4904]
yc = ds.yc.values[291]
ds.sel(xc=xc, yc=yc).pf.values

In [16]:
ds.sel(xc=xc, yc=yc).pf.values

array([ 886.1201, 1249.864 , 1476.7118, 1743.2407, 1929.1549, 2108.517 ,
       2313.8643, 3023.409 , 3702.59  ], dtype=float32)

In [14]:
ds

In [10]:
test_idx = ~np.isnan(ds.pf.values)

In [12]:
np.max(ds.pf.values[test_idx])

92927.086

In [85]:
ds.close()

In [88]:
print(ds)

In [52]:
x = ds.pf.values[:,:5,:5]
y = ds.pf.values[:,7:10,4335:4338]

In [39]:
idx = np.argwhere(~np.isnan(ds.pf.values[0,:,:]))

In [40]:
idx.shape

(7111811, 2)

In [43]:
4110*7639

31396290

In [59]:
idx_arr = np.argwhere(~np.isnan(y[0,:,:]))

In [62]:
[y[:,idx[0],idx[1]] for idx in idx_arr]

[array([ 730.163 , 1033.2117, 1274.6506, 1637.0066, 1952.6128, 2312.7512,
        2771.7068, 3411.1665, 3921.4683], dtype=float32),
 array([ 729.78107, 1032.2062 , 1273.4685 , 1635.9999 , 1952.1216 ,
        2313.1897 , 2773.6172 , 3415.9934 , 3929.3083 ], dtype=float32),
 array([ 731.20715, 1035.2131 , 1277.0496 , 1639.5272 , 1954.8546 ,
        2314.3113 , 2772.0886 , 3408.9602 , 3916.4321 ], dtype=float32),
 array([ 730.8321, 1034.2052, 1275.8558, 1638.4956, 1954.3274, 2314.7014,
        2773.9355, 3413.702 , 3924.1677], dtype=float32),
 array([ 732.25055, 1037.2148 , 1279.4492 , 1642.0499 , 1957.0997 ,
        2315.8757 , 2772.476  , 3406.7615 , 3911.405  ], dtype=float32),
 array([ 731.88245, 1036.2043 , 1278.2441 , 1640.9937 , 1956.5364 ,
        2316.2175 , 2774.2598 , 3411.418  , 3919.0361 ], dtype=float32),
 array([ 731.5149, 1035.1936, 1277.038 , 1638.7598, 1955.9706, 2316.5557,
        2776.0388, 3414.7773, 3925.3389], dtype=float32)]