# Subsetting National Water Model (NWM) output from a THREDDS server with xarray


In [2]:
import os
from datetime import datetime, timedelta
import pyproj

import re
import xarray
import requests
import warnings
import matplotlib
import numpy as np
import holoviews as hv
from matplotlib import rc
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
from matplotlib.animation import FuncAnimation

In [3]:
# data availability
# Earliest available: 2019-09-03
# Latest available: current


# output file types
#  CHANNEL_RT
#   time: minutes since 1970-01-01 00:00:00 UTC
#   streamflow: River Flow, m3 s-1
#   nudge: Amount of stream flow alteration, m3 s-1
#   q_lateral: Runoff into channel reach, m3 s-1
#   velocity: River Velocity, m s-1
#   feature_id: Reach ID

#  LAND
#   time:  minutes since 1970-01-01 00:00:00 UTC
#   SNOWH: Snow depth, m
#   SNEQV: Snow water equivalent kg m-2
#   FSNO: Snow-cover fraction on the ground
#   ACCET: Accumulated total ET, mm
#   SOILSAT_TOP: Fraction of soil saturation, top 2 layers
#   SNOWT_AVG: Average snow temperature (by layer mass), K
#   x: x coordinate of projection
#   y: y coordinate of projection

#  TERRAIN_RT
#   time:  minutes since 1970-01-01 00:00:00 UTC
#   zwattablrt: water table depth, m
#   sfcheadsubrt: surface head, mm
#   x: x coordinate of projection
#   y: y coordinate of projection

#  RESERVOIR
#   elevation: Water Surface Elevation, m
#   inflow: Lake Inflow, m3 s-1
#   outflow: Lake Outflow, m3 s-1
#   time: minutes since 1970-01-01 00:00:00 UTC
#   latitude: Lake latitude, degrees_north
#   longitude: Lake longitude, degrees_east
#   feature_id: ComID from NHDPlusV2 waterbody layer


In [4]:
# define the url of the thredds server; this is the base url that the
# relative path of each NWM file is appended to when file urls are built
thredds_base='http://thredds.hydroshare.org/thredds/dodsC/nwm'

In [5]:
def get_file_name(base_url, config, dt, timestep, init=0, output='land'):
    """
    Build the URL of a single NWM output file.

    The result has the form
    ``<base_url>/<config>/nwm.<dt>.t<timestep>z.<config>.<output>.tm<init>.conus.nc``.

    Parameters
    ----------
    base_url : str
        URL that the file path is appended to. A trailing ``/`` is tolerated.
    config : str
        Model configuration; used both as the sub-directory and inside the
        file name.
    dt : str
        Date string placed after ``nwm.`` in the file name.
    timestep : int or str
        Value of the ``tNNz`` field, left-padded with zeros to two characters.
    init : int or str, optional
        Value of the ``tmNN`` field, left-padded with zeros to two characters.
    output : str, optional
        Output type placed in the file name.

    Returns
    -------
    str
        The complete file URL.
    """
    p = (f'{config}/nwm.{dt}.t{timestep:0>2}z.{config}.{output}.'
         f'tm{init:0>2}.conus.nc')
    # URLs always use forward slashes. os.path.join is meant for filesystem
    # paths and would insert the OS separator (a backslash on Windows).
    return base_url.rstrip('/') + '/' + p


### Build list of file paths

In [6]:
# specify date and options arguments

# first and last day to collect; the file-list loop includes both end points
st = datetime(2020, 8, 10)
et = datetime(2020, 8, 12)

# value written into the `tmNN` field of each file name
initialization_time = 0
# model configuration; used as the sub-directory and inside each file name
configuration = 'analysis_assim'
# number of files requested per day, and the spacing between the `tNNz`
# values of consecutive files
num_timesteps = 24
step_size = 1
# output file type (see the list of output file types above)
output_type = 'land'

In [7]:
# build a list of files that we're interested in
# collect one URL per requested timestep for every day from st to et (inclusive)
paths = []
curr_time = st

while curr_time <= et:
    # date stamp used inside the file name
    dt = f'{curr_time:%Y%m%d}'

    for i in range(num_timesteps):
        timestep = step_size * i
        fname = get_file_name(base_url=thredds_base,
                              config=configuration,
                              dt=dt,
                              timestep=timestep,
                              init=initialization_time,
                              output=output_type)
        paths += [fname]

    # move on to the next day
    curr_time += timedelta(days=1)

### Read files using Xarray

Open multiple files as a single dataset

In [8]:
%%time
# open many datasets as one using xarray. This will take some time because the metadata needs to be read from each file.
# NOTE(review): the recorded output below reports arrays of shape (72, 72, ...) and
# sizes in the TB range, which looks like the variables are being expanded along two
# 72-long dimensions when the files are combined -- confirm the combine options.
mds = xarray.open_mfdataset(paths)

CPU times: user 10.4 s, sys: 301 ms, total: 10.7 s
Wall time: 29.9 s


In [9]:
%%time
# display the summary of the combined dataset
mds

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs


Unnamed: 0,Array,Chunk
Bytes,2.20 TB,30.58 GB
Shape,"(72, 72, 3840, 3, 4608)","(72, 1, 3840, 3, 4608)"
Count,944 Tasks,72 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 2.20 TB 30.58 GB Shape (72, 72, 3840, 3, 4608) (72, 1, 3840, 3, 4608) Count 944 Tasks 72 Chunks Type float64 numpy.ndarray",72  72  4608  3  3840,

Unnamed: 0,Array,Chunk
Bytes,2.20 TB,30.58 GB
Shape,"(72, 72, 3840, 3, 4608)","(72, 1, 3840, 3, 4608)"
Count,944 Tasks,72 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.94 TB,40.77 GB
Shape,"(72, 72, 3840, 4, 4608)","(72, 1, 3840, 4, 4608)"
Count,944 Tasks,72 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 2.94 TB 40.77 GB Shape (72, 72, 3840, 4, 4608) (72, 1, 3840, 4, 4608) Count 944 Tasks 72 Chunks Type float64 numpy.ndarray",72  72  4608  4  3840,

Unnamed: 0,Array,Chunk
Bytes,2.94 TB,40.77 GB
Shape,"(72, 72, 3840, 4, 4608)","(72, 1, 3840, 4, 4608)"
Count,944 Tasks,72 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.94 TB,40.77 GB
Shape,"(72, 72, 3840, 4, 4608)","(72, 1, 3840, 4, 4608)"
Count,944 Tasks,72 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 2.94 TB 40.77 GB Shape (72, 72, 3840, 4, 4608) (72, 1, 3840, 4, 4608) Count 944 Tasks 72 Chunks Type float64 numpy.ndarray",72  72  4608  4  3840,

Unnamed: 0,Array,Chunk
Bytes,2.94 TB,40.77 GB
Shape,"(72, 72, 3840, 4, 4608)","(72, 1, 3840, 4, 4608)"
Count,944 Tasks,72 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 733.84 GB 10.19 GB Shape (72, 72, 3840, 4608) (72, 1, 3840, 4608) Count 870 Tasks 72 Chunks Type float64 numpy.ndarray",72  1  4608  3840  72,

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 733.84 GB 10.19 GB Shape (72, 72, 3840, 4608) (72, 1, 3840, 4608) Count 870 Tasks 72 Chunks Type float64 numpy.ndarray",72  1  4608  3840  72,

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 733.84 GB 10.19 GB Shape (72, 72, 3840, 4608) (72, 1, 3840, 4608) Count 870 Tasks 72 Chunks Type float64 numpy.ndarray",72  1  4608  3840  72,

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 733.84 GB 10.19 GB Shape (72, 72, 3840, 4608) (72, 1, 3840, 4608) Count 870 Tasks 72 Chunks Type float64 numpy.ndarray",72  1  4608  3840  72,

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 733.84 GB 10.19 GB Shape (72, 72, 3840, 4608) (72, 1, 3840, 4608) Count 870 Tasks 72 Chunks Type float64 numpy.ndarray",72  1  4608  3840  72,

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 733.84 GB 10.19 GB Shape (72, 72, 3840, 4608) (72, 1, 3840, 4608) Count 870 Tasks 72 Chunks Type float64 numpy.ndarray",72  1  4608  3840  72,

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 733.84 GB 10.19 GB Shape (72, 72, 3840, 4608) (72, 1, 3840, 4608) Count 870 Tasks 72 Chunks Type float64 numpy.ndarray",72  1  4608  3840  72,

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 733.84 GB 10.19 GB Shape (72, 72, 3840, 4608) (72, 1, 3840, 4608) Count 870 Tasks 72 Chunks Type float64 numpy.ndarray",72  1  4608  3840  72,

Unnamed: 0,Array,Chunk
Bytes,733.84 GB,10.19 GB
Shape,"(72, 72, 3840, 4608)","(72, 1, 3840, 4608)"
Count,870 Tasks,72 Chunks
Type,float64,numpy.ndarray


Convert coordinates of interest into the NWM projection

In [None]:
# PROJ strings for the two coordinate systems used in the transformation below.
# geographic longitude/latitude on the WGS84 ellipsoid and datum
wgs84_proj4 = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs "
# Lambert conformal conic (standard parallels 30 and 60, origin at lat 40 / lon -97)
# on a sphere of radius 6370000 with units of metres; used here as the NWM projection
wrf_proj4 = "+proj=lcc +lat_1=30 +lat_2=60 +lat_0=40 +lon_0=-97 +x_0=0 +y_0=0 +a=6370000 +b=6370000 +units=m +no_defs"

In [None]:
# bounding box of the region of interest in WGS84 longitude/latitude:
# lower corner (llon, llat) and upper corner (ulon, ulat)
llon, llat, ulon, ulat = [-122.6089,   44.8004, -121.6515,   45.4562]
# alternative bounding box (disabled)
#llon, llat, ulon, ulat = [-105.8033,   40.2271, -104.7605,   40.629]

#from 2001-09-06 to 2020-08-26


In [None]:
def transform_coords(insrs, outsrs, lon, lat):
    """
    Transform a coordinate pair from one spatial reference system to another.

    Parameters
    ----------
    insrs : str
        Definition of the source coordinate system, passed to ``pyproj.Proj``.
    outsrs : str
        Definition of the target coordinate system, passed to ``pyproj.Proj``.
    lon, lat :
        First (x / longitude) and second (y / latitude) coordinate in the
        source system.

    Returns
    -------
    tuple
        The transformed ``(x, y)`` coordinates in the target system.
    """
    inProj = pyproj.Proj(insrs)
    outProj = pyproj.Proj(outsrs)
    # pyproj.transform is deprecated; the Transformer API is its replacement.
    # always_xy=True pins the (x/lon, y/lat) argument order used by callers.
    transformer = pyproj.Transformer.from_proj(inProj, outProj, always_xy=True)
    return transformer.transform(lon, lat)

In [None]:
# project the lower and upper corners of the bounding box from WGS84 into the
# NWM projection. NOTE: despite the lon_/lat_ names, the results are x/y
# coordinates of the target projection (wrf_proj4), not degrees.
lon_min, lat_min = transform_coords(wgs84_proj4, wrf_proj4, llon, llat)
lon_max, lat_max = transform_coords(wgs84_proj4, wrf_proj4, ulon, ulat)

In [None]:
# coordinate variables of the dataset along y and x
lats = mds.variables['y'][:] 
lons = mds.variables['x'][:]
# [min, max] bounds of the projected bounding box along each axis
lat_bnds = [ lat_min , lat_max ]
lon_bnds = [ lon_min , lon_max ] 

# positions of the coordinates that lie strictly inside the bounds;
# np.where returns a tuple, so the index array itself is element [0]
lat_inds = np.where((lats > lat_bnds[0]) & (lats < lat_bnds[1]))
lon_inds = np.where((lons > lon_bnds[0]) & (lons < lon_bnds[1]))

In [None]:
# fail early with a clear message when the bounding box selects no grid cells;
# otherwise .min()/.max() raise an opaque zero-size reduction error
if lat_inds[0].size == 0 or lon_inds[0].size == 0:
    raise ValueError('bounding box does not overlap the model grid')

# first and last selected index along each axis. Distinct names are used so the
# geographic bounds (llon, llat, ulon, ulat) defined earlier are not overwritten
# with array indices, which would corrupt a re-run of the coordinate
# transformation cell.
lat_idx_min = lat_inds[0].min()
lat_idx_max = lat_inds[0].max()
lon_idx_min = lon_inds[0].min()
lon_idx_max = lon_inds[0].max()

print('latitude index range:  %d -> %d' % (lat_idx_min, lat_idx_max))
print('longitude index range: %d -> %d' % (lon_idx_min, lon_idx_max))

In [None]:
# perform subset
# select, by integer position, only the x/y grid cells found inside the bounding box
subset = mds.isel(x=lon_inds[0], y=lat_inds[0])

# view the subsetted data
subset

In [1]:
# write the subset to a local NetCDF file; requires `subset` from the subsetting cell.
# this fails on the CUAHSI JupyterHub because it's too memory intensive.
subset.to_netcdf('region1.nc')

NameError: name 'subset' is not defined