This notebooks is for aligning netcdf grids

In [149]:
import numpy
import xarray as xr
from scipy.misc import imresize

In [59]:
ds_fine = xr.open_dataset('../example_data/GLDAS/FORCING/2019022600.LDASIN_DOMAIN1')
ds_coarse = xr.open_dataset('../example_data/GLDAS/input_files/GLDAS_NOAH025_3H.A20190226.0000.021.nc4')

Look at the shapes of these datasets

In [60]:
shape_fine = ds_fine.lat.values.shape
shape_coarse = ds_coarse.SWdown_f_tavg.shape

print('Fine resolution shape: %s' % str(shape_fine))
print('Coarse resolution shape: %s' % str(shape_coarse))

Fine resolution shape: (56, 72)
Coarse resolution shape: (1, 600, 1440)


Take a look at the data and make sure there are non-nan values

In [61]:
ds_coarse.Rainf_f_tavg

<xarray.DataArray 'Rainf_f_tavg' (time: 1, lat: 600, lon: 1440)>
[864000 values with dtype=float32]
Coordinates:
  * lat      (lat) float32 -59.875 -59.625 -59.375 ... 89.375 89.625 89.875
  * lon      (lon) float32 -179.875 -179.625 -179.375 ... 179.625 179.875
  * time     (time) datetime64[ns] 2019-02-26
Attributes:
    units:          kg m-2 s-1
    standard_name:  precipitation_flux
    long_name:      Total precipitation rate
    cell_methods:   time: mean
    vmin:           0.0
    vmax:           0.009105

In [62]:
arr = ds_coarse.Rainf_f_tavg.values[0]

In [63]:
numpy.nanmin(ds_coarse.Rainf_f_tavg.values)

0.0

In [64]:
numpy.nanmax(ds_coarse.Rainf_f_tavg.values)

0.0091049997

Get the corner lat/lon for the target array

In [65]:
xmin = ds_fine.lon.values.min()
xmax = ds_fine.lon.values.max()
ymin = ds_fine.lat.values.min()
ymax = ds_fine.lat.values.max()

Select all values in the source array within this range

In [66]:
lons = ds_coarse.lon.values
lats = ds_coarse.lat.values

In [67]:
# get the coordinates in the source array that match the range of the target
inner_lons = numpy.where((lons < xmax) & (lons > xmin))
inner_lats = numpy.where((lats < ymax) & (lats > ymin))

In [68]:
inner_lons

(array([87, 88, 89]),)

In [69]:
inner_lats

(array([325, 326]),)

In [70]:
# define index bounds for selecting data using numpy indexing
llon = inner_lons[0].min()
ulon = inner_lons[0].max() + 1
llat = inner_lats[0].min()
ulat = inner_lats[0].max() + 1

In [71]:
# select the data from the target for this range
arr = ds_coarse['Rainf_f_tavg'][0].values
subset = arr[llon:ulon, llat:ulat]
print(subset)

[[ nan  nan]
 [ nan  nan]
 [ nan  nan]]


In [141]:
# replace nan with 0
subset = numpy.nan_to_num(subset)

# get the max value used for converting the interpolated data back into
# the original range
subset_max = subset.max()

In [142]:
subset

array([[    0.,  1000.],
       [    0.,     0.],
       [  200.,     0.]], dtype=float32)

In [143]:
# insert some random values (testing only)
subset[0][1] = 1000
subset[2][0] = 200

In [144]:
subset

array([[    0.,  1000.],
       [    0.,     0.],
       [  200.,     0.]], dtype=float32)

In [150]:
# resize the array to the shape of the target
d = imresize(subset,
         (10,10),
         interp='bilinear',
         mode=None)


`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  """


In [151]:
# convert from uint8 back into our original range of data
d = d/255 * subset_max
d

array([[    0.        ,     0.        ,     0.        ,   200.        ,
          400.        ,   600.        ,   800.        ,  1000.        ,
         1000.        ,  1000.        ],
       [    0.        ,     0.        ,     0.        ,   200.        ,
          400.        ,   600.        ,   800.        ,  1000.        ,
         1000.        ,  1000.        ],
       [    0.        ,     0.        ,     0.        ,   149.01960784,
          301.96078431,   450.98039216,   600.        ,   749.01960784,
          749.01960784,   749.01960784],
       [    0.        ,     0.        ,     0.        ,    90.19607843,
          180.39215686,   270.58823529,   360.78431373,   450.98039216,
          450.98039216,   450.98039216],
       [    0.        ,     0.        ,     0.        ,    31.37254902,
           58.82352941,    90.19607843,   121.56862745,   149.01960784,
          149.01960784,   149.01960784],
       [   31.37254902,    31.37254902,    31.37254902,    23.52941176,
   

Create a function to do these steps

In [169]:
def regrid_variables(source, target, variables):
    ds_fine = xr.open_dataset(source)
    ds_coarse = xr.open_dataset(target)
    
    xmin = ds_fine.lon.values.min()
    xmax = ds_fine.lon.values.max()
    ymin = ds_fine.lat.values.min()
    ymax = ds_fine.lat.values.max()
    
    lons = ds_coarse.lon.values
    lats = ds_coarse.lat.values

    # get the coordinates in the source array that match the
    # range of the target
    inner_lons = numpy.where((lons < xmax) & (lons > xmin))
    inner_lats = numpy.where((lats < ymax) & (lats > ymin))
    
    # define index bounds for selecting data using numpy indexing
    llon = inner_lons[0].min()
    ulon = inner_lons[0].max() + 1
    llat = inner_lats[0].min()
    ulat = inner_lats[0].max() + 1
    
    regridded_data = {}
    for v in variables:
    
        # select the data from the target for this range
        arr = ds_coarse[v][0].values
        subset = arr[llon:ulon, llat:ulat]

        # replace nan with 0
        subset = numpy.nan_to_num(subset)

        # get the max value used for converting the interpolated data back into
        # the original range
        subset_max = subset.max()


        # resize the array to the shape of the target
        d = imresize(subset,
                 (10,10),
                 interp='bilinear',
                 mode=None)


        # convert from uint8 back into our original range of data
        d = d/255 * subset_max
        regridded_data[v] = d
    return regridded_data

In [170]:
target = '../example_data/GLDAS/FORCING/2019022600.LDASIN_DOMAIN1'
source = '../example_data/GLDAS/input_files/GLDAS_NOAH025_3H.A20190226.0000.021.nc4'
variables = ['T2D',
             'Q2D',
             'U2D',
             'V2D',
             'RAINRATE',
             'SWDOWN',
             'LWDOWN',
             'PSFC']
regrid_variables(source, target, variables)

`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.


{'T2D': array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]]),
 'Q2D': array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0., 