### Preprocess HRRR variables into merged NetCDF files

In [1]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import cfgrib
import cf2cdm
from glob import glob
import cartopy.crs as ccrs
import cartopy.feature as cfeat
import xarray as xr
from datetime import datetime, timedelta
import urllib.request
from cfgrib.xarray_store import open_dataset
import warnings
import h5py

from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

from scipy.interpolate import interp2d
from scipy.interpolate import griddata
from scipy.stats import linregress

In [2]:
# Silence every warning for the rest of the session (no category or module filter is given).
warnings.filterwarnings('ignore')

In [3]:
# Get the geolocation (latitude / longitude / incidence angle) from the InSAR geometry file.
geo_file = '/data2/willytsai/InSAR_HRRR/CentralNevadaSenAT166/mintpy/inputs/geometryRadar.h5'
# The file handle is intentionally left open: lat/lon/incidence are h5py
# datasets that can only be read while `geo` stays open.
geo = h5py.File(geo_file,'r')
# list(geo.keys()) shows the names of the datasets stored in the HDF5 file.
lat = geo['latitude']
lon = geo['longitude']
incidence = geo['incidenceAngle']

# Read each coordinate grid into memory once and take its sorted unique values.
# `dataset[()]` replaces `dataset.value`, which was removed in h5py 3.0.
lat_unique = np.unique(lat[()])
lon_unique = np.unique(lon[()])

# Coordinate bound [South, North, West, East].
# NOTE(review): South skips the smallest unique latitude and East skips the
# largest unique longitude -- presumably to drop a no-data fill value; confirm
# against the geometry file.
axis_bound = [lat_unique[1],lat_unique[-1],lon_unique[0],lon_unique[-2]]
axis_bound

[37.228, 43.223568, -119.00549, -114.65358]

In [4]:
# Work from inside the regridded 3 km HRRR archive so the bare file names
# used by the following cells resolve against this directory.
os.chdir('/data2/willytsai/InSAR_HRRR/HRRR_data/google_archive/regrid_3km/')

# Collect every GRIB2 file in the directory, ordered by file name.
files = glob('*grib2')
files.sort()

In [5]:
# Merge the `pwat` field of every HRRR file into a single
# (time, latitude, longitude) array and write it to NetCDF.

# Spatial window of the InSAR frame. The longitude bounds are shifted by +360
# for the selection; the stored longitude coordinate is shifted back by -360 below.
lat_slice = slice(axis_bound[0],axis_bound[1])
lon_slice = slice(axis_bound[2]+360,axis_bound[3]+360)
pwat_kwargs = dict(filter_by_keys={'typeOfLevel':'unknown'})

# One reference file defines the output grid (shape and coordinates).
tmp = xr.open_dataset('hrrr.20171007.t02z.regrid3km.grib2',engine='cfgrib',
                      backend_kwargs=pwat_kwargs)
pwat_tmp = tmp.pwat.sel(latitude=lat_slice,longitude=lon_slice)

# Start from NaN so that any file that cannot be read stays flagged as missing.
pwat_acqu = np.full((len(files),pwat_tmp.shape[0],pwat_tmp.shape[1]),np.nan)
date_frame = []

for t, fname in enumerate(files):
    # Characters 5..17 of the file name hold the date and cycle hour, e.g. '20171007.t02z'.
    date_frame.append(datetime.strptime(fname[5:18],'%Y%m%d.t%Hz'))
    try:
        ds = xr.open_dataset(fname,engine='cfgrib',backend_kwargs=pwat_kwargs)
        pwat = ds.pwat.sel(latitude=lat_slice,longitude=lon_slice)
        pwat_acqu[t,:,:] = pwat.values
    except Exception as err:
        # Best effort: report the unreadable file (and why) and keep its time slot as NaN.
        # `Exception` rather than a bare except so Ctrl-C still interrupts the loop.
        print('ERROR file: '+fname)
        print('    reason: '+repr(err))
        pwat_acqu[t,:,:] = np.nan

# Convert into xarray. Coordinates come from the reference subset, so this
# step does not depend on whether the files in the loop were readable.
pwat_acqu_xr = xr.DataArray(pwat_acqu,dims=('time','latitude','longitude'),
                            coords=(date_frame,pwat_tmp.latitude,pwat_tmp.longitude-360),name='pwat')
pwat_ds = pwat_acqu_xr.to_dataset(name='pwat')
pwat_ds.to_netcdf('/data2/willytsai/InSAR_HRRR/HRRR_data/google_archive/regrid_3km/HRRR_pwat_NEVADA.nc')

ERROR file: hrrr.20150322.t02z.regrid3km.grib2
ERROR file: hrrr.20150403.t02z.regrid3km.grib2
ERROR file: hrrr.20150415.t02z.regrid3km.grib2
ERROR file: hrrr.20150509.t02z.regrid3km.grib2
ERROR file: hrrr.20150626.t02z.regrid3km.grib2
ERROR file: hrrr.20150720.t02z.regrid3km.grib2
ERROR file: hrrr.20150906.t02z.regrid3km.grib2
ERROR file: hrrr.20160104.t02z.regrid3km.grib2
ERROR file: hrrr.20160221.t02z.regrid3km.grib2
ERROR file: hrrr.20160316.t02z.regrid3km.grib2
ERROR file: hrrr.20160409.t02z.regrid3km.grib2
ERROR file: hrrr.20160503.t02z.regrid3km.grib2
ERROR file: hrrr.20160527.t02z.regrid3km.grib2
ERROR file: hrrr.20160714.t02z.regrid3km.grib2
ERROR file: hrrr.20160807.t02z.regrid3km.grib2


In [None]:
# Open one reference file, keeping only the GRIB messages whose stepType is
# 'instant' and whose typeOfLevel is 'surface'.
surface_filter = {'stepType': 'instant', 'typeOfLevel': 'surface'}
tmp = xr.open_dataset(
    'hrrr.20171007.t02z.regrid3km.grib2',
    engine='cfgrib',
    backend_kwargs={'filter_by_keys': surface_filter},
)

In [None]:
# Cut the `sp` field down to the InSAR frame; the longitude bounds are
# shifted by +360 before selecting.
lat_window = slice(axis_bound[0], axis_bound[1])
lon_window = slice(axis_bound[2] + 360, axis_bound[3] + 360)
ps_tmp = tmp.sp.sel(latitude=lat_window, longitude=lon_window)

In [None]:
# Display the subsetted `sp` field (notebook repr) to check its shape and coordinates.
ps_tmp

In [None]:
# Merge the `sp` field of every HRRR file into a single
# (time, latitude, longitude) array and write it to NetCDF.

# Spatial window of the InSAR frame. The longitude bounds are shifted by +360
# for the selection; the stored longitude coordinate is shifted back by -360 below.
lat_slice = slice(axis_bound[0],axis_bound[1])
lon_slice = slice(axis_bound[2]+360,axis_bound[3]+360)
ps_kwargs = dict(filter_by_keys={'stepType': 'instant', 'typeOfLevel': 'surface'})

# One reference file defines the output grid (shape and coordinates).
tmp = xr.open_dataset('hrrr.20171007.t02z.regrid3km.grib2',engine='cfgrib',
                      backend_kwargs=ps_kwargs)
ps_tmp = tmp.sp.sel(latitude=lat_slice,longitude=lon_slice)

# Start from NaN so that any file that cannot be read stays flagged as missing.
ps_acqu = np.full((len(files),ps_tmp.shape[0],ps_tmp.shape[1]),np.nan)
date_frame = []

for t, fname in enumerate(files):
    # Progress output: the file being processed now (the original printed files[0] every time).
    print(fname)
    # Characters 5..17 of the file name hold the date and cycle hour, e.g. '20171007.t02z'.
    date_frame.append(datetime.strptime(fname[5:18],'%Y%m%d.t%Hz'))
    try:
        ds = xr.open_dataset(fname,engine='cfgrib',backend_kwargs=ps_kwargs)
        ps = ds.sp.sel(latitude=lat_slice,longitude=lon_slice)
        ps_acqu[t,:,:] = ps.values
    except Exception as err:
        # Best effort: report the unreadable file (and why) and keep its time slot as NaN.
        # `Exception` rather than a bare except so Ctrl-C still interrupts the loop.
        print('ERROR file: '+fname)
        print('    reason: '+repr(err))
        ps_acqu[t,:,:] = np.nan

# Convert into xarray. Coordinates come from the reference subset, so this
# step does not depend on whether the files in the loop were readable.
ps_acqu_xr = xr.DataArray(ps_acqu,dims=('time','latitude','longitude'),
                          coords=(date_frame,ps_tmp.latitude,ps_tmp.longitude-360),name='ps')
ps_ds = ps_acqu_xr.to_dataset(name='ps')
ps_ds.to_netcdf('/data2/willytsai/InSAR_HRRR/HRRR_data/t02z/merged/HRRR_Psfc_NEVADA.nc')