# Operational GEFS preprocessing

This notebook converts operational GEFS ensemble mean GRIB2 files into HDF5 (h5) format:

* `$camp_dir/GFS/geave_y{}.hdf`

In [1]:
import os
import sys
import time
import h5py
import pygrib

import numpy as np
# import netCDF4 as nc
from glob import glob

from scipy.interpolate import RegularGridInterpolator
from datetime import datetime, timedelta

In [2]:
# Put the project root and its libs/ folder at the front of the import path so
# the two project modules below can be imported.
# NOTE(review): both entries are absolute, user-specific paths; the notebook
# cannot import these modules on another account without editing them.
sys.path.insert(0, '/glade/u/home/ksha/GAN_proj/')
sys.path.insert(0, '/glade/u/home/ksha/GAN_proj/libs/')

# Wildcard import: presumably the source of `camp_dir` and `save_dir`, which
# later cells read but the cells reviewed here never assign -- TODO confirm
# against namelist.py.
from namelist import *
import data_utils as du

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Alternative period kept for reference (365 daily initialisations from 2021-01-01):
# base = datetime(2021, 1, 1)
# date_list = [base + timedelta(days=day) for day in range(365)]

# Active period: 31 consecutive daily initialisation dates starting 2020-12-01.
base = datetime(2020, 12, 1)
date_list = [base + timedelta(days=offset) for offset in range(31)]

# Forecast lead times 6, 12, ..., 144 (24 values).
LEADs = np.arange(6, 150, 6)

# Target grid coordinates, read in full from the CCPA domain file.
with h5py.File(save_dir+'CCPA_domain.hdf', 'r') as h5io:
    lon_CCPA, lat_CCPA = h5io['lon_CCPA'][...], h5io['lat_CCPA'][...]

# Shift the target longitudes by a full circle, in place.
# NOTE(review): presumably converts negative (west) longitudes to the 0-360
# convention of the GFS grid -- confirm against the domain file.
lon_CCPA += 360

In [5]:
# Folder holding the downloaded GEFS member files, and every entry in it whose
# name contains an 'f' (sorted by name).
GEFS_dir = camp_dir+'wget_GEFSv12_members/'
filenames = sorted(glob(GEFS_dir+'*f*'))

# Fail with an explicit message instead of an opaque IndexError on filenames[0]
# when the folder is empty or the path is wrong.
if not filenames:
    raise FileNotFoundError('No GEFS files matching {}'.format(GEFS_dir+'*f*'))

# The source grid is read once, from the first message of the first file found.
# NOTE(review): this assumes every member file shares that grid -- confirm.
with pygrib.open(filenames[0]) as grbio:
    lat_GFS, lon_GFS = grbio[1].latlons()

In [6]:
# Member labels used in the file names: 'gec00' first, then 'gep01' ... 'gep30'.
ens_names = ['gec00'] + ['gep{:02d}'.format(member) for member in range(1, 31)]

In [7]:
# Array dimensions, all taken from objects built in earlier cells.
grid_shape = lon_CCPA.shape
N_days, N_leads, N_ens = len(date_list), len(LEADs), len(ens_names)

# Per-day working buffer indexed (lead, member) + grid_shape, created as
# float64 and filled with NaN so unwritten entries stay marked as missing.
apcp_save = np.full((N_leads, N_ens) + grid_shape, np.nan)

In [1]:
# Path template for one member file; the three fields are the initialisation
# date (YYYYMMDD), the member name and the zero-padded lead time.
filename_base = camp_dir+'wget_GEFSv12_members/{}_00_{}.t00z.pgrb2s.0p25.f{:03d}'

# Interpolation inputs that are identical for every file: the 1-D source axes
# (first column of the 2-D latitude array, first row of the 2-D longitude
# array) and the target points on the CCPA grid.
gfs_axes = (lat_GFS[:, 0], lon_GFS[0, :])
ccpa_points = (lat_CCPA, lon_CCPA)

for i, dt in enumerate(date_list):
    dt_str = datetime.strftime(dt, '%Y%m%d')
    # Reset the shared buffer so nothing from the previous date carries over.
    apcp_save[...] = np.nan

    name_check = camp_dir+'GFS/{}.hdf'.format(dt_str)

    # Only dates that do not have a per-day HDF file yet are processed.
    if not os.path.isfile(name_check):
        print('Missing {}'.format(name_check))

        # Availability report. For each lead time the scan stops at the first
        # missing member (the break leaves the member loop only) and moves on
        # to the next lead.
        # NOTE(review): flag_all_member_ready is not consulted below because
        # the guard that used it is commented out.
        flag_all_member_ready = True
        for ilead, lead in enumerate(LEADs):
            for iens, ens_name in enumerate(ens_names):
                filename_ = filename_base.format(dt_str, ens_name, lead)
                if not os.path.isfile(filename_):
                    print('Missing {}'.format(filename_))
                    flag_all_member_ready = False
                    break

        #if flag_all_member_ready:
        for iens, ens_name in enumerate(ens_names):
            for ilead, lead in enumerate(LEADs):
                filename_ = filename_base.format(dt_str, ens_name, lead)
                try:
                    # Values of the first GRIB message in the file.
                    with pygrib.open(filename_) as grbio:
                        apcp = grbio[1].values

                    # Linear interpolation from the GFS grid to the CCPA grid.
                    # bounds_error=False with fill_value=None makes scipy
                    # extrapolate for target points outside the source grid.
                    lr_to_hr = RegularGridInterpolator(gfs_axes, apcp,
                                                       bounds_error=False, fill_value=None)
                    apcp_hr0 = lr_to_hr(ccpa_points)

                    apcp_save[ilead, iens, ...] = apcp_hr0
                except Exception as err:
                    # Best effort: a missing or unreadable file leaves NaN in
                    # its slot. Catching Exception (not a bare except) keeps
                    # Ctrl-C / kernel interrupts working, and the reason is
                    # printed so real bugs are not mistaken for missing data.
                    print('Empty or failed file: {}'.format(filename_))
                    print('    reason: {!r}'.format(err))
                    apcp_save[ilead, iens, ...] = np.nan

        # tuple_save = (apcp_save,)
        # label_save = ['apcp',]
        # du.save_hdf5(tuple_save, label_save, camp_dir+'GFS/', 
        #              '{}.hdf'.format(dt_str))

## Merge single-day files into one

In [8]:
# Container for all dates, indexed (day, lead, member) + grid_shape, created
# as float64 and filled with NaN.
apcp_all = np.full((N_days, N_leads, N_ens) + grid_shape, np.nan)

In [2]:
# Member-file template, re-assigned here; the loop below does not reference it.
filename_base = camp_dir+'wget_GEFSv12_members/{}_00_{}.t00z.pgrb2s.0p25.f{:03d}'

# Copy the 'apcp' dataset of each per-day file into the matching row of
# apcp_all, printing the date as a progress marker.
for i, dt in enumerate(date_list):
    dt_str = dt.strftime('%Y%m%d')
    print(dt_str)

    name_load = camp_dir + 'GFS/{}.hdf'.format(dt_str)
    with h5py.File(name_load, 'r') as h5io:
        apcp_all[i] = h5io['apcp'][...]

# tuple_save = (apcp_all,)
# label_save = ['apcp',]
# du.save_hdf5(tuple_save, label_save, camp_dir+'GFS/', 
#              'GEFS_OPT_MEMBERS_2020_DEC.hdf')

In [9]:
# Read the whole 'apcp' dataset of the merged December 2020 file into memory.
members_path = camp_dir+'GFS/GEFS_OPT_MEMBERS_2020_DEC.hdf'
with h5py.File(members_path, 'r') as h5io:
    apcp = h5io['apcp'][...]

In [11]:
# NaN-aware mean over axis 2 of the loaded array.
# NOTE(review): axis 2 is the member axis if the file keeps the
# (day, lead, member) + grid layout used for apcp_all -- confirm against the
# saved dataset.
apcp_mean = np.nanmean(apcp, axis=2)

In [17]:
# Number of daily slots in the padded arrays.
# NOTE(review): presumably the 366 days of leap year 2020 -- confirm.
n_days_year = 366

# The number of loaded days and the trailing dimensions are read from `apcp`
# itself rather than repeated as literals, so this cell keeps working if the
# lead count, member count or grid size changes.
n_days_loaded = apcp.shape[0]
if not 0 < n_days_loaded <= n_days_year:
    raise ValueError('Cannot place {} days into {} daily slots'.format(
        n_days_loaded, n_days_year))

# NaN-filled containers; the loaded block occupies the last n_days_loaded
# slots and every earlier slot stays NaN.
apcp_full = np.full((n_days_year,) + apcp.shape[1:], np.nan)
apcp_full[-n_days_loaded:, ...] = apcp

apcp_mean_full = np.full((n_days_year,) + apcp_mean.shape[1:], np.nan)
apcp_mean_full[-n_days_loaded:, ...] = apcp_mean

In [3]:
# tuple_save = (apcp_full, apcp_mean_full)
# label_save = ['apcp', 'apcp_mean']
# du.save_hdf5(tuple_save, label_save, camp_dir+'GFS/', 
#              'GEFS_OPT_MEMBERS_2020_DEC.hdf')