In [29]:
import os
import glob
import uuid
import datetime
import warnings

from itertools import product
from multiprocessing import Pool

import tqdm
import netCDF4
import numpy as np
import pandas as pd
import matplotlib.pyplot as pl

warnings.simplefilter('ignore')

# File list

In [30]:
def _time_slot(filename):
    '''
    Find the 10-minute slot of the day encoded in a file name.

    The name (without extension) is split on underscores. The first token,
    other than the leading one, that is made only of digits and is 4 (HHMM)
    or 6 (HHMMSS) characters long is read as the time of day. Requiring a
    whole all-digit token keeps the date (8 digits) and resolution tags
    such as "1000m" from being mistaken for a time.

    Parameter:
    ===========
    filename: str
        File base name, e.g. "CPOL_20170304_0010_GRIDS_1000m.nc".

    Returns:
    ========
    slot: int or None
        hour * 6 + minute // 10 (0 to 143), or None if no time token is found.
    '''
    stem = os.path.splitext(filename)[0]
    for token in stem.split('_')[1:]:
        if len(token) not in (4, 6):
            continue
        if not all(char in '0123456789' for char in token):
            continue
        hour = int(token[:2])
        tens_of_minutes = int(token[2])
        if hour < 24 and tens_of_minutes < 6:
            return hour * 6 + tens_of_minutes
    return None


def get_file_list(input_dir):
    '''
    Generate file list.

    If the directory holds exactly 144 netCDF files, the sorted list is
    returned as is. Otherwise a 144-element list is returned, one entry per
    10-minute slot of the day, with None where no file was found.

    Parameter:
    ===========
    input_dir: str
        Path to input data directory

    Returns:
    ========
    flist: list
        Input file list (144 elements, None for a missing time step).
    '''
    nslots = 144  # 24 hours x 6 ten-minute steps.

    candidates = sorted(glob.glob(os.path.join(input_dir, '*.nc')))
    if len(candidates) == nslots:
        return candidates

    flist = [None] * nslots
    for infile in candidates:
        if not os.path.isfile(infile):
            continue
        slot = _time_slot(os.path.basename(infile))
        # Candidates are sorted, so the first file of a slot wins and the
        # result does not depend on the directory listing order.
        if slot is not None and flist[slot] is None:
            flist[slot] = infile

    return flist

In [31]:
def update_metadata(gnrl_meta):
    '''
    Correct the general metadata dictionary.

    Obsolete attributes are removed, and the version, creation, geospatial
    and origin attributes are set. The dictionary is modified in place and
    also returned.

    Parameters:
    ===========
    gnrl_meta: dict
        General metadata dictionary.

    Returns:
    ========
    gnrl_meta: dict
        Updated general metadata dictionary (same object as the input).
    '''
    # Attributes that are dropped when present.
    for stale_key in ('original_format', 'n_gates_vary', 'driver', 'start_datetime',
                      'start_time', 'end_datetime', 'end_time', 'scan_name', 'scan_id',
                      'ray_times_increase', 'Conventions', 'Sub_conventions'):
        gnrl_meta.pop(stale_key, None)

    # All values are stored as strings.
    lon_min, lon_max = '129.70320368213441', '132.3856852067545'
    lat_min, lat_max = '-13.552905831511362', '-10.941777804922253'
    resolution = '0.0225'  # Used for both latitude and longitude.

    gnrl_meta.update({
        'version': '2018.06_level2',
        'created': datetime.datetime.now().isoformat(),
        'uuid': str(uuid.uuid4()),
        'processing_level': 'L2',
        'geospatial_bounds': f"({lon_min}, {lon_max}, {lat_min}, {lat_max})",
        'geospatial_lat_min': lat_min,
        'geospatial_lat_max': lat_max,
        'geospatial_lat_units': "degrees_north",
        'geospatial_lat_resolution': resolution,
        'geospatial_lon_min': lon_min,
        'geospatial_lon_max': lon_max,
        'geospatial_lon_units': "degrees_east",
        'geospatial_lon_resolution': resolution,
        'origin_latitude': '-12.249',
        'origin_longitude': '131.044',
        'origin_altitude': '50',
        'geospatial_projection': 'Azimuthal equidistant projection',
    })

    return gnrl_meta

In [32]:
def read_data(input_file, data_key='radar_estimated_rain_rate', level=0, bad=-9999):
    '''
    Read one field from a netCDF4 file, with its attributes and the file
    global attributes.

    Parameters:
    ===========
    input_file: str
        Input file name.
    data_key: str
        Data moment name
    level: int
        Index taken along the second axis of the variable.
    bad: scalar
        Value used to fill the masked elements of the data.

    Returns:
    ========
    data: ndarray
        Data at the requested level, squeezed, masked values filled with `bad`.
    data_meta: dict
        Data metadata
    gnrl_meta: dict
        File metadata

    Raises:
    =======
    Any exception raised while reading is re-raised after the name of the
    offending file has been printed.
    '''
    try:
        with netCDF4.Dataset(input_file) as ncid:
            variable = ncid[data_key]
            data = np.squeeze(variable[:, level, :, :]).filled(bad)

            data_meta = {name: variable.getncattr(name) for name in variable.ncattrs()}
            gnrl_meta = {name: ncid.getncattr(name) for name in ncid.ncattrs()}
    except Exception:
        # Report which file failed before propagating the error.
        print(input_file)
        raise

    return data, data_meta, gnrl_meta

In [33]:
def mkdir(mydir):
    '''
    Create a directory, and its missing parents, if it does not exist yet.

    Safe to call from several processes at once: a directory created
    concurrently by another process is not an error.

    Parameters:
    ===========
    mydir: str
        Path of the directory to create.

    Returns:
    ========
    None
    '''
    try:
        os.makedirs(mydir, exist_ok=True)
    except FileExistsError:
        # The path exists but is not a directory: do nothing, as before.
        return None

    return None

In [34]:
def make_dailys(inargs):
    '''
    Gather the gridded files of one day into a single daily netCDF file.

    The output file holds the requested moment on a (time, latitude,
    longitude) grid with 144 time steps (one every 10 minutes), plus an
    'isfile' flag telling whether an input file was found for each step.
    Nothing is written if the input directory is missing, if no input file
    is found for the day, or if the output file already exists.

    The function reads the module-level variables LON, LAT and FILLVALUE.

    Parameters:
    ===========
    inargs: tuple
        (x, y, date, INDIR, OUTDIR, MOMENT_NAME, RMAX, level, bad, XDIM, YDIM),
        packed into one argument so the function can be mapped over a Pool.
        x, y: ndarray
            Grid coordinates, used to compute the range from the grid origin.
        date: datetime-like
            Day to process (must provide .year and .strftime).
        INDIR: str
            Root input directory; files are searched in INDIR/YYYY/YYYYMMDD.
        OUTDIR: str
            Root output directory; the file is written in
            OUTDIR/<MOMENT_NAME in upper case>.
        MOMENT_NAME: str
            Name of the variable to read and to write.
        RMAX: float
            Grid points at a range >= RMAX are masked.
        level: int
            Index along the second axis of the input variable
            (presumably the vertical level - to be confirmed).
        bad: scalar
            Value substituted for masked input data.
        XDIM, YDIM: int or None
            Sizes of the longitude and latitude dimensions; default to
            len(x) and len(y).

    Returns:
    ========
    None
    '''
    x, y, date, INDIR, OUTDIR, MOMENT_NAME, RMAX, level, bad, XDIM, YDIM = inargs
    nslots = 144  # Number of 10-minute time steps in a day.

    # Check if input dir exists.
    indir = os.path.join(INDIR, str(date.year), date.strftime('%Y%m%d'))
    if not os.path.exists(indir):
        return None

    # Generate output file name, and stop early if the work is already done
    # (this avoids listing the input directory for nothing).
    outdir = os.path.join(OUTDIR, MOMENT_NAME.upper())
    mkdir(outdir)
    outfilename = os.path.join(outdir, 'CPOL_{}_{}.nc'.format(MOMENT_NAME.upper(), date.strftime('%Y%m%d')))
    if os.path.exists(outfilename):
        print(f'Output file {outfilename} already exists. Doing nothing.')
        return None

    flist = get_file_list(indir)
    if all(infile is None for infile in flist):
        # No usable input file: there is no data and no metadata to write.
        return None

    if XDIM is None:
        XDIM = len(x)
    if YDIM is None:
        YDIM = len(y)

    # Range of every grid point from the origin; shape is (len(y), len(x)).
    X, Y = np.meshgrid(x, y)
    R = np.sqrt(X ** 2 + Y ** 2)

    # Read data. The buffer follows the (time, latitude, longitude) layout
    # of the output variable.
    IS_FILE = np.ones(nslots, dtype=np.int32)
    RAIN_TOT = np.zeros((nslots, YDIM, XDIM))
    for cnt, infile in enumerate(flist):
        if infile is None:
            RAIN_TOT[cnt, :, :] = np.nan
            IS_FILE[cnt] = 0
            continue
        # The metadata of the last file read is the one written out.
        rain, rain_meta, gnrl_meta = read_data(infile, MOMENT_NAME, level=level, bad=bad)
        rain[R >= RMAX] = np.nan
        RAIN_TOT[cnt, :, :] = rain

    RAIN_TOT = np.ma.masked_where(np.isnan(RAIN_TOT), RAIN_TOT)
    gnrl_meta = update_metadata(gnrl_meta)

    # Generate time dimension
    st = np.array(date, dtype=np.datetime64)
    ed = np.array(date, dtype=np.datetime64) + np.timedelta64(1, 'D')
    dtime = np.arange(st, ed, np.timedelta64(10, 'm'))  # Every 10 minutes
    time_unit = f'seconds since {str(dtime[0])}'
    time = netCDF4.date2num(dtime.tolist(), time_unit).astype(np.int32)

    # Write data
    with netCDF4.Dataset(outfilename, 'w') as ncid:
        ncid.createDimension('time', nslots)
        ncid.createDimension("longitude", XDIM)
        ncid.createDimension("latitude", YDIM)

        mymoment = ncid.createVariable(MOMENT_NAME, RAIN_TOT.dtype,
                                       ("time", "latitude", "longitude"),
                                       zlib=True, fill_value=FILLVALUE)
        ncquality = ncid.createVariable('isfile', IS_FILE.dtype, ("time",))
        nctime = ncid.createVariable('time', time.dtype, 'time')
        nclon = ncid.createVariable('longitude', LON.dtype, ('longitude',))
        nclat = ncid.createVariable('latitude', LAT.dtype, ('latitude',))

        nclon[:] = LON
        nclon.units = 'degrees_east'
        nclat[:] = LAT
        nclat.units = 'degrees_north'

        nctime[:] = time
        nctime.units = time_unit

        ncquality[:] = IS_FILE
        ncquality.units = ''
        ncquality.setncattr('description', "0: no data, 1: data available at this time step")

        # Write data and attributes
        mymoment[:] = RAIN_TOT.filled(FILLVALUE)
        for k, v in rain_meta.items():
            if k == '_FillValue':
                # Already set when the variable was created.
                continue
            if k == 'standard_name' and MOMENT_NAME == 'radar_estimated_rain_rate':
                mymoment.setncattr(str(k), 'rainfall_rate')
                continue
            if k == 'standard_name' and MOMENT_NAME == 'reflectivity':
                mymoment.setncattr(str(k), 'equivalent_reflectivity_factor')
                continue
            try:
                mymoment.setncattr(str(k), str(v))
            except AttributeError:
                # Show the offending attribute before propagating the error.
                print(k)
                print(v)
                print(type(k))
                print(type(v))
                raise

        for k, v in gnrl_meta.items():
            ncid.setncattr(k, str(v))

    return None

In [35]:
# Alternative configuration (117 x 117 grid, reflectivity), kept for reference.
# XDIM = 117
# YDIM = 117
# RMAX = 140
# FILLVALUE = -32768
# MOMENT_NAME = 'reflectivity'  # 
# OUTDIR = '/g/data2/rr5/vhl548/NEW_CPOL_level_2'
# INDIR = '/g/data2/rr5/vhl548/CPOL_level_1b/GRIDDED/GRID_150km_2500m/'

# x = np.linspace(-145, 145, XDIM, dtype=np.float32)
# y = np.linspace(-145, 145, YDIM, dtype=np.float32)

# Active configuration (141 x 141 grid, rain rate).
XDIM = 141
YDIM = 141
# With x and y spanning [-70, 70] the largest range is about 99, so this
# threshold does not mask any grid point.
RMAX = 140
# make_dailys reads FILLVALUE as a global; it must be defined here for the
# notebook to run on a fresh kernel.
FILLVALUE = -32768
MOMENT_NAME = 'radar_estimated_rain_rate'
OUTDIR = '/g/data2/rr5/vhl548/NEW_CPOL_level_2_1km'
INDIR = '/g/data2/rr5/vhl548/NEW_CPOL_level_1b/GRIDDED/GRID_70km_1000m/'

x = np.linspace(-70, 70, XDIM, dtype=np.float32)
y = np.linspace(-70, 70, YDIM, dtype=np.float32)

In [36]:
# Reference file from which the latitude / longitude axes are read; it is
# chosen according to the grid size set in the configuration.
if XDIM == 117:
    fdlatlon = '/g/data2/rr5/vhl548/CPOL_level_1b/GRIDDED/GRID_150km_2500m/2017/20170304/CPOL_20170304_0000_GRIDS_2500m.nc'
else:
    fdlatlon = '/g/data2/rr5/vhl548/CPOL_level_1b/GRIDDED/GRID_70km_1000m/2017/20170304/CPOL_20170304_0000_GRIDS_1000m.nc'

with netCDF4.Dataset(fdlatlon) as ncid:
    # np.nan rather than np.NaN: the latter alias was removed in NumPy 2.0.
    mylat = np.squeeze(ncid['latitude'][:].filled(np.nan))
    mylon = np.squeeze(ncid['longitude'][:].filled(np.nan))

# 1D axes taken along the central column / row of the first slice of the
# grid (index 58 for the 117-point grid, 70 for the 141-point grid).
LAT = mylat[0, :, XDIM // 2]
LON = mylon[0, YDIM // 2, :]

In [37]:
# One argument tuple per day of the processing period, in the order expected
# by make_dailys.
date_range = pd.date_range('19981206', '20170502')

# Level index and fill value depend on the moment being processed.
# np.nan rather than np.NaN: the latter alias was removed in NumPy 2.0.
if MOMENT_NAME == 'reflectivity':
    MOMENT_LEVEL, MOMENT_BAD = 5, np.nan
else:
    MOMENT_LEVEL, MOMENT_BAD = 0, 0

args_list = [
    (x, y, dt, INDIR, OUTDIR, MOMENT_NAME, RMAX, MOMENT_LEVEL, MOMENT_BAD, XDIM, YDIM)
    for dt in date_range
]

In [38]:
# Process every day in parallel on 16 worker processes, with a progress bar.
# imap yields the results in the order of args_list; make_dailys returns
# None, so rslt is only used to drive the iterator to completion.
with Pool(16) as pool:
    daily_jobs = pool.imap(make_dailys, args_list)
    progress = tqdm.tqdm_notebook(daily_jobs, total=len(args_list))
    rslt = list(progress)

HBox(children=(IntProgress(value=0, max=6723), HTML(value='')))


