In [1]:
import gc
import os
import sys
import glob
import time
import zipfile
import argparse
import datetime
import warnings
import traceback

import crayons
import numpy as np
import pandas as pd
import dask.bag as db

import cluttercal
from cluttercal.cluttermask import EmptyFieldError

In [2]:
# Install a catch-all "ignore" filter: every Python warning raised after this
# cell runs is suppressed for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries used
# below; consider filtering specific categories instead.
warnings.simplefilter("ignore")

  and should_run_async(code)


In [16]:
def buffer(infile, cmask):
    """
    Fault-tolerant wrapper around cluttercal.extract_clutter, so that one bad
    file does not abort the processing of a whole day.

    Parameters:
    ===========
    infile: str
        Input radar file.
    cmask:
        Clutter mask, handed unchanged to cluttercal.extract_clutter.

    Returns:
    ========
    result: tuple or None
        (dtime, rca) as returned by cluttercal.extract_clutter, i.e. the
        datetime of infile and the 95th percentile of the clutter
        reflectivity. None if the extraction raised an exception.
    """
    try:
        dtime, rca = cluttercal.extract_clutter(infile, cmask, refl_name="total_power")
    except ValueError:
        # ValueError is discarded quietly: no log line, no traceback.
        result = None
    except Exception:
        # Anything else is reported (file name + traceback) before giving up
        # on this file.
        print(infile)
        traceback.print_exc()
        result = None
    else:
        result = (dtime, rca)

    return result

In [14]:
def mkdir(path):
    """
    Create the directory `path`, together with any missing parent
    directories. Nothing happens if the directory already exists.

    Parameters:
    ===========
    path: str
        Directory to create.

    Raises:
    =======
    FileExistsError
        If `path` already exists but is not a directory.
    """
    # os.mkdir only creates the last path component and fails with
    # FileNotFoundError when a parent is missing; os.makedirs creates the whole
    # chain, and exist_ok=True keeps repeated calls harmless.
    os.makedirs(path, exist_ok=True)

    return None

In [3]:
def savedata(df, date, path):
    """
    Write the RCA time series of one day to
    <path>/rca/<RID>/rca.<RID>.<YYYYMMDD>.csv and report it on stdout.

    Parameters:
    ===========
    df: pd.Dataframe
        RCA time series to be saved.
    date: datetime
        Date of processing (only its strftime method is used).
    path: str
        Output directory.
    """
    stamp = date.strftime("%Y%m%d")

    # Two nested levels are created one after the other: <path>/rca, then
    # <path>/rca/<RID>.
    rca_dir = os.path.join(path, "rca")
    radar_dir = os.path.join(rca_dir, str(RID))
    for folder in (rca_dir, radar_dir):
        mkdir(folder)

    csv_file = os.path.join(radar_dir, f"rca.{RID}.{stamp}.csv")
    df.to_csv(csv_file)

    messages = (
        f"Found {len(df)} hits for {stamp}.",
        f"Results saved in {csv_file}.",
    )
    for message in messages:
        print(crayons.green(message))

    return None


def gen_cmask(radar_file_list, date, file_prefix=None):
    """
    Build the clutter mask of one day and store it as a netCDF file under
    <OUTPATH>/cmasks/<RID>/. An already existing mask file is left untouched.

    Parameters:
    ===========
    radar_file_list: list
        List radar files for the given date.
    date: datetime
        Date.
    file_prefix: str
        Prefix of the mask file name. Defaults to "<RID>_".

    Returns:
    ========
    outputfile: str
        Full path of the mask file, <OUTPATH>/cmasks/<RID>/<prefix><YYYYMMDD>.nc.
        The path is returned even when the mask could not be created, in
        which case no file is written.
    """
    prefix = f"{RID}_" if file_prefix is None else file_prefix
    stamp = date.strftime("%Y%m%d")

    cmask_root = os.path.join(OUTPATH, "cmasks")
    mkdir(cmask_root)
    radar_dir = os.path.join(cmask_root, f"{RID}")
    mkdir(radar_dir)
    outputfile = os.path.join(radar_dir, prefix + f"{stamp}.nc")

    # Nothing to do when the mask of that day is already on disk.
    if os.path.isfile(outputfile):
        print("Clutter masks already exists. Doing nothing.")
        return outputfile

    try:
        mask = cluttercal.clutter_mask(
            radar_file_list,
            refl_name="total_power",
            refl_threshold=REFL_THLD,
            max_range=20e3,
            freq_threshold=50,
            use_dask=True,
        )
        if mask is not None:
            mask.to_netcdf(outputfile)
        else:
            print(crayons.red(f"!!! COULD NOT CREATE CLUTTER MAP FOR {date} !!!"))
    except Exception:
        # Best effort: report the failure and let the caller carry on.
        traceback.print_exc()

    return outputfile

In [8]:
def get_files(date, root="/g/data/hj10/cpol/cpol_level_1b/v2020/ppi"):
    """
    List the netCDF files available for one day, sorted by name.

    Parameters:
    ===========
    date: datetime
        Day to look up (its `year` attribute and `strftime` method are used).
    root: str
        Top directory of the archive, organised as
        <root>/<YYYY>/<YYYYMMDD>/*.nc. Defaults to the location that was
        previously hard-coded, so existing calls behave as before.

    Returns:
    ========
    flist: list
        Sorted list of the matching file paths. Empty if the day directory is
        missing or contains no .nc file.
    """
    year = str(date.year)
    datestr = date.strftime("%Y%m%d")
    pattern = os.path.join(root, year, datestr, "*.nc")
    flist = sorted(glob.glob(pattern))
    return flist

In [46]:
def main(date_range):
    """
    Compute and save the daily RCA time series for every date of date_range.

    For each date:
    1/ List the radar files of the day (days without files are skipped).
    2/ Generate the clutter mask for the given date.
    3/ Load the composite mask, falling back on the single mask of the day.
    4/ Extract the clutter reflectivity of every file.
    5/ Save data for the given date.

    Parameters:
    ===========
    date_range: iterable of datetime
        Dates to process.
    """
    prefix = f"{RID}_"
    for date in date_range:
        namelist = get_files(date)
        if len(namelist) == 0:
            continue

        print(crayons.yellow(f"{len(namelist)} files to process for {date}."))

        # Generate clutter mask for the given date.
        # NOTE(review): gen_cmask returns the path of the mask *file*, and that
        # value is passed as `indir` to composite_mask below - confirm against
        # the cluttercal API that this is what it expects.
        outpath = gen_cmask(namelist, date, file_prefix=prefix)

        # Generate composite mask, or fall back on the mask of the day.
        try:
            cmask = cluttercal.composite_mask(date, timedelta=7, indir=outpath, prefix=prefix)
            print("Used composite mask")
        except ValueError:
            try:
                cmask = cluttercal.single_mask(outpath)
                print("Used single mask")
            except Exception:
                # Without any mask the day cannot be processed. Say so instead
                # of dropping the day silently.
                traceback.print_exc()
                print(crayons.red(f"!!! NO CLUTTER MASK AVAILABLE FOR {date}, SKIPPING !!!"))
                continue

        # Extract the clutter reflectivity for the given date.
        arglist = [(f, cmask) for f in namelist]
        rslt = db.from_sequence(arglist).starmap(buffer).compute()

        # buffer returns None for the files it could not process.
        rslt = [r for r in rslt if r is not None]
        if len(rslt) == 0:
            continue

        ttmp, rtmp = zip(*rslt)
        rca = np.array(rtmp)
        dtime = np.array(ttmp, dtype="datetime64")

        df = pd.DataFrame({"rca": rca}, index=dtime)
        savedata(df, date, path=OUTPATH)

    return None

In [47]:
# Radar identifier: used in the output directory names, in the output file
# names and as the prefix of the clutter-mask files.
RID = 500
# NOTE(review): not referenced by any function visible in this notebook.
ZIPDIR = "/scratch/kl02/vhl548/unzipdir/"
# Root of all outputs: clutter masks are written under <OUTPATH>/cmasks and
# the RCA CSV files under <OUTPATH>/rca.
OUTPATH = "/scratch/kl02/vhl548/rca_output/"
# NOTE(review): not referenced by the visible functions, which pass
# refl_name="total_power" as a literal - wire it in or drop it.
REFL_NAME = "total_power"
# Passed as `refl_threshold` to cluttercal.clutter_mask in gen_cmask
# (presumably in dBZ - confirm against the cluttercal documentation).
REFL_THLD = 40

In [48]:
# Daily timestamps for the processing period, both end dates included.
date_range = pd.date_range("2010-04-01", "2010-05-31")

In [49]:
# Process every day of the period; progress and the saved file names are
# printed as each day completes.
main(date_range)

143 files to process for 2010-04-01 00:00:00.
Used composite mask
Found 142 hits for 20100401.
Results saved in /scratch/kl02/vhl548/rca_output/rca/500/rca.500.20100401.csv.
144 files to process for 2010-04-02 00:00:00.
Used composite mask
Found 143 hits for 20100402.
Results saved in /scratch/kl02/vhl548/rca_output/rca/500/rca.500.20100402.csv.
144 files to process for 2010-04-03 00:00:00.
Used composite mask
Found 144 hits for 20100403.
Results saved in /scratch/kl02/vhl548/rca_output/rca/500/rca.500.20100403.csv.
144 files to process for 2010-04-04 00:00:00.
Used composite mask
Found 143 hits for 20100404.
Results saved in /scratch/kl02/vhl548/rca_output/rca/500/rca.500.20100404.csv.
144 files to process for 2010-04-05 00:00:00.
Used composite mask
Found 144 hits for 20100405.
Results saved in /scratch/kl02/vhl548/rca_output/rca/500/rca.500.20100405.csv.
144 files to process for 2010-04-06 00:00:00.
Used composite mask
Found 143 hits for 20100406.
Results saved in /scratch/kl02/vhl5