In [7]:
import os
import glob
import zipfile

import pyart
import dask.bag as db
from dask.diagnostics import ProgressBar

In [2]:
def extract_zip(inzip, path):
    '''
    Unpack every member of a zip archive into a target directory.

    Parameters:
    ===========
    inzip: str
        Zip archive to unpack.
    path: str
        Directory the members are extracted into.

    Returns:
    ========
    namelist: List
        One entry per archive member, in archive order, built by joining
        the member name onto `path`.
    '''
    with zipfile.ZipFile(inzip) as archive:
        archive.extractall(path=path)
        members = archive.namelist()

    # Build the output paths from the member names recorded in the archive.
    extracted = []
    for member in members:
        extracted.append(os.path.join(path, member))
    return extracted


def mkdir(path):
    '''
    Create a directory, including any missing parent directories.

    A `path` that already exists is silently accepted.

    Parameters:
    ===========
    path: str
        Directory to create.

    Returns:
    ========
    None
    '''
    try:
        # os.makedirs, unlike os.mkdir, also creates the missing parents, so
        # a nested path no longer fails with FileNotFoundError.
        os.makedirs(path)
    except FileExistsError:
        pass

    return None


def remove(flist):
    '''
    Delete every file named in `flist`.

    None entries are ignored, and a file that is already gone is skipped
    without raising.

    Parameters:
    ===========
    flist: List
        File paths to delete; entries may be None.

    Returns:
    ========
    None
    '''
    # Filter out the None placeholders before touching the filesystem.
    targets = list(filter(lambda name: name is not None, flist))
    for target in targets:
        try:
            os.remove(target)
        except FileNotFoundError:
            continue
    return None

In [14]:
def check_total_power(infile):
    '''
    Tell whether a radar file contains a 'total_power' field.

    Parameters:
    ===========
    infile: str
        File handed to pyart.aux_io.read_odim_h5.

    Returns:
    ========
    found: bool
        True when the file could be read and its fields hold a
        'total_power' entry; False when reading raised any exception or
        the entry is missing.
    '''
    try:
        volume = pyart.aux_io.read_odim_h5(infile)
    except Exception:
        # Any read failure is treated as "field not available".
        return False

    try:
        volume.fields['total_power']
    except KeyError:
        found = False
    else:
        found = True

    # Drop the reference to the radar object before returning.
    del volume
    return found

In [4]:
def check_file(infile, zipdir='/scratch/kl02/vhl548/unzipdir/'):
    '''
    Check whether the first member of a zipped volume has 'total_power'.

    The archive is extracted into `zipdir`, the first extracted entry is
    passed to check_total_power, and the extracted entries are removed
    again, even if the check raises.

    Parameters:
    ===========
    infile: str
        Input zip file.
    zipdir: str
        Directory the archive is extracted into. Defaults to the scratch
        directory used originally by this notebook.

    Returns:
    ========
    rslts: str or None
        `infile` when the first member contains 'total_power', otherwise
        None (also returned for an archive without members).
    '''
    namelist = extract_zip(infile, zipdir)
    try:
        # Guard against an empty archive: there is no first member to test.
        if namelist and check_total_power(namelist[0]):
            return infile
        return None
    finally:
        # Always clean up so that failures do not leave files in zipdir.
        remove(namelist)

In [15]:
# For each selected year, run check_file over the matching archives in
# parallel and append the archives that passed to total_power_radars.txt.
for year in (2018, 2020):  # 2019 is deliberately left out
    # NOTE: without recursive=True, '**' matches a single directory level,
    # exactly like '*'.
    flist = sorted(glob.glob(f'/g/data/rq0/odim_archive/odim_pvol/**/{year}/vol/*0101.pvol.zip'))

    bag = db.from_sequence(flist).map(check_file)
    with ProgressBar():
        rslt = bag.compute()

    # check_file returns None for archives that did not pass the check.
    rslt = [r for r in rslt if r is not None]

    with open('total_power_radars.txt', 'a+') as fid:
        # The file is appended to once per year, so every path must end
        # with its own newline; otherwise the last path of one year and
        # the first path of the next end up on the same line.
        fid.writelines(f'{r}\n' for r in rslt)

[########################################] | 100% Completed | 11.5s
[########################################] | 100% Completed | 15.1s
