In [1]:
import os
from tqdm import tqdm
import xarray as xa
import numpy as np
import pandas as pd
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from pylibs.plot_utils import set_size, setupax_2dmap
from pylibs.utils import get_dates
from joblib import Parallel, delayed

In [2]:
# --- What to read ---------------------------------------------------------
# File-name prefix of the input files and the variable/channel to analyse.
filetype = 'hofx'
plotvar = 'aerosolOpticalDepth'
plot_ch = 4  # value looked up on the files' `Channel` coordinate

# --- Target grid spacing --------------------------------------------------
# Used to build the latitude/longitude bins (degrees).
lat_res = 0.5
lon_res = 0.625

# --- Analysis period ------------------------------------------------------
# Start/end look like YYYYMMDDHH stamps and the interval like hours;
# NOTE(review): confirm against pylibs.utils.get_dates.
date_interval = 6
sdate = 2024110100
edate = 2024113018
dates = get_dates(sdate, edate, date_interval)

In [3]:
# Root directory of the input files; each sensor has its own sub-directory.
# Alternative location kept for reference:
#   '/glade/campaign/ncar/nmmm0072/Data/obs'
ioda_path = '/glade/work/swei/Git/JEDI-METplus/output/aodobs_merra2/hofx/f00'

# Sensors/products to process; each name is used both as the sub-directory
# and inside the file name.
obs_name_list = [
    'modis_terra_aod',
    'modis_aqua_aod',
    'pace_aod',
    'viirs_aod_dt_npp',
    'viirs_aod_dt_n20',
    'viirs_aod_db_npp',
    'viirs_aod_db_n20',
]

In [40]:
# Regular lat/lon grid used to bin the observations.
#
# `latgrd` / `longrd` are the cell centres (used as bin labels) and
# `latbin` / `lonbin` are the cell edges, half a grid step either side of
# each centre.  The edges are derived from the centres instead of from a
# second, independent `np.arange` call: with fractional steps `np.arange`
# can return one element more or less than expected, and `pd.cut` requires
# exactly len(edges) == len(labels) + 1.
#
# Assumes the resolutions divide 180 / 360 degrees evenly.  Latitude
# centres run from -90 to +90 inclusive; longitude centres run from -180 up
# to (but excluding) +180.
latgrd = -90.0 + lat_res * np.arange(int(round(180.0 / lat_res)) + 1)
longrd = -180.0 + lon_res * np.arange(int(round(360.0 / lon_res)))
latbin = np.append(latgrd - 0.5 * lat_res, latgrd[-1] + 0.5 * lat_res)
lonbin = np.append(longrd - 0.5 * lon_res, longrd[-1] + 0.5 * lon_res)

In [29]:
def _grid_single_file(obsname, iodafile):
    """Bin the observations of one input file onto the regular lat/lon grid.

    Reads latitude/longitude from the ``MetaData`` group and the selected
    variable (``plotvar`` at channel ``plot_ch``) from the ``ObsValue`` and
    ``hofx`` groups, then aggregates per grid cell.

    Parameters
    ----------
    obsname : str
        Sensor/product name; used as the prefix of the output variable names.
    iodafile : str
        Path of the netCDF file to read.

    Returns
    -------
    xarray.Dataset
        Dataset on (latitude, longitude) holding ``{obsname}_obs_count``,
        ``{obsname}_obs_mean``, ``{obsname}_obs_std`` and the mean/std of
        ``{obsname}_hofx`` and ``{obsname}_omb``.
    """
    # Every group is opened in a `with` block so the file handle is released
    # as soon as the values are in memory; `.values` forces the read while
    # the file is still open.
    with xa.open_dataset(iodafile) as dims_ds:
        # The Channel values live in the root group; the sub-groups are
        # given this coordinate below so they can be selected by value.
        channel = dims_ds.Channel.values.astype(np.int32)

    with xa.open_dataset(iodafile, group='MetaData') as meta_ds:
        lats = meta_ds.latitude.values
        lons = meta_ds.longitude.values

    with xa.open_dataset(iodafile, group='ObsValue') as obsv_ds:
        obsv_data = (
            obsv_ds.assign_coords(Channel=channel)[plotvar]
            .sel(Channel=plot_ch)
            .values
        )

    with xa.open_dataset(iodafile, group='hofx') as hofx_ds:
        hofx_data = (
            hofx_ds.assign_coords(Channel=channel)[plotvar]
            .sel(Channel=plot_ch)
            .values
        )

    # The westernmost cell straddles the date line: longitudes beyond the
    # last bin edge belong to it.  Without this shift they fall outside
    # every bin and are silently dropped by the groupby (this also maps
    # 0..360 longitudes onto the -180..180 grid).
    lons = np.where(lons > lonbin[-1], lons - 360.0, lons)

    df = pd.DataFrame({
        f'{obsname}_obs': obsv_data,
        f'{obsname}_hofx': hofx_data,
        f'{obsname}_omb': obsv_data - hofx_data,
        # Label each observation with the centre of the cell it falls in.
        'latitude': pd.cut(lats, bins=latbin, labels=latgrd),
        'longitude': pd.cut(lons, bins=lonbin, labels=longrd),
    })

    # observed=False keeps empty cells so every file yields the full grid.
    grp = df.groupby(['latitude', 'longitude'], observed=False).agg({
        f'{obsname}_obs': ['count', 'mean', 'std'],
        f'{obsname}_hofx': ['mean', 'std'],
        f'{obsname}_omb': ['mean', 'std'],
    })
    # Flatten the (variable, statistic) column MultiIndex to "variable_statistic".
    grp.columns = [f"{lvl1}_{lvl2}" for lvl1, lvl2 in grp.columns]
    return grp.to_xarray()


def process_obsname(obsname):
    """Grid one sensor's observations for every date in ``dates``.

    For each date the file
    ``{ioda_path}/{obsname}/{filetype}.{obsname}.{YYYYMMDDHH}.nc4`` is read
    and binned by `_grid_single_file`; the per-date grids are then stacked
    along a new ``time`` dimension.

    Parameters
    ----------
    obsname : str
        Sensor/product name (sub-directory and file-name component).

    Returns
    -------
    xarray.Dataset
        Gridded statistics with dimensions (time, latitude, longitude) and
        ``time`` set to ``dates``.

    Raises
    ------
    ValueError
        If ``dates`` is empty, so there is nothing to stack.
    """
    print(f'processing {obsname}')

    gridded = []
    # Wrapping `dates` directly (rather than `enumerate(dates)`) lets tqdm
    # see the length and show a real progress bar; `desc` tells the bars of
    # parallel workers apart.
    for cdate in tqdm(dates, desc=obsname):
        cdate_str = cdate.strftime('%Y%m%d%H')
        iodafile = f'{ioda_path}/{obsname}/{filetype}.{obsname}.{cdate_str}.nc4'
        gridded.append(_grid_single_file(obsname, iodafile))

    if not gridded:
        raise ValueError(f'no dates to process for {obsname}')

    # Concatenate once at the end: concatenating inside the loop re-copies
    # the accumulated data on every iteration.
    outds = xa.concat(gridded, dim='time')
    return outds.assign_coords({'time': dates})

In [42]:
# Process every sensor in its own joblib worker, then merge the per-sensor
# datasets into one (variable names are prefixed with the sensor name, so
# they do not collide).  The worker count is capped at the number of
# sensors: asking for more workers than tasks only starts idle workers.
n_workers = max(1, min(32, len(obs_name_list)))
dataset_list = Parallel(n_jobs=n_workers)(
    delayed(process_obsname)(obs) for obs in obs_name_list
)
allds = xa.merge(dataset_list)
print('Process Finished')

processing pace_aodprocessing viirs_aod_dt_npp

processing viirs_aod_db_npp
processing modis_aqua_aodprocessing viirs_aod_dt_n20

processing viirs_aod_db_n20
processing modis_terra_aod


120it [00:26,  4.53it/s]
120it [00:27,  4.35it/s]
120it [01:00,  1.98it/s]
120it [01:03,  1.90it/s]
120it [01:06,  1.79it/s]
120it [01:14,  1.62it/s]
120it [01:15,  1.59it/s]


Process Finished


In [43]:
# Persist the merged gridded statistics to a netCDF file.
allds.to_netcdf(
    path='/glade/work/swei/projects/mmm.pace_aod/aod_obs_stats.m2.202411.nc4',
)