In [None]:
# GOES_ABI.ipynb - for 2015 data (there are changes in the ABI data format between 2015 and 2018)

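# This programme opens the ABI files (which are in HDF format) and grids the data onto a regular latitude/longitude grid,
# intended to match the FLEXPART output. It then saves the output in netCDF format.
# NOTE: this header originally stated a 0.5° x 0.5° grid, but the gridding cell below builds 1800 x 3600 cells (0.1° x 0.1°) - confirm the intended resolution.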

# C. Mackay March 2023 (Catherine.Mackay@aero.obs-mip.fr)
# https://github.com/ckmackay/SOFT-IO-LI.git

#Suggestions/improvements to be made:

# could automatically loop over several days if required.


In [None]:
import numpy as np
import pandas as pd
import os.path
from datetime import datetime
import xarray as xr
from io import StringIO
import dask.array as da
from pyhdf import SD

In [None]:
from pyhdf import SD
# Names of the datasets of interest.
# NOTE(review): open_hdf4 below does not consult this list - it loads every
# dataset found in the file. Confirm whether this list is still needed.
dsets = ['Latitude', 'Longitude', 'Brightness_Temperature']
def open_hdf4(url):
    """Load every scientific dataset (SDS) of an HDF4 file into an xarray Dataset.

    Values equal to a dataset's fill value are replaced by NaN (which promotes
    that dataset to a floating-point array). A dataset that declares no fill
    value is returned unchanged.

    Parameters
    ----------
    url : str or path-like
        Location of the HDF4 file; converted with ``str()`` before opening.

    Returns
    -------
    xarray.Dataset
        One data variable per dataset in the file, using the dimension names
        reported by ``SD.datasets()``.
    """
    # Local import so that this cell only relies on names it brings in itself.
    from pyhdf.error import HDF4Error

    hdf = SD.SD(str(url))
    variables = {}
    try:
        # datasets() maps each dataset name to a tuple whose first element holds
        # the dimension names; the remaining elements are not needed here.
        for name, (dims, *_) in hdf.datasets().items():
            sds = hdf.select(name)
            try:
                values = sds.get()
                try:
                    fill_value = sds.getfillvalue()
                except HDF4Error:
                    # No fill value declared for this dataset: nothing to mask.
                    fill_value = None
            finally:
                # Release the dataset handle as soon as the data is in memory.
                sds.endaccess()
            if fill_value is not None:
                values = np.where(values != fill_value, values, np.nan)
            variables[name] = (dims, values)
    finally:
        # Always close the file: this function is called once per input file.
        hdf.end()
    return xr.Dataset(variables)

In [None]:
# Run configuration.

# Calendar date being processed (not referenced by the other visible cells).
date = '2015-07-04'
# Day-of-year string used in directory and output file names
# (2015-07-04 is day 185 of the year).
day = '185'
# Directory that holds the ABI HDF files for that day.
idir = f"/o3p/macc/test/ABI/2015/{day}/"

In [None]:
# To avoid problems with missing files, take the file names from the input
# directory itself and only process those.
# os.listdir() returns entries in arbitrary order, so sort them to make the
# processing order (and the progress index printed later) reproducible.
filenames = sorted(os.listdir(idir))
print(len(filenames))

83


In [None]:

# Grid every brightness-temperature file found for `day` onto a regular
# latitude/longitude grid and write one netCDF file per input file.
# Uses names defined in earlier cells: open_hdf4, day, idir, filenames.

# Cell centres of the target grid: 1800 x 3600 cells, i.e. 0.1° x 0.1°.
latitude = np.linspace(-89.95, 89.95, 1800)
# The centres must run from -179.95 to 179.95 so that, like the latitude axis,
# they sit half a cell inside the valid range. (-178.95 .. 180.95 shifts the
# whole axis by 1°, leaving [-180, -179) without cells and placing centres
# beyond +180°.)
longitude = np.linspace(-179.95, 179.95, 3600)

lat_coord = xr.DataArray(latitude, coords={'latitude': latitude}, dims=['latitude'])
lon_coord = xr.DataArray(longitude, coords={'longitude': longitude}, dims=['longitude'])

# Geolocation file providing the Latitude / Longitude variables used below.
# NOTE(review): assumed to be per-pixel arrays on dimensions (Nlin, Ncol), to
# match the renamed brightness-temperature dimensions - confirm.
coords = open_hdf4("/o3p/macc/test/ABI/2015/GOES-0750.C1.4km.hdf")

# Snap every pixel to the centre of its nearest grid cell (method='nearest');
# pixels without a valid position stay NaN. This depends only on the
# geolocation file, so it is computed once instead of once per input file.
lat_binned = lat_coord.sel(latitude=coords.Latitude, method='nearest').where(coords.Latitude.notnull())
lon_binned = lon_coord.sel(longitude=coords.Longitude, method='nearest').where(coords.Longitude.notnull())

# Make sure the output directory exists before the first file is written.
out_dir = '/o3p/macc/test/BTemp/2015/'+day+'_all'
os.makedirs(out_dir, exist_ok=True)

for i, filename in enumerate(filenames):
    # Time tag for the output name: the text after the first 'T' in the file
    # name, cut at the next 'T' or '_'.
    time_tag = filename.split('T')[1].split('_')[0]

    # Read from the same directory the file list was taken from.
    bt = open_hdf4(os.path.join(idir, filename))
    # Rename the dimensions so the image lines up with the geolocation arrays.
    bt_da = bt.Brightness_Temperature.rename(dict(NbLines='Nlin', NbColumns='Ncol'))
    ds = coords.assign(Brightness_Temperature=bt_da)

    # Attach the grid-cell centre of every pixel as ordinary variables.
    ds['latitude'] = lat_binned
    ds['longitude'] = lon_binned
    # Drop any remaining non-index coordinates (optional step).
    ds = ds.reset_coords(drop=True)

    # pandas supports grouping by several keys at once, so do the binning there.
    df = ds.to_dataframe()
    df = df[['Brightness_Temperature', 'latitude', 'longitude']]
    df = df.reset_index(drop=True)
    # Mean brightness temperature of all pixels that fall into the same cell.
    brightness_temperature_grouped = df.groupby(by=['latitude', 'longitude'], sort=True)['Brightness_Temperature']
    brightness_temperature_binned = brightness_temperature_grouped.mean()
    brightness_temperature_binned = pd.DataFrame.from_dict({'brightness_temperature': brightness_temperature_binned})
    # Back to xarray: this dataset only spans the cells that received data.
    dr_loc = xr.Dataset.from_dataframe(brightness_temperature_binned)

    # Embed the populated cells into the full grid; all other cells stay NaN.
    dr = xr.Dataset(data_vars={'brightness_temperature': (['latitude', 'longitude'], np.full(shape=(len(latitude), len(longitude)), fill_value=np.nan))},
                coords={'latitude': latitude,
                        'longitude': longitude})
    for v in dr: # iterates over the data variables (only 'brightness_temperature')
        dr[v].loc[dict(longitude=dr_loc.longitude, latitude=dr_loc.latitude)] = dr_loc[v]

    dr.to_netcdf(out_dir+'/BT-'+day+'-'+time_tag+'.nc')
    print("Done", [i])

---- bt -----
 <xarray.Dataset>
Dimensions:                 (NbLines: 2806, NbColumns: 2806, NbCounts: 65536)
Dimensions without coordinates: NbLines, NbColumns, NbCounts
Data variables:
    Brightness_Temperature  (NbLines, NbColumns) float64 nan nan nan ... nan nan
    ScanTime                (NbLines) float32 -46.38 -45.92 ... 1.246e+03
    SDS_To_Input_Count      (NbCounts) float64 nan nan nan nan ... nan nan nan
    SDS_To_Native_Count     (NbCounts) float64 5.554e+04 nan nan ... nan nan nan

----- bt_da -----
 <xarray.DataArray 'Brightness_Temperature' (Nlin: 2806, Ncol: 2806)>
array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])
Dimensions without coordinates: Nlin, Ncol


In [None]:
#Testing
# let's compare with the results obtained by other methods:
#dr2 = xr.load_dataset('/home/macc/151/BTemp/151/BT-151-02-30.nc')
#dr3 = xr.load_dataset('/home/macc/151/BTemp/151/BT-151-02-30-00.nc')
#xr.testing.assert_allclose(dr, dr2)
#xr.testing.assert_allclose(dr, dr3)
#xr.testing.assert_allclose(dr2, dr3)