In [None]:
# GOES_ABI.ipynb - for 2018 data (there are changes in the ABI data format between 2015 and 2018)

# This programme opens the ABI files (which are in HDF format) and grids the data into 0.5° x 0.5° grids,
# the same as the FLEXPART output. It then saves the output in netCDF format.

# C. Mackay March 2023 (Catherine.Mackay@aero.obs-mip.fr)
# https://github.com/ckmackay/SOFT-IO-LI.git

#Suggestions/improvements to be made:

# could automatically loop over several days if required.

"""
update juju: (28-13-2023)
- add comments to compare GOES_ABI.ipynb and GOES_ABI_2015.ipynb
--> différence: PAS même source de coords 
        ==> pour 2018 (GOES16 ?) : open_hdf4("/o3p/macc/test/GOESNG-0750.2km.hdf")
        ==> pour 2015 (GOES13 ?) : open_hdf4("/o3p/macc/test/ABI/2015/GOES-0750.C1.4km.hdf") 
""";

In [1]:
import numpy as np
import pandas as pd
import os.path
from datetime import datetime
import xarray as xr
from io import StringIO
import dask.array as da
from pyhdf import SD

In [2]:
# idem GOES_ABI.ipynb and GOES_ABI_2015.ipynb
# Names of the datasets this notebook is interested in. Informational only:
# open_hdf4 loads every dataset present in the file, not just these.
dsets = ['Latitude', 'Longitude', 'Brightness_Temperature']
def open_hdf4(url):
    """Read every scientific dataset of an HDF4 file into an xarray Dataset.

    Parameters
    ----------
    url : str or path-like
        Path of the HDF4 file; it is converted with ``str`` before opening.

    Returns
    -------
    xarray.Dataset
        One variable per dataset found in the file, defined on the dimension
        names reported by the file. Where a dataset declares a fill value,
        elements equal to it are replaced by NaN. Datasets without a fill
        value are returned unchanged.
    """
    hdf = SD.SD(str(url))
    variables = {}
    try:
        # hdf.datasets() maps each dataset name to a tuple whose first item
        # holds the dimension names; the remaining items are not needed here.
        for name, (dims, *_) in hdf.datasets().items():
            sds = hdf.select(name)
            try:
                values = sds.get()
                try:
                    fill_value = sds.getfillvalue()
                except SD.HDF4Error:
                    # pyhdf raises when no fill value is set on the dataset;
                    # in that case there is nothing to mask.
                    fill_value = None
            finally:
                # Release the dataset handle as soon as the data is in memory.
                sds.endaccess()
            if fill_value is not None:
                values = np.where(values != fill_value, values, np.nan)
            variables[name] = (dims, values)
    finally:
        # Close the file deterministically instead of relying on garbage
        # collection; this matters when many files are opened in a loop.
        hdf.end()
    return xr.Dataset(variables)

In [3]:
# Inputs: the data set to process.
# '152' matches the day of year of `date` (1 June 2018).

date = '2018-06-01'
day = '152'
# Directory the input ABI files are read from.
idir = "/o3p/macc/test/ABI/{}/".format(day)


In [4]:
# To avoid problems with missing files, take the file names from the input
# directory itself and only process those.
# os.listdir returns entries in arbitrary order, so sort them to make the
# processing order (and the "Completed [i]" log) reproducible between runs.
filenames = sorted(os.listdir(idir))
print(len(filenames))

96


In [39]:
#################### FOR TESTING
#idir = "/o3p/macc/test/ABI/temp/"
#coords = open_hdf4("/o3p/macc/test/GOESNG-0750.2km.hdf")
##for i in range(len(filenames)):    
#for i in range(13,14):
#    f = filenames[i].split('T')
#    s = f[1].split('_')
#    #print(s[0])
#    bt = open_hdf4('/o3p/macc/test/ABI/temp/'+filenames[i])
#    print(filenames[i])

In [40]:
#################### FOR TESTING
#bt

In [41]:
# Grid the brightness temperature of every input file onto a global
# 0.5° x 0.5° grid and save one netCDF file per input file.

# A single geolocation file (per-pixel Latitude / Longitude) is used for all
# brightness-temperature files, so it is read once.
coords = open_hdf4("/o3p/macc/test/GOESNG-0750.2km.hdf")

# Target grid: cell centres at 0.5° resolution. These do not depend on the
# input file, so they are built once, outside the loop.
latitude = np.linspace(-89.75, 89.75, 360) # 0.5°resolution
longitude = np.linspace(-179.75, 179.75, 720) # 0.5°resolution
lat_coord = xr.DataArray(latitude, coords={'latitude': latitude}, dims=['latitude'])
lon_coord = xr.DataArray(longitude, coords={'longitude': longitude}, dims=['longitude'])

# Output directory; create it if needed so to_netcdf does not fail on a new day.
odir = '/o3p/macc/test/BTemp/'+day+'/'
os.makedirs(odir, exist_ok=True)

for i, filename in enumerate(filenames):

    # The output name reuses the part of the input name between the first 'T'
    # and the following '_'.
    f = filename.split('T')
    s = f[1].split('_')
    bt = open_hdf4(os.path.join(idir, filename))
    # Rename the dimensions so they match those used in the geolocation file.
    bt_da = bt.Brightness_Temperature.rename(dict(NbLines='Nlin', NbColumns='Ncol'))
    # NOTE: only Latitude and Longitude are needed from `coords`; its other
    # data variables could probably be dropped.
    ds = coords.assign(Brightness_Temperature=bt_da)

    # Attach to each pixel the centre of the nearest 0.5° grid cell
    # (method='nearest'); pixels without a valid Latitude / Longitude stay NaN.
    ds['latitude'] = lat_coord.sel(latitude=ds.Latitude, method='nearest').where(ds.Latitude.notnull())
    ds['longitude'] = lon_coord.sel(longitude=ds.Longitude, method='nearest').where(ds.Longitude.notnull())
    # Drop auxiliary non-dimensional coordinates (optional step).
    # NOTE: likely has no effect here, since all data variables depend on the
    # Nlin and Ncol dimensions.
    ds = ds.reset_coords(drop=True)

    # Convert to a pandas DataFrame: pandas supports grouping by several
    # columns at once, which is what the binning below needs.
    df = ds.to_dataframe()
    df = df[['Brightness_Temperature', 'latitude', 'longitude']]
    df = df.reset_index(drop=True)
    # Group the pixels by grid cell...
    brightness_temperature_grouped = df.groupby(by=['latitude', 'longitude'], sort=True)['Brightness_Temperature']
    # ...and average the brightness temperature within each cell.
    brightness_temperature_binned = brightness_temperature_grouped.mean()
    brightness_temperature_binned = pd.DataFrame.from_dict({'brightness_temperature': brightness_temperature_binned})
    # Convert the result back into an xarray Dataset. It only covers the grid
    # cells that received at least one pixel.
    dr_loc = xr.Dataset.from_dataframe(brightness_temperature_binned)
    # Full global grid, initialised with NaN.
    dr = xr.Dataset(data_vars={'brightness_temperature': (['latitude', 'longitude'], np.full(shape=(len(latitude), len(longitude)), fill_value=np.nan))}, 
                coords={'latitude': latitude, 
                        'longitude': longitude})
    for v in dr: # iterate through the variables (here only 'brightness_temperature')
        dr[v].loc[dict(longitude=dr_loc.longitude, latitude=dr_loc.latitude)] = dr_loc[v] # embed the binned values in the full grid
    dr.to_netcdf(odir+'BT-'+day+'-'+s[0]+'.nc')
    print("Completed", [i])

Completed [0]
Completed [1]
Completed [2]
Completed [3]
Completed [4]
Completed [5]
Completed [6]
Completed [7]
Completed [8]
Completed [9]
Completed [10]
Completed [11]
Completed [12]
Completed [13]
Completed [14]
Completed [15]
Completed [16]
Completed [17]
Completed [18]
Completed [19]
Completed [20]
Completed [21]
Completed [22]
Completed [23]
Completed [24]
Completed [25]
Completed [26]
Completed [27]
Completed [28]
Completed [29]
Completed [30]
Completed [31]
Completed [32]
Completed [33]
Completed [34]
Completed [35]
Completed [36]
Completed [37]
Completed [38]
Completed [39]
Completed [40]
Completed [41]
Completed [42]
Completed [43]
Completed [44]
Completed [45]
Completed [46]
Completed [47]
Completed [48]
Completed [49]
Completed [50]
Completed [51]
Completed [52]
Completed [53]
Completed [54]
Completed [55]
Completed [56]
Completed [57]
Completed [58]
Completed [59]
Completed [60]
Completed [61]
Completed [62]
Completed [63]
Completed [64]
Completed [65]
Completed [66]
Compl

In [9]:
# Display `bt`. The processing loop assigns it for each input file, so after a
# full run it holds the dataset read from the last file processed.
bt

In [None]:
#Testing
# let's compare with the results obtained by other methods:
#dr2 = xr.load_dataset('/home/macc/151/BTemp/151/BT-151-02-30.nc')
#dr3 = xr.load_dataset('/home/macc/151/BTemp/151/BT-151-02-30-00.nc')
#xr.testing.assert_allclose(dr, dr2)
#xr.testing.assert_allclose(dr, dr3)
#xr.testing.assert_allclose(dr2, dr3)