In [10]:
# GLM_05deg_grouping_by_lon_lat.ipynb 

# This programme opens the GLM files and grids the data into 0.5° x 0.5° grids,
# the same as the FLEXPART output. It then saves the output in netCDF format.

# C. Mackay March 2023 (Catherine.Mackay@aero.obs-mip.fr)
# https://github.com/ckmackay/SOFT-IO-LI.git

#Suggestions/improvements to be made:

# could automatically loop over several days if required.

# longitude and latitude grids are hard-coded!

In [2]:
import numpy as np
import xarray as xr
import tqdm
import pathlib
import os.path

In [3]:
# Input directory: every entry found here is treated as one GLM netCDF file
# to be gridded (the names are read with os.listdir and opened with xarray).
# NOTE(review): the "s2018154" suffix presumably means day-of-year 154 of 2018,
# matching the "154" in the file names — confirm.
idir = "/o3p/macc/glm/OR_GLM-L2-LCFA_G16_s2018154"

In [4]:
# So as to avoid the problem of missing files, take the file names from the
# input directory itself and only use these.
#
# os.listdir() returns entries in arbitrary order and includes everything in
# the directory, so keep only the netCDF files (stray entries such as
# checkpoint folders would break the file-name parsing further down) and sort
# them so that files are always processed in the same, chronological order.
filenames = sorted(name for name in os.listdir(idir) if name.endswith('.nc'))
print(len(filenames))


23


In [6]:
# Show the available input files in name order. Each name ends in an hour
# range (e.g. "00-01"), so a missing hour shows up as a gap in this listing.
print(sorted(filenames))

['GLM_array_154_00-01.nc', 'GLM_array_154_01-02.nc', 'GLM_array_154_02-03.nc', 'GLM_array_154_03-04.nc', 'GLM_array_154_04-05.nc', 'GLM_array_154_05-06.nc', 'GLM_array_154_06-07.nc', 'GLM_array_154_07-08.nc', 'GLM_array_154_08-09.nc', 'GLM_array_154_09-10.nc', 'GLM_array_154_10-11.nc', 'GLM_array_154_11-12.nc', 'GLM_array_154_12-13.nc', 'GLM_array_154_13-14.nc', 'GLM_array_154_14-15.nc', 'GLM_array_154_15-16.nc', 'GLM_array_154_16-17.nc', 'GLM_array_154_17-18.nc', 'GLM_array_154_19-20.nc', 'GLM_array_154_20-21.nc', 'GLM_array_154_21-22.nc', 'GLM_array_154_22-23.nc', 'GLM_array_154_23-00.nc']


In [16]:
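#latitude = np.linspace(-89.5, 89.5, 180) # 1.0° resolution
#longitude = np.linspace(-178.5, 180.5, 360) # 1.0° resolution
# NOTE(review): the longitude bounds above look shifted by 1°; by analogy with
# the latitude line they would be -179.5 to 179.5. Confirm before re-enabling.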

In [5]:
# Target grid, defined by its cell-centre coordinates.
# Both axes are derived from a single resolution value so that switching to
# another grid spacing only needs this one number changed. It should divide
# 180 evenly so that the number of cells is an integer.
grid_res_deg = 0.5  # 0.5° resolution, the same as the FLEXPART output

# First/last centres sit half a cell inside the +/-90° and +/-180° limits;
# for 0.5° this gives -89.75..89.75 (360 values) and -179.75..179.75 (720).
latitude = np.linspace(-90 + grid_res_deg / 2, 90 - grid_res_deg / 2,
                       round(180 / grid_res_deg))
longitude = np.linspace(-180 + grid_res_deg / 2, 180 - grid_res_deg / 2,
                        round(360 / grid_res_deg))

In [18]:
#latitude = np.linspace(-89.75, 90, 720) # 0.25°resolution
#longitude = np.linspace(-179.75, 180, 1440) # 0.25°resolution

In [None]:
def _empty_grid(lat_centres, lon_centres):
    """Return a zero-filled Dataset on the target latitude/longitude grid.

    Parameters
    ----------
    lat_centres, lon_centres : 1-D array-like
        Coordinate values of the output grid.

    Returns
    -------
    xr.Dataset
        Variables ``flash_energy`` and ``group_energy`` (float) and
        ``num_flash`` and ``num_group`` (int, because they are counts), all
        zero, on dimensions ``(latitude, longitude)``.
    """
    dims = ['latitude', 'longitude']
    shape = (len(lat_centres), len(lon_centres))
    return xr.Dataset(
        data_vars={'flash_energy': (dims, np.zeros(shape=shape)),
                   'num_flash': (dims, np.zeros(shape=shape, dtype=int)),
                   'group_energy': (dims, np.zeros(shape=shape)),
                   'num_group': (dims, np.zeros(shape=shape, dtype=int))},
        coords={'latitude': lat_centres,
                'longitude': lon_centres})


# Directory that receives one gridded netCDF file per input file.
odir = '/o3p/macc/test/GLM_array_154_05deg'

# Process the files in name order; the names are used exactly as listed from
# the input directory rather than being rebuilt from a hard-coded prefix.
for filename in sorted(filenames):
    print(filename)

    # Fresh, zero-filled output grid for this file (it is filled in place below).
    # NOTE(review): only flash_energy and num_flash are filled in this loop;
    # group_energy and num_group are written to the output file as zeros.
    dr = _empty_grid(latitude, longitude)

    # open glm dataset and add latitude, longitude coordinates (grid cell of each flash)
    with xr.open_dataset(str(pathlib.PurePath(idir, filename))) as ds:
        # A file without any flash has nothing to group; its grid stays at zero.
        if ds.flash_lat.size:
            # Snap every flash to the nearest grid latitude / longitude.
            ds_with_lat_lon_bins = ds.assign_coords(
                {'latitude': dr.latitude.sel(latitude=ds.flash_lat, method='nearest'),
                 'longitude': dr.longitude.sel(longitude=ds.flash_lon, method='nearest')})
            # group flashes by latitude
            flash_energy_by_lat = ds_with_lat_lon_bins.flash_energy.groupby('latitude')
            # for each grid latitude, group flashes by longitude, then store the
            # summed flash energy and the number of flashes of every
            # latitude-longitude cell in dr
            for lat, flash_energy_for_lat in tqdm.tqdm(flash_energy_by_lat):
                flash_energy_for_lat_by_lon = flash_energy_for_lat.groupby('longitude')
                flash_energy_sum = flash_energy_for_lat_by_lon.sum()
                dr.flash_energy.loc[dict(latitude=lat, longitude=flash_energy_sum.longitude)] = flash_energy_sum
                flash_energy_count = flash_energy_for_lat_by_lon.count()
                dr.num_flash.loc[dict(latitude=lat, longitude=flash_energy_count.longitude)] = flash_energy_count

    # Output name = input name without its extension + '_batch_bis.nc'
    # (e.g. GLM_array_154_00-01.nc -> GLM_array_154_00-01_batch_bis.nc).
    stem = pathlib.PurePath(filename).stem
    dr.to_netcdf(str(pathlib.PurePath(odir, stem + '_batch_bis.nc')))



In [25]:
# let's compare with the results obtained by other methods:
#dr2 = xr.load_dataset('/home/macc/test/GLM_array_test/test_GLM_array_151_20-21_batch_bis.nc')
#dr3 = xr.load_dataset('/home/macc/test/GLM_array_151/GLM_array_151_20-21_batch_bis.nc')
#xr.testing.assert_allclose(dr, dr2)
#xr.testing.assert_allclose(dr, dr3)
#xr.testing.assert_allclose(dr2, dr3)