# POC (b): aggregate over shape

## Copy of the `request_era5_api` and `shape_mask` functions

In [1]:
import cdsapi
import time

# Wrapper around the ERA5 API request (via cdsapi).
# f_name: the name of the file to be written under data/download/
# shape: the ARBITRARY shape of the area to be requested
#        if None, the whole world is queried,
#        otherwise the bounding box of the shape is requested
# return: the path of the file downloaded
def request_era5_api(f_name, shape=None):
    if shape is not None:
        # bounds are unpacked as (west, south, east, north) and reordered to
        # [north, west, south, east], the same order as the whole-world default
        west, south, east, north = shape.bounds
        area = [north, west, south, east]
    else:
        area = [90, -180, -90, 180]

    fpath = f'data/download/{f_name}'

    # every full hour of the day: '00:00', '01:00', ..., '23:00'
    hours = [f'{hour:02d}:00' for hour in range(24)]

    request = {
        'product_type': 'reanalysis',
        'format': 'netcdf',
        'variable': '2m_temperature',
        'year': '2023',
        'month': '01',
        'day': '05',
        'time': hours,
        'area': area,
    }

    print('\n### ~~~~~~ ###')
    print('START requesting ERA5 API')
    started = time.time()

    client = cdsapi.Client()
    client.retrieve('reanalysis-era5-single-levels', request, fpath)

    elapsed = time.time() - started
    print(f'DONE requesting ERA5 API in {elapsed} seconds')
    print('### ~~~~~~ ###\n')

    return fpath

import numpy as np
import pandas as pd
import geopandas as gpd
import xarray as xr
import folium
import time

# a function to mask the raster data with arbitrary shape
# fpath: the path of the raster data
# shape: the ARBITRARY shape
# return: a tuple (mask, m)
#         mask: boolean 2-D array (latitude x longitude), True for the pixels within the shape
#         m: the interactive map showing the shape, all pixels and the masked pixels
def shape_mask(fpath, shape):
    print('\n### ~~~~~~ ###')
    print('START masking raster data with arbitrary shape')
    start = time.time()

    gdf_shape = gpd.GeoDataFrame(geometry=[shape], crs=4326)

    # open the raster inside a context manager so the file handle is released
    # as soon as everything needed has been copied into memory
    with xr.open_dataset(fpath) as ds:
        print(f'shape of the whole raster data: {ds["t2m"].shape}')
        ds_2d = ds.isel(time=0)  # using a 2-D slice of the raster to construct the geospatial content of the pixels
        grid_shape = ds_2d['t2m'].shape
        print(f'shape of the sliced raster data: {grid_shape}')

        # take a record of the lat/lon location in the raster data
        df_lat = pd.DataFrame(enumerate(ds_2d['latitude'].values), columns=['lat_index', 'latitude'])
        df_lon = pd.DataFrame(enumerate(ds_2d['longitude'].values), columns=['lon_index', 'longitude'])

        # materialise the slice as a flat table: one row per pixel
        df_2d = ds_2d.to_dataframe()

    df_2d = df_2d.reset_index()
    df_2d = df_2d[['latitude', 'longitude', 't2m']]
    # attach the array indices of every pixel so the join result can be mapped back to the grid
    df_2d = df_2d.merge(df_lat, on='latitude')
    df_2d = df_2d.merge(df_lon, on='longitude')
    gdf_2d = gpd.GeoDataFrame(
        df_2d,
        geometry=gpd.points_from_xy(df_2d.longitude, df_2d.latitude),
        crs=4326,
    )  # construct the geospatial dataframe of the pixels
    gdf_masked = gdf_2d.sjoin(gdf_shape, how='inner', predicate="within")  # join the pixels with the shape, use GeoPandas spatial join

    # construct the mask based on the lat/lon location of the pixels within the shape
    lat_index = gdf_masked['lat_index'].values
    lon_index = gdf_masked['lon_index'].values
    mask = np.zeros(grid_shape, dtype=bool)
    mask[lat_index, lon_index] = True

    end = time.time()
    print(f'DONE masking raster data with arbitrary shape in {end - start} seconds')
    print('### ~~~~~~ ###')

    # visualize the shape, all pixels and masked pixels
    m = gdf_shape.explore(name='shape', tiles='Stamen Terrain')
    gdf_2d.explore(m=m, column='t2m', name='api_request_points', cmap='Oranges')
    gdf_masked.explore(m=m, column='t2m', name='masked_points', cmap='Blues')
    folium.LayerControl().add_to(m)
    return mask, m

# Main function: aggregate in arbitrary shape

In [2]:
import netCDF4 as nc

# print the summary statistics of a masked array under a heading
# title: the heading printed before the statistics
# values: the masked array to summarise (masked entries are ignored by the statistics)
def _print_stats(title, values):
    print(f'\n{title}')
    print(f'shape: {values.shape}')
    print(f'count of pixels: {values.count()}')
    print(f'min: {values.min()}')
    print(f'max: {values.max()}')
    print(f'mean: {values.mean()}')
    print(f'std: {values.std()}')


# request the ERA5 temperature for the bounding box of a shape and print its
# summary statistics before and after restricting it to the pixels within the shape
# shape: the ARBITRARY shape to aggregate over
# return: None (the statistics are printed)
def agg_in_shape(shape):
    # request the ERA5 API
    fpath = request_era5_api('poc_b.nc', shape)

    # mask the raster data with arbitrary shape (the map is not needed here)
    mask, _ = shape_mask(fpath, shape)

    # read the temperature raster into memory, then release the file handle
    with nc.Dataset(fpath) as ds:
        all_temperature = ds['t2m'][:]

    _print_stats('BEFORE masking', all_temperature)

    # mask the raster data:
    # - repeat the 2-D "outside the shape" mask explicitly over the leading axis
    #   instead of relying on implicit repetition when assigning a smaller mask
    # - combine it with the existing mask so values that were already masked
    #   (e.g. missing data) stay excluded from the statistics
    outside = np.broadcast_to(~mask, all_temperature.shape)
    all_temperature.mask = np.ma.getmaskarray(all_temperature) | outside

    _print_stats('AFTER masking', all_temperature)
    

# Working example

In [3]:
import geopandas as gpd

# load the vector file and take the geometry of its first row as the arbitrary shape
gdf = gpd.read_file('data/vector/greenland_main_island.geojson')
shape = gdf.at[0, 'geometry']

# request the data, mask it with the shape and print the statistics
agg_in_shape(shape)


### ~~~~~~ ###
START requesting ERA5 API


2023-07-11 14:23:01,319 INFO Welcome to the CDS
2023-07-11 14:23:01,320 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2023-07-11 14:23:01,588 INFO Request is completed
2023-07-11 14:23:01,589 INFO Downloading https://download-0003-clone.copernicus-climate.eu/cache-compute-0003/cache/data6/adaptor.mars.internal-1689093330.3342187-10333-14-e3f17986-ec9e-42c7-a296-b89ca9e8264c.nc to data/download/poc_b.nc (1.1M)
2023-07-11 14:23:03,164 INFO Download rate 698.9K/s 


DONE requesting ERA5 API in 3.218372106552124 seconds
### ~~~~~~ ###


### ~~~~~~ ###
START masking raster data with arbitrary shape
shape of the whole raster data: (24, 96, 244)
shape of the sliced raster data: (96, 244)
DONE masking raster data with arbitrary shape in 0.18700170516967773 seconds
### ~~~~~~ ###

BEFORE masking
shape: (24, 96, 244)
count of pixels: 562176
min: 228.93496704101562
max: 281.08119201660156
mean: 252.67160863433028
std: 13.578592449175936

AFTER masking
shape: (24, 96, 244)
count of pixels: 244728
min: 228.93496704101562
max: 274.79735427634546
mean: 242.42570170121715
std: 8.410893044220265
