# Point extraction of 20km WRF data

Extracting data from WRF datasets at points given by latitude and longitude coordinates.

In [1]:
import xarray as xr
from pyproj import Transformer
from pyproj.crs import CRS
import pandas as pd

In [2]:
# WRF netCDF file to extract from -- change this to point at your own data
fp = "/rcs/project_data/wrf_data/daily/pcpt/pcpt_daily_wrf_ERA-Interim_historical_1979.nc"

# Points of interest as WGS84 (latitude, longitude) pairs. The keys are
# arbitrary labels; they are reused as the column names of the pandas
# DataFrame that holds the extracted time series.
wgs84_coords = dict(
    p1=(65.857, -147.86),
    p2=(60.128, -149.417),
    p3=(66.565, -152.643),
    p4=(59.24, -135.51),
    p5=(67.37, -165.54),
)

The WRF CRS is built from the proj4 string stored in the dataset attributes. The `pyproj.Transformer` class is then used to project the WGS84 coordinates into that CRS, which is the one used by the `xarray` dataset's grid coordinates.

`xarray.Dataset` objects (and the data arrays inside them) have a `.sel` method for querying the data by coordinate value. Make sure to pass `method="nearest"` so that the grid cell nearest to each specified coordinate is chosen; otherwise `.sel` requires the specified coordinate to match a grid coordinate value exactly.

In [3]:
with xr.open_dataset(fp) as ds:
    # The WRF file carries its projection as a proj4 string in the dataset
    # attributes; build a CRS from it and a transformer from WGS84 into it.
    wrf_proj_str = ds.attrs["proj_parameters"]
    wrf_crs = CRS.from_proj4(wrf_proj_str)
    transformer = Transformer.from_crs("epsg:4326", wrf_crs)

    # Project every named point; each tuple is passed to the transformer in
    # the order it is stored in wgs84_coords.
    wrf_coords = {
        name: transformer.transform(*latlon)
        for name, latlon in wgs84_coords.items()
    }

    # For each projected point, pull the pcpt series of the closest grid cell.
    # method="nearest" is what allows a match when the point does not coincide
    # with a grid coordinate value.
    pcpt_data = {
        name: ds["pcpt"].sel({"xc": xy[0], "yc": xy[1]}, method="nearest").values
        for name, xy in wrf_coords.items()
    }

    # One column per point, indexed by the dataset's time coordinate.
    df = pd.DataFrame(pcpt_data, index=ds["time"].values)

In [4]:
# preview the first five rows of the extracted time series
df.head(5)

Unnamed: 0,p1,p2,p3,p4,p5
1979-01-02,0.006,0.099,1.492,0.063,1.08
1979-01-03,0.004,0.0,0.002,0.0,0.198
1979-01-04,0.0,0.0,0.0,0.0,0.0235
1979-01-05,0.0,0.18,0.0,0.0,0.0
1979-01-06,0.006,0.0743,0.0,0.0,0.0302
