# NRT data

In [None]:
import time

import matplotlib.pyplot as plt
import pandas as pd

import uscrn

## Recent hourly data

With {func}`uscrn.get_nrt_data`, we can load recent (near-real-time, NRT) data from USCRN
by specifying the period we want and the dataset to load it from.
Here, we request the 6 most recent files.

In [None]:
# Print the current UTC time for comparison with the times in the loaded data.
now = pd.Timestamp.now(tz="UTC")
print(now)

# Period (-6, None): the 6 most recent hourly files.
df = uscrn.get_nrt_data((-6, None), "hourly", n_jobs=2)

In [None]:
# Display the hourly NRT data loaded above.
df

Sometimes, a few sites can have times an hour earlier than the others.
See the notes in {func}`uscrn.get_nrt_data` for more details.

In [None]:
def func(x):
    """Summarize the values of one group for display.

    Parameters
    ----------
    x : pandas.Series
        Values of one group (here, the WBAN IDs sharing a ``utc_time``).

    Returns
    -------
    str or list
        ``""`` for an empty group, the sorted unique values when the group has
        fewer than 10 entries, and the placeholder ``"..."`` otherwise.
    """
    nx = len(x)
    if nx == 0:
        return ""
    if nx < 10:
        return sorted(x.unique())
    # Too many entries to list usefully; a plain string suffices (no f-string needed).
    return "..."

# Record count per timestamp, plus the WBAN IDs for timestamps with few records.
wbans_by_time = df.groupby("utc_time")["wban"].apply(func)
df["utc_time"].value_counts().sort_index().to_frame().assign(wbans=wbans_by_time)

In these files, for example, site WBAN 13301 includes data from multiple earlier _days_.

* <https://www.ncei.noaa.gov/pub/data/uscrn/products/hourly02/updates/2024/CRN60H0203-202402212000.txt>
* <https://www.ncei.noaa.gov/pub/data/uscrn/products/hourly02/updates/2024/CRN60H0203-202402222000.txt>

### Plot temperature change time series

In [None]:
fig, ax = plt.subplots(figsize=(7, 4))

# Keep only the 7 hours leading up to the latest time. Copy *after* filtering so
# the column assignments below act on an independent frame rather than on a
# slice of `df` (which can trigger pandas' SettingWithCopyWarning).
tmax = df.utc_time.max()
df_ = df[df.utc_time.between(tmax - pd.Timedelta("7h"), tmax)].copy()

# NOTE: `utc_time_mid` is not used by the plots below, which use `utc_time`.
df_["utc_time_mid"] = df_["utc_time"] + pd.Timedelta("30min")
df_["t_hr_avg_k"] = df_["t_hr_avg"].add(273.15)
# Anomaly: each record minus its own site's mean over the selected window.
df_["dt_hr_avg"] = df_["t_hr_avg_k"].sub(df_.groupby("wban")["t_hr_avg_k"].transform("mean"))

# Thick dark line: mean anomaly across all sites at each time.
df_[["utc_time", "dt_hr_avg"]].groupby("utc_time").mean().plot(
    color="0.3",
    linewidth=3,
    zorder=10,
    legend=False,
    ax=ax,
)

# Thin translucent lines: one per site.
df_.groupby("wban").plot(
    x="utc_time",
    y="dt_hr_avg",
    color="0.5",
    linewidth=1,
    alpha=0.4,
    legend=False,
    xlabel="Time (UTC)",
    # Raw string so the mathtext `\Delta` is not parsed as a (invalid) string escape.
    ylabel=r"NRT temperature anomaly  $\Delta T$  (°C)",
    ax=ax,
)

ax.set_title(df.attrs["title"], loc="left", size=8);

### Plot current temperature

In [None]:
# Convert to an xarray dataset and pick the last index along `time`.
ds = uscrn.to_xarray(df)
latest = ds.isel(time=-1)

fig, ax = plt.subplots(figsize=(7, 4.5))
latest.plot.scatter(x="longitude", y="latitude", hue="t_hr_avg", ax=ax);

## Specific period of hourly data

Date selection works by file, not by the data inside the file.
In general, the data are an hour behind the file date/time.
See the notes in {func}`uscrn.get_nrt_data` for more details.

In [None]:
def get_nrt_hourly_period(period):
    """Load NRT data whose ``utc_time`` falls within `period`.

    The request sent to ``uscrn.get_nrt_data`` is shifted one hour later than
    the desired data period, and the loaded records are then filtered on
    ``utc_time``. A short summary of the in-period, outside-period, and
    duplicate record counts is printed.

    Parameters
    ----------
    period : tuple
        ``(a, b)``, the desired data period; each bound is passed to
        ``pandas.to_datetime`` and to ``Series.between``.

    Returns
    -------
    pandas.DataFrame
        Records with ``utc_time`` between `a` and `b`, de-duplicated on
        ``("wban", "utc_time")`` keeping the last occurrence, with the index reset.
    """
    a, b = period
    one_hour = pd.Timedelta(hours=1)
    ap1 = pd.to_datetime(a) + one_hour
    bp1 = pd.to_datetime(b) + one_hour

    df = uscrn.get_nrt_data((ap1, bp1))

    time.sleep(0.5)  # for prints
    n_total = len(df)
    in_period = df.utc_time.between(a, b)
    n_in = in_period.sum()
    print(
        f"Got {n_in}/{n_total} ({n_in / n_total:.1%}) records "
        f"in desired period {a} to {b}"
    )
    outside = df.loc[~in_period, "utc_time"].value_counts()
    # Loop variable is `t`, not `time`, to avoid shadowing the `time` module.
    print(
        "Outside counts:",
        ", ".join(f"{t:%Y-%m-%d %H} ({count})" for t, count in outside.items())
    )

    # keep=False marks every member of a duplicated (wban, utc_time) group.
    dupe = df.duplicated(["wban", "utc_time"], keep=False)
    n_dupe = dupe.sum()
    print(f"Got {n_dupe} ({n_dupe / n_total:.1%}) duplicates")

    return (
        df[in_period]
        .drop_duplicates(["wban", "utc_time"], keep="last")
        .reset_index(drop=True)
    )


df = get_nrt_hourly_period(("2024-02-09 16", "2024-02-09 20"))

In [None]:
# Display the records selected for the requested period.
df

## Recent daily data

Here, we load the most recent daily data file.

In [None]:
# Period -1: the single most recent daily file.
df = uscrn.get_nrt_data(-1, "daily", n_jobs=1)

In [None]:
# Display the most recent daily data.
df

In [None]:
# Convert the daily frame to xarray and squeeze it before mapping the daily maximum.
ds = uscrn.to_xarray(df)
ds = ds.squeeze()

fig, ax = plt.subplots(figsize=(7, 4.5))
ds.plot.scatter(x="longitude", y="latitude", hue="t_daily_max", ax=ax);