# NRT data

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

import uscrn

## Load recent hourly data

With {func}`uscrn.get_nrt_data`, we can load recent (near-real-time, NRT) data from USCRN
by specifying the period we want and which dataset to load.

In [None]:
now = pd.Timestamp.now("UTC")
print(now)

# Files are selected by the time in their name, which is the next hour
# (end of the period); the times in the data itself are left-labelled
# (start of the hour).
hourly_start = now.floor("h") - pd.Timedelta(hours=6)
df = uscrn.get_nrt_data((hourly_start, None), "hourly")
df

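It seems that, sometimes, a few sites can have times an hour earlier than the others.
The table below counts the rows at each time and lists the WBANs for times with fewer than 10 rows.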

In [None]:
def func(x):
    """Summarize the values of one group for compact display.

    Parameters
    ----------
    x
        Group values; must support ``len()`` and ``.unique()``
        (e.g. a pandas Series).

    Returns
    -------
    str or list
        ``""`` if `x` is empty, the sorted unique values if `x` has fewer
        than 10 elements, and ``"..."`` otherwise.
    """
    nx = len(x)
    if nx == 0:
        return ""
    if nx < 10:
        return sorted(x.unique())
    # Too many values to list; a plain literal suffices (no f-string needed).
    return "..."

# Number of rows at each time, ordered by time.
time_counts = df["utc_time"].value_counts().sort_index().to_frame()

# Per-time summary of the WBANs present, produced by `func`.
wbans_by_time = df.groupby("utc_time")["wban"].apply(func)

time_counts.assign(wbans=wbans_by_time)

## Plot temperature change time series

In [None]:
fig, ax = plt.subplots(figsize=(7, 4))

# Work on a copy so the helper columns don't end up in `df`.
df_ = df.copy()
# NOTE(review): `utc_time_mid` is computed but the plots below use `utc_time` --
# confirm which one is intended for the x-axis.
df_["utc_time_mid"] = df_["utc_time"] + pd.Timedelta("30min")
df_["t_hr_avg_k"] = df_["t_hr_avg"].add(273.15)
# Anomaly: each station's value minus that station's own mean over the loaded
# period (the constant 273.15 offset cancels out here).
df_["dt_hr_avg"] = df_["t_hr_avg_k"].sub(df_.groupby("wban")["t_hr_avg_k"].transform("mean"))

# Mean anomaly over all stations at each time, drawn on top of the station lines.
df_[["utc_time", "dt_hr_avg"]].groupby("utc_time").mean().plot(
    color="0.3",
    linewidth=3,
    zorder=10,
    legend=False,
    ax=ax,
)

# One thin, semi-transparent line per station.
df_.groupby("wban").plot(
    x="utc_time",
    y="dt_hr_avg",
    color="0.5",
    linewidth=1,
    alpha=0.4,
    legend=False,
    xlabel="Time (UTC)",
    # Raw string: `\D` is not a valid escape sequence in a regular string literal.
    ylabel=r"NRT temperature anomaly  $\Delta T$  (°C)",
    ax=ax,
)

ax.set_title(df.attrs["title"], loc="left", size=8);

## Plot current temperature

In [None]:
fig, ax = plt.subplots(figsize=(7, 4.5))

# Columns that together should identify a single row.
station_time = ["wban", "utc_time"]

# TODO: drop these dupes in `get_nrt_data`?
is_dupe = df.duplicated(station_time, keep=False)
display(df[is_dupe])

# Keep only the last row of each (wban, utc_time) pair before converting.
df_unique = df.drop_duplicates(station_time, keep="last")
ds = uscrn.to_xarray(df_unique)

# TODO: lat/lon don't have attrs?
ds_latest = ds.isel(time=-1)
ds_latest.plot.scatter(x="longitude", y="latitude", hue="t_hr_avg", ax=ax);

## Daily data

In [None]:
# Time in the filename, used for selection, is 23:59 (end of the day).
# Files are usually uploaded a day or so later, a bit after midnight.
df = uscrn.get_nrt_data(
    (now.floor("D") - pd.Timedelta(days=2), None),  # TODO: select with -1
    "daily",
    n_jobs=1,
)
df

In [None]:
fig, ax = plt.subplots(figsize=(7, 4.5))

# Station locations, coloured by `t_daily_avg`.
df.plot(
    kind="scatter",
    x="longitude",
    y="latitude",
    c="t_daily_avg",
    cmap="viridis",
    ax=ax,
);