# Select sites

With {func}`uscrn.get_data`, you can select specific sites, so that you only download the data you need.

In [None]:
import matplotlib.pyplot as plt

import uscrn

We can use the site metadata from {func}`uscrn.load_meta` to determine which sites we want to load.

In [None]:
# Load the site metadata table and preview its first rows.
meta = uscrn.load_meta()
meta.head()

In [None]:
# Print a summary of the metadata table
# (presumably a pandas DataFrame, so this lists columns, dtypes, and non-null counts).
meta.info()

## Single site

In [None]:
# Sites in state 'CO' whose operation status is 'Operational', ordered by location.
meta.query("state == 'CO' and operation == 'Operational'").sort_values("location")

In [None]:
%%time

station_id = "1045"  # Boulder, CO

# Sanity checks before downloading: station IDs must be unique in the metadata,
# and the chosen ID must map to the "Boulder" location.
assert meta.station_id.nunique() == len(meta)
assert meta.set_index("station_id").at[station_id, "location"] == "Boulder"

# Daily data for 2015 through 2024 (`range` excludes 2025), restricted to this one station.
# NOTE(review): `n_jobs=2` presumably sets the number of parallel workers -- confirm in the uscrn docs.
df = uscrn.get_data(range(2015, 2025), "daily", station_id=station_id, n_jobs=2)

In [None]:
# Display the data loaded for the single selected site.
df

In [None]:
# Name of the column to plot
vn = "t_daily_max"

# Series of the chosen variable indexed by `lst_date`, plus that variable's
# attributes (the `long_name` and `units` entries are used for the axis label).
s = df.set_index("lst_date")[vn]
attrs = df.attrs["attrs"][vn]

_, ax = plt.subplots(figsize=(9, 4))

# Raw values as a thin, faint line; 30-day rolling mean on top in the same color.
s.plot(ax=ax, lw=0.5, alpha=0.35, color="C0")
s.rolling("30D").mean().plot(ax=ax, color="C0")

# Blank out the x-label and set the y-label in one call;
# the trailing semicolon suppresses the notebook's output repr.
ax.set(xlabel="", ylabel=f"{attrs['long_name']}\n[{attrs['units']}]");

## Sites in a state

In [None]:
%%time

# Collect the IDs of all sites whose metadata `state` is 'TX'.
station_ids = meta.query("state == 'TX'").station_id.tolist()
print(station_ids)

# Pass the whole list of station IDs to load daily data for 2023 for all of these sites.
df = uscrn.get_data(2023, "daily", station_id=station_ids, n_jobs=2)

In [None]:
# Display the data loaded for the selected sites.
df

In [None]:
# Name of the column to plot, and its attributes (used for the axis label)
vn = "t_daily_max"
attrs = df.attrs["attrs"][vn]

# Group the values by latitude rounded to one decimal place;
# the rounded values are converted to strings to serve as the grouping labels.
df.assign(
    rounded_latitude=lambda frame: frame["latitude"].round(1).astype(str)
).boxplot(column=vn, by="rounded_latitude")

# Label the y-axis of the current axes; the trailing semicolon suppresses the output repr.
plt.gca().set_ylabel(f"{attrs['long_name']}\n[{attrs['units']}]");