# Create a Merged Station Inventory for GAIA / CRESST

Focus on WA State for now

Precip stations https://docs.synopticdata.com/services/metadata

Requires an academic account + token for interfacing with API 


TODO:
- create merged inventory (maybe STAC)
- set end_datetime = NaT if today

In [None]:
import requests
import os
import pandas as pd
import geopandas as gpd

# seismic
from obspy.clients.fdsn import Client
from obspy import UTCDateTime

In [None]:
# Synoptic API token, read from the environment (None when the variable is unset).
TOKEN = os.getenv('SYNOPTIC_TOKEN')

# Station-metadata service URL, assembled from the API root and the endpoint path.
baseAPI = "https://api.synopticdata.com/v2"
endpoint = "/stations/metadata"
url = f"{baseAPI}{endpoint}"

## Precipitation


### SNOTEL 

keep these separate b/c they are special 

In [None]:
# Query the metadata service for SNOTEL stations in WA and load them into a
# GeoDataFrame (kept as a separate inventory for now).
# The `network` parameter must be a numeric network id, not the name "SNOTEL":
#   https://docs.synopticdata.com/services/networks
# Name -> id mapping: https://demos.synopticdata.com/providers/index.html
# Possible future filter: vars=SWE,SNOWDEPTH
params = dict(state="wa",
              token=TOKEN,
              sensorvars=True, # request sensor variable info (see output note below)
              network=25, # SNOTEL
              output='geojson', # geojson output does not return sensor_vars :(
)
response = requests.get(url, params=params)
data = response.json()
# Each geojson feature becomes one row; coordinates are lon/lat (EPSG:4326).
gf_snotel = gpd.GeoDataFrame.from_features(data['features'], crs='EPSG:4326')
print(len(gf_snotel))
gf_snotel.head(3)

In [None]:
gf_snotel.status.value_counts()

In [None]:
# Split the nested period_of_record dict into two parsed datetime columns.
gf_snotel['start_datetime'] = pd.to_datetime(
    gf_snotel['period_of_record'].map(lambda record: record['start'])
)
gf_snotel['end_datetime'] = pd.to_datetime(
    gf_snotel['period_of_record'].map(lambda record: record['end'])
)
# Show the first station as a quick sanity check.
gf_snotel.iloc[0]

In [None]:
# Check if end datetime date is today & remove timezone (UTC from API)

gf_snotel['start_datetime'] = gf_snotel['start_datetime'].dt.tz_localize(None)

def null_if_today(date):
    """Return *date* without its timezone, or ``pd.NaT`` if it falls on today (UTC).

    An end date equal to today's UTC date is treated as "still reporting" and
    is therefore nulled out.

    Parameters
    ----------
    date : pandas.Timestamp, None or NaT
        End-of-record timestamp (the API reports these in UTC).

    Returns
    -------
    pandas.Timestamp or NaT
        ``pd.NaT`` when *date* is missing or its calendar date equals today's
        UTC date; otherwise *date* with the timezone information removed.
    """
    # Missing values stay missing: NaT does not support .date() and None has
    # no tz_localize, so both must be handled before touching the timestamp.
    if pd.isna(date):
        return pd.NaT
    naive = date.tz_localize(None)  # strip the tz once and reuse the result
    today_utc = pd.Timestamp.today(tz='UTC').date()
    return pd.NaT if naive.date() == today_utc else naive


gf_snotel['end_datetime'] = gf_snotel['end_datetime'].apply(null_if_today)
gf_snotel.head(3)

In [None]:
# Are inactive very out of date or not?
gf_snotel[gf_snotel.status=='INACTIVE'].sort_values('end_datetime')[['stid','name','start_datetime','end_datetime']]

In [None]:
# NOTE: sometimes you get a transient DNS failure (re-running the cell usually works).
# The API token has been redacted from the pasted URL below; never commit real tokens.
# ConnectionError: HTTPSConnectionPool(host='api.synopticdata.com', port=443): Max retries exceeded with url: /v2/stations/metadata?state=wa&token=<REDACTED>&sensorvars=True&network=25&output=json (Caused by NameResolutionError("HTTPSConnection(host='api.synopticdata.com', port=443): Failed to resolve 'api.synopticdata.com' ([Errno 8] nodename nor servname provided, or not known)"))

def add_sensor_variables(gdf, network_code=None):
    """Return the list of sensor variable names for every station in *gdf*.

    The metadata service is queried with JSON output (which, unlike geojson,
    includes ``SENSOR_VARIABLES``) for all WA stations of the given network.
    Each row of *gdf* is then looked up by station id, so the result lines up
    with *gdf* even when *gdf* was filtered or came from a different query.

    Parameters
    ----------
    gdf : GeoDataFrame
        Station table with a ``stid`` column.
    network_code : int or str, optional
        Value for the API ``network`` parameter (e.g. 25 for SNOTEL).
        ``None`` queries all networks.

    Returns
    -------
    pandas.Series
        Indexed like *gdf*; each value is a list of variable names. Stations
        that are missing from the response get an empty list.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    KeyError
        If the response carries no ``STATION`` entry (e.g. an API-level error).
    """
    params = dict(
        state="wa",
        token=TOKEN,
        sensorvars=True,  # return sensor variable info
        network=network_code,  # requests drops the parameter when this is None
        output='json',  # JSON output includes SENSOR_VARIABLES (geojson does not)
    )
    response = requests.get(url, params=params)
    response.raise_for_status()
    data = response.json()

    # Station ids are expected under the upper-case 'STID' key in JSON output
    # (the geojson properties use lower-case 'stid').
    variables_by_stid = {
        station['STID']: list((station.get('SENSOR_VARIABLES') or {}).keys())
        for station in data['STATION']
    }
    # Look up by id instead of relying on both queries returning the same
    # stations in the same order.
    return gdf['stid'].map(lambda stid: variables_by_stid.get(stid, []))

add_sensor_variables(gf_snotel, network_code=25)

In [None]:
# df_snotel.iloc[0].SENSOR_VARIABLES.keys()
gf_snotel['sensor_variables'] = add_sensor_variables(gf_snotel, network_code=25)
gf_snotel.head(3)

In [None]:
# NOTE: not all have wind speed or soil temp for example...
gf_snotel['sensor_variables'].value_counts().head()

In [None]:
# Make links clickable in the station inventory table
gf_snotel['station_info'] = gf_snotel['station_info'].apply(lambda x: f'<a href="{x}" target="_blank">{x}</a>')

In [None]:
keep_cols = ['geometry','id','stid','name','longitude','latitude','elevation','status','start_datetime','end_datetime','station_info', 'sensor_variables']
gf_snotel[keep_cols].iloc[0]

In [None]:
gf_snotel[keep_cols].explore(popup=True)

In [None]:
gf_snotel[keep_cols].to_file('snotel_stations.geojson', driver='GeoJSON')

## Precipitation

In [None]:
# Query WA stations that report precipitation, excluding SNOTEL.
# Variable names are listed at https://demos.synopticdata.com/variables/index.html
precip_vars = ["precip_accum"] # NOTE: there are a lot! e.g. hourly etc
# NOTE(review): this module-level name is not read anywhere in the visible notebook;
# the request below passes sensorvars=True directly.
sensorvars=True

params = dict(state="wa",
              token=TOKEN,
              network='!25', # NOT SNOTEL ("!" negates the network id)
              sensorvars=True, # return sensor variable info
              vars=','.join(precip_vars), # restrict to precip only
              output='geojson',
)
response = requests.get(url, params=params)
data = response.json()


In [None]:
gfp = gpd.GeoDataFrame.from_features(data['features'], crs='EPSG:4326')
print('Number of precip stations:', len(gfp))
gfp.head()

In [None]:
# How many different networks for precip.... a lot!
# probably different sensor types too
print('Number of networks:', gfp.mnet_id.nunique())
#gfp.mnet_id.value_counts()

In [None]:
# A rogue station in Mexico! Drop it by station id.
gfp = gfp[gfp.stid != 'E0744']

# Optional map coloured by status:
#gfp.explore(column='status',popup=True, cmap=['green','red'])

In [None]:
#gfp.to_file('precip-stations-wa.geojson', driver='GeoJSON')
# For starters drop inactive and restricted sensors, and keep only the columns
# needed downstream.
# NOTE: this rebinds `keep_cols`, which the SNOTEL section defines with a
# different column list.
keep_cols = ['geometry','id','stid','name', 'longitude','latitude','elevation','mnet_id', 'status','period_of_record','station_info','restricted_data']
gfp = gfp[(gfp.status == 'ACTIVE') & (gfp.restricted_data == False)][keep_cols]
print(len(gfp), "active, unrestricted precip stations in WA")
gfp.head()

In [None]:
# Add separate columns for period_of_record start and end
gfp['start_datetime'] = pd.to_datetime(gfp['period_of_record'].apply(lambda x: x['start']))
gfp['end_datetime'] = pd.to_datetime(gfp['period_of_record'].apply(lambda x: x['end']))
new_order = ["id", "stid", "name", "longitude", "latitude", "elevation", "mnet_id", "start_datetime", "end_datetime", "station_info", "geometry"]
gfp = gfp[new_order]

In [None]:
# Check if end datetime date is today & remove timezone (UTC from API)

gfp['start_datetime'] = gfp['start_datetime'].dt.tz_localize(None)
gfp['end_datetime'] = gfp['end_datetime'].apply(null_if_today)
gfp.head(3)

In [None]:
# Add list of variables
gfp['sensor_variables'] = add_sensor_variables(gfp, network_code=None)

In [None]:
gfp['sensor_variables'].iloc[1]

In [None]:
# Flatten the per-station variable lists into one long Series of names.
all_sensor_vars = gfp['sensor_variables'].explode()
# Unique names starting with "precip"; str() lets non-string entries pass through the test.
all_precip_vars = {
    name for name in all_sensor_vars if str(name).startswith('precip')
}
all_precip_vars

In [None]:
# Precip is reported under many different variable names; tally how many
# stations list each one (no decision yet on how to reconcile them).
for var in all_precip_vars:
    print(
        f"Number of stations with {var}:",
        sum(var in names for names in gfp['sensor_variables']),
    )

In [None]:
# Any stations whose variable list contains no precip variable? Yup!
# But looking at the station website it does have precip, so maybe some metadata is incomplete...
# NOTE(review): another possible cause is that 'sensor_variables' is not matched
# to the right station row - worth verifying against stid before trusting this.
no_precip = gfp[~gfp['sensor_variables'].apply(lambda x: any(var in x for var in all_precip_vars))]
# Only the last expression of a cell is displayed, so this count is not shown.
len(no_precip)
no_precip.iloc[0]

In [None]:
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'retina'

In [None]:
# But actually, looking at station page, there is precip. It's just under precip_accum_1 for some reason
# https://explore.synopticdata.com/KMFW1/metadata

def get_single_station_precip_timeseries(stid, start, end, derived=0):
    """Fetch the precipitation time series of one station as a DataFrame.

    Parameters
    ----------
    stid : str
        Station id, e.g. ``'KMFW1'``.
    start, end : str
        Time window, formatted ``YYYYmmddHHMM``.
    derived : bool or int, default 0
        Sent as the ``precip`` request parameter (normalised to 1 or 0).
        Truthy enables derived precip (``precip_accumulated_set_1d``,
        ``precip_intervals_set_1d``); falsy keeps the default
        ``precip_accum_set_1`` column.

    Returns
    -------
    pandas.DataFrame
        The station's ``OBSERVATIONS`` with ``date_time`` parsed to datetimes.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    ValueError
        If the response contains no station data for *stid*.
    """
    timeseries_url = baseAPI + "/stations/timeseries"

    params = dict(
        token=TOKEN,
        stid=stid,
        start=start,
        end=end,
        vars='precip',
        # Send 1/0 rather than the strings "True"/"False" that a Python bool
        # would be encoded as.
        precip=int(bool(derived)),
    )
    response = requests.get(timeseries_url, params=params)
    response.raise_for_status()
    data = response.json()

    stations = data.get('STATION') or []
    if not stations:
        # Report the API summary instead of failing with a bare IndexError/KeyError.
        raise ValueError(
            f"No timeseries returned for station {stid!r}: {data.get('SUMMARY')}"
        )

    # Show the units of the returned variables (the full response is no longer dumped).
    print(data.get('UNITS'))
    df = pd.DataFrame(stations[0]['OBSERVATIONS'])
    df['date_time'] = pd.to_datetime(df['date_time'])
    return df

# START must be a number in the form YYYYmmddHHMM.

start = pd.Timestamp('2025-12-01').strftime('%Y%m%d%H%M')
end = pd.Timestamp('2025-12-31').strftime('%Y%m%d%H%M')
df = get_single_station_precip_timeseries('KMFW1', start, end, derived=True)

# According to # https://raws.dri.edu/cgi-bin/wea_mnsimts2.pl
# total of 38.1 mm in december
print('Monthly total =', df.precip_accumulated_set_1d.iloc[-1])

#print(df.head())
fig, ax = plt.subplots(figsize=(12,6))
df.plot(ax=ax, x='date_time', y='precip_accumulated_set_1d')
df.plot(ax=ax, x='date_time', y='precip_intervals_set_1d')
plt.ylabel('Millimeters');


In [None]:
# NOTE: if derived=False, the series does not start at 0.
# Weird: according to https://raws.dri.edu/cgi-bin/rawMAIN.pl?waWKRA, 72.39 is the
# accumulated total precip from the last 2 months...
# which is what we get as a starting value...
df = get_single_station_precip_timeseries('KMFW1', start, end, derived=False)
df.head()
# First observation, to inspect the starting value.
print(df.iloc[0])
fig, ax = plt.subplots(figsize=(12,6))
df.plot(ax=ax, x='date_time', y='precip_accum_set_1')
plt.ylabel('Millimeters');

In [None]:
# Make station_info URL a clickable link
gfp['station_info'] = gfp['station_info'].apply(lambda x: f'<a href="{x}" target="_blank">{x}</a>')

In [None]:
# Add simple-styling for geojson.io
# https://github.com/mapbox/simplestyle-spec/blob/master/1.1.0/README.md
# gfp['marker-color'] = '#0000FF' # blue
# gfp['marker-size'] = 'small'
# gfp['marker-symbol'] = 'water'

In [None]:
gfp.explore(popup=True)

In [None]:
# Save simplified, styled geojson
gfp.to_file("precip-stations.geojson", driver='GeoJSON')
!ls -tlrh *geojson

## Streamflow

In [None]:
# Search river gauges:
# filter the search to WA stations that report stream flow
# (NOTE(review): the request does not restrict the network, so results may not be USGS-only).
# Variable names: https://demos.synopticdata.com/variables/index.html
target_vars = ["stream_flow"]
# NOTE(review): this module-level name is not read anywhere in the visible notebook.
sensorvars=True

params = dict(state="wa",
              token=TOKEN,
              #sensorvars=True, # Doesn't seem to work for geojson output...
              vars=','.join(target_vars),
              output='geojson',
)
response = requests.get(url, params=params)
data = response.json()


In [None]:
gfs = gpd.GeoDataFrame.from_features(data['features'], crs='EPSG:4326')
print('stations=', len(gfs))
gfs.head()

In [None]:
# Keep only ACTIVE stations and the columns needed downstream.
# The column list is spelled out here instead of reusing the global `keep_cols`:
# that name is bound to different lists in earlier sections, so relying on it
# makes this cell depend on which cell happened to run last.
streamflow_cols = ['geometry','id','stid','name', 'longitude','latitude','elevation','mnet_id', 'status','period_of_record','station_info','restricted_data']
# .copy() so the column assignments below modify an independent frame rather
# than a slice of the query result.
gfs = gfs.loc[gfs.status == 'ACTIVE', streamflow_cols].copy()

# Add separate columns for period_of_record start and end
gfs['start_datetime'] = pd.to_datetime(gfs['period_of_record'].apply(lambda x: x['start']))
gfs['end_datetime'] = pd.to_datetime(gfs['period_of_record'].apply(lambda x: x['end']))

# Check if end datetime date is today & remove timezone (UTC from API)
gfs['start_datetime'] = gfs['start_datetime'].dt.tz_localize(None)
gfs['end_datetime'] = gfs['end_datetime'].apply(null_if_today)
gfs.head(3)



In [None]:
gfs['station_info'] = gfs['station_info'].apply(lambda x: f'<a href="{x}" target="_blank">{x}</a>')

In [None]:
gfs['sensor_variables'] = add_sensor_variables(gfs, network_code=None)

In [None]:
new_order = ["id", "stid", "name", "longitude", "latitude", "elevation", "mnet_id", "start_datetime", "end_datetime", "station_info", "sensor_variables", "geometry"]
gfs = gfs[new_order]
gfs.head()

In [None]:
gfs.explore()

In [None]:
# gfs['marker-color'] = '#00FFFF' # cyan
# gfs['marker-size'] = 'small'
# gfs['marker-symbol'] = 'waterfall'

In [None]:
# Save simplified, styled geojson
gfs.to_file("streamflow-stations.geojson", driver='GeoJSON')
!ls -tlrh *geojson

In [None]:
#Combine both precip and streamflow
gf = pd.concat([gfp, gfs], ignore_index=True)
print('Total stations (precip + streamflow)=', len(gf))
# gf.head()
# gf.to_file('combined-stations-wa-styled.geojson', driver='GeoJSON')

## Seismic 

In [None]:
client = Client('IRIS')

In [None]:
aoi = gpd.read_file(
    "https://raw.githubusercontent.com/unitedstates/districts/refs/heads/gh-pages/states/WA/shape.geojson"
)
minlon, minlat, maxlon, maxlat = aoi.total_bounds

In [None]:
staqkwargs = {
    'channel': 'EHZ,HHZ,ENZ,HNZ,BNZ',
    'minlatitude': minlat,
    'minlongitude': minlon,
    'maxlongitude': maxlon,
    'maxlatitude': maxlat,
    'starttime': UTCDateTime('2025-01-01'),
    'endtime': UTCDateTime('2025-12-31'),
    'level': 'station'
}

# Query stations
inv = client.get_stations(**staqkwargs)

In [None]:
len(inv.networks)

In [None]:
# Flatten the inventory into one de-duplicated row per station entry.
# is_active() without a time constraint always returns True, so evaluate it
# against the current time to get a meaningful flag.
now_utc = UTCDateTime()
nsll_set = set()
for net in inv.networks:
    for sta in net.stations:
        tup = (net.code,
               sta.code,
               sta.longitude,
               sta.latitude,
               sta.elevation,
               # guard start_date the same way as end_date
               sta.start_date.datetime if sta.start_date else None,
               sta.end_date.datetime if sta.end_date else None,
               #sta.total_number_of_channels,
               sta.is_active(time=now_utc),
               f'https://ds.iris.edu/mda/{net.code}/{sta.code}'
        )
        nsll_set.add(tup)

# 'total_number_of_channels'
df_seis = pd.DataFrame(list(nsll_set), columns=['network','station','longitude','latitude','elevation','start_datetime','end_datetime', 'is_active', 'station_info'])
# Set iteration order is arbitrary; sort so the table (and any file written
# from it) comes out in a repeatable order.
df_seis = df_seis.sort_values(['network', 'station', 'start_datetime']).reset_index(drop=True)
df_seis.head()

In [None]:
# Count the rows flagged active rather than every row in the table, so the
# number matches what the message says.
print(int(df_seis['is_active'].sum()), "active seismic stations in WA State")

In [None]:
# df_seis['marker-color'] = '#FF00FF' # magenta
# df_seis['marker-size'] = 'small'
# df_seis['marker-symbol'] = 'defibrillator'

In [None]:
df_seis['station_info'] = df_seis['station_info'].apply(lambda x: f'<a href="{x}" target="_blank">{x}</a>')

In [None]:
# Pandas to GeoPandas
gf_seis = gpd.GeoDataFrame(df_seis, geometry=gpd.points_from_xy(df_seis.longitude, df_seis.latitude), crs='EPSG:4326')
gf_seis.explore(popup=True)

In [None]:
#gf_seis = gf_seis.drop(columns=['is_active'])
gf_seis.to_file('seismic-stations.geojson', driver='GeoJSON')

In [None]:
# Ideally use same schema across data providers...
#gf_seis.head()

### Infrasound

In [None]:
staqkwargs = {
    'channel': 'BDF',
    'minlatitude': minlat,
    'minlongitude': minlon,
    'maxlongitude': maxlon,
    'maxlatitude': maxlat,
    'starttime': UTCDateTime('2025-01-01'),
    'endtime': UTCDateTime('2025-12-31'),
    'level': 'station'
}

# Query stations
inv = client.get_stations(**staqkwargs)
len(inv.networks)

In [None]:
# Flatten the inventory into one de-duplicated row per station entry.
# is_active() without a time constraint always returns True, so evaluate it
# against the current time to get a meaningful flag.
now_utc = UTCDateTime()
nsll_set = set()
for net in inv.networks:
    for sta in net.stations:
        tup = (net.code,
               sta.code,
               sta.longitude,
               sta.latitude,
               sta.elevation,
               # guard start_date the same way as end_date
               sta.start_date.datetime if sta.start_date else None,
               sta.end_date.datetime if sta.end_date else None,
               #sta.total_number_of_channels,
               sta.is_active(time=now_utc),
               f'https://ds.iris.edu/mda/{net.code}/{sta.code}'
        )
        nsll_set.add(tup)

# 'total_number_of_channels'
df_infrasound = pd.DataFrame(list(nsll_set), columns=['network','station','longitude','latitude','elevation','start_datetime','end_datetime', 'is_active', 'station_info'])
# Set iteration order is arbitrary; sort so the table (and any file written
# from it) comes out in a repeatable order.
df_infrasound = df_infrasound.sort_values(['network', 'station', 'start_datetime']).reset_index(drop=True)
# Count the rows flagged active rather than every row in the table.
print(int(df_infrasound['is_active'].sum()), "active infrasound stations in WA State")
df_infrasound.head()

In [None]:
df_infrasound['station_info'] = df_infrasound['station_info'].apply(lambda x: f'<a href="{x}" target="_blank">{x}</a>')

In [None]:
gf_infra = gpd.GeoDataFrame(df_infrasound, geometry=gpd.points_from_xy(df_infrasound.longitude, df_infrasound.latitude), crs='EPSG:4326')
gf_infra.explore(popup=True)

In [None]:
gf_infra.to_file('infrasound-stations.geojson', driver='GeoJSON')

## GNSS


TODO...

In [None]:
# No time info
#!wget -nc https://geodesy.unr.edu/gps_timeseries/IGS20/llh/llh.out
!wget -nc https://geodesy.unr.edu/gps_timeseries/IGS20/llh/llh.out_sorted_by_add_date
!head llh.out_sorted_by_add_date

In [None]:
df = pd.read_csv('llh.out_sorted_by_add_date', sep=r'\s+',
                 names=['station','lat','lon','elevation','datestr']
)

In [None]:
df['start_datetime'] = pd.to_datetime(df['datestr'], format='%Y_%j')
df.head()

In [None]:
# Longitudes arrive in a -360..0 range; shift anything west of -180 by a full
# turn so every value lies within -180..180.
df['lon'] = df['lon'].where(df['lon'] >= -180, df['lon'] + 360)

# Build point geometries from lon/lat and wrap the table as a GeoDataFrame.
gf_gnss = gpd.GeoDataFrame(
    df,
    crs='EPSG:4326',
    geometry=gpd.points_from_xy(x=df['lon'], y=df['lat']),
)
gf_gnss.head()

In [None]:
# Add station url plain text
#gf_gnss['station_info'] = gf_gnss['station'].apply(lambda x: f'https://geodesy.unr.edu/NGLStationPages/stations/{x}.sta')

In [None]:
# Add an HTML hyperlink to each station's data page, with the station code as link text.
# NOTE: doesn't work in Jupyter Notebook, but does work in exported HTML
gf_gnss['station_link'] = gf_gnss['station'].apply(lambda x: f'<a href="https://geodesy.unr.edu/NGLStationPages/stations/{x}.sta" target="_blank">{x}</a>')

In [None]:
gf_gnss.iloc[-1].station_link

In [None]:
# Area of interest: the WA state outline, used as the clip mask below.
aoi = gpd.read_file(
    "https://raw.githubusercontent.com/unitedstates/districts/refs/heads/gh-pages/states/WA/shape.geojson"
)

# The full (unclipped) inventory is large:
#gf_gnss.to_file('gnss.geojson') # 9MB
# Clip to the geometry stored under index label 0 of the AOI file.
clipped = gf_gnss.clip(aoi.geometry[0])
clipped.to_file('wa_gnss.geojson') #126 KB

In [None]:
!ls -ltrh wa_gnss.geojson

In [None]:

m = clipped.explore(popup=True)
m

In [None]:
m.save('gnss-stations-wa.html')