# Create a Merged Station Inventory for GAIA / CRESST

Focus on WA State for now

Precip stations come from the Synoptic Data station metadata service: https://docs.synopticdata.com/services/metadata

Requires an academic account and an API token, which the notebook reads from the `SYNOPTIC_TOKEN` environment variable.


TODO:
- create merged inventory (maybe STAC)
- set end_datetime = NaT if today

In [None]:
import requests
import os
import pandas as pd
import geopandas as gpd
import obspy

In [None]:
# The Synoptic API token is read from the environment (None when unset).
TOKEN = os.getenv('SYNOPTIC_TOKEN')

# Full URL of the station-metadata endpoint, assembled from its two parts.
baseAPI = "https://api.synopticdata.com/v2"
endpoint = "/stations/metadata"
url = "".join([baseAPI, endpoint])

## Precipitation

In [None]:
# Filter the station search to stations that report precipitation.
# Variable catalogue: https://demos.synopticdata.com/variables/index.html
precip_vars = ["precip_accum"] # NOTE: there are a lot! e.g. hourly etc

params = dict(state="wa",
              token=TOKEN,
              sensorvars=True, # return sensor variable info
              vars=','.join(precip_vars), # restrict to precip only
              output='geojson',
)
# requests has no default timeout; without one a stalled connection would
# hang the notebook indefinitely.
response = requests.get(url, params=params, timeout=60)
# Surface HTTP errors here, with the status code, instead of failing later
# on a JSON decode error or a missing 'features' key.
response.raise_for_status()
data = response.json()


In [None]:
# Turn the GeoJSON features of the precip response into a GeoDataFrame
# with an explicit EPSG:4326 CRS.
precip_features = data['features']
gfp = gpd.GeoDataFrame.from_features(precip_features, crs='EPSG:4326')
gfp.head()

In [None]:
# A rogue station in Mexico is returned by the WA query; drop it by station id.
rogue_stid = 'E0744'
gfp = gfp[gfp['stid'] != rogue_stid]

#gfp.explore(column='status',popup=True, cmap=['green','red'])

In [None]:
#gfp.to_file('precip-stations-wa.geojson', driver='GeoJSON')
# For starters drop inactive and restricted sensors.
keep_cols = ['geometry','id','stid','name','elevation','status','period_of_record','station_info','restricted_data']
# `== False` is kept on purpose: it treats missing values as "not False",
# whereas `~` would fail on a non-boolean column.
is_usable = (gfp.status == 'ACTIVE') & (gfp.restricted_data == False)
# One .loc selection plus an explicit copy yields an independent frame, so the
# column assignments in later cells do not hit SettingWithCopyWarning on
# pandas versions without copy-on-write.
gfp = gfp.loc[is_usable, keep_cols].copy()
print(len(gfp), "active, unrestricted precip stations in WA")
gfp.head()

In [None]:
# Add separate datetime columns for the period_of_record start and end, then
# keep only the columns of the shared inventory schema.
period = gfp['period_of_record']
new_order = ["id", "stid", "name", "start_datetime", "end_datetime", "elevation", "station_info", "geometry"]
# assign() builds a new frame instead of writing into a possibly-sliced one,
# and the trailing copy() makes the result safe to mutate in later cells
# (avoids SettingWithCopyWarning on pandas versions without copy-on-write).
gfp = gfp.assign(
    start_datetime=pd.to_datetime(period.apply(lambda x: x['start'])),
    end_datetime=pd.to_datetime(period.apply(lambda x: x['end'])),
)[new_order].copy()
gfp.head()

In [None]:
# Simplestyle properties so geojson.io renders the precip stations as small
# blue water markers.
# https://github.com/mapbox/simplestyle-spec/blob/master/1.1.0/README.md
precip_style = {
    'marker-color': '#0000FF',  # blue
    'marker-size': 'small',
    'marker-symbol': 'water',
}
for style_key, style_value in precip_style.items():
    gfp[style_key] = style_value

In [None]:
# Save simplified, styled geojson
gfp.to_file("precip-stations-wa-styled.geojson", driver='GeoJSON')
!ls -tlrh *geojson

## Streamflow

In [None]:
# Search USGS River Gauges:
# filter search to stations with streamflow data
# https://demos.synopticdata.com/variables/index.html
target_vars = ["stream_flow"]

params = dict(state="wa",
              token=TOKEN,
              #sensorvars=True, # Doesn't seem to work for geojson output...
              vars=','.join(target_vars),
              output='geojson',
)
# requests has no default timeout; without one a stalled connection would
# hang the notebook indefinitely.
response = requests.get(url, params=params, timeout=60)
# Surface HTTP errors here, with the status code, instead of failing later
# on a JSON decode error or a missing 'features' key.
response.raise_for_status()
data = response.json()


In [None]:
# Turn the GeoJSON features of the streamflow response into a GeoDataFrame
# with an explicit EPSG:4326 CRS, and report how many stations came back.
stream_features = data['features']
gfs = gpd.GeoDataFrame.from_features(stream_features, crs='EPSG:4326')
print(f"stations= {len(gfs)}")
gfs.head()

In [None]:
# Keep only ACTIVE sensors, then add separate datetime columns for the
# period_of_record start and end.
# NOTE: keep_cols is the column list defined in the precipitation section.
active = gfs.loc[gfs.status == 'ACTIVE', keep_cols]
period = active['period_of_record']
new_order = ["id", "stid", "name", "start_datetime", "end_datetime", "elevation", "station_info", "geometry"]
# assign() builds a new frame instead of writing into the filtered slice, and
# the trailing copy() makes the result safe to mutate in later cells
# (avoids SettingWithCopyWarning on pandas versions without copy-on-write).
gfs = active.assign(
    start_datetime=pd.to_datetime(period.apply(lambda x: x['start'])),
    end_datetime=pd.to_datetime(period.apply(lambda x: x['end'])),
)[new_order].copy()
gfs.head()

In [None]:
# Simplestyle properties so geojson.io renders the streamflow gauges as small
# cyan waterfall markers.
streamflow_style = {
    'marker-color': '#00FFFF',  # cyan
    'marker-size': 'small',
    'marker-symbol': 'waterfall',
}
for style_key, style_value in streamflow_style.items():
    gfs[style_key] = style_value

In [None]:
# Save simplified, styled geojson
gfs.to_file("streamflow-stations-wa-styled.geojson", driver='GeoJSON')
!ls -tlrh *geojson

In [None]:
# Combine the precip and streamflow inventories into one frame and save it.
gf = pd.concat([gfp, gfs], ignore_index=True)
print('Total stations (precip + streamflow)=', len(gf))
gf.to_file('combined-stations-wa-styled.geojson', driver='GeoJSON')
# Preview goes last: a notebook cell only renders its final expression, so a
# head() call in the middle of the cell is never displayed.
gf.head()

## Seismic 

In [None]:
from obspy.clients.fdsn import Client
from obspy import UTCDateTime

In [None]:
# FDSN web-service client for the 'IRIS' data center.
client = Client(base_url='IRIS')

In [None]:
# Load the Washington State outline; its bounding box limits the seismic
# station search below.
aoi_url = "https://raw.githubusercontent.com/unitedstates/districts/refs/heads/gh-pages/states/WA/shape.geojson"
aoi = gpd.read_file(aoi_url)
aoi_bounds = aoi.total_bounds
minlon, minlat, maxlon, maxlat = aoi_bounds

In [None]:
# Station-level query: Z-component channel codes, inside the AOI bounding box,
# for the 2025 calendar-year window.
staqkwargs = dict(
    channel='EHZ,HHZ,ENZ,HNZ,BNZ',
    minlongitude=minlon,
    maxlongitude=maxlon,
    minlatitude=minlat,
    maxlatitude=maxlat,
    starttime=UTCDateTime('2025-01-01'),
    endtime=UTCDateTime('2025-12-31'),
    level='station',
)

# Query stations
inv = client.get_stations(**staqkwargs)

In [None]:
# Number of networks returned by the station query.
network_count = len(inv.networks)
network_count

In [None]:
# Flatten the inventory into one row per distinct station record.
#
# Rows are de-duplicated through dict keys rather than a set: the hashing
# semantics are the same, but dicts keep first-seen order, so the table (and
# the GeoJSON written from it) has the same row order on every run.
seis_columns = ['network','station','latitude','longitude','elevation','start_datetime','end_datetime', 'total_number_of_channels', 'is_active', 'station_info']

# is_active() with no arguments performs no date comparison and always
# returns True, so evaluate it against the current time instead.
now = UTCDateTime()

nsll_rows = {}
for net in inv.networks:
    for sta in net.stations:
        tup = (net.code,
               sta.code,
               sta.latitude,
               sta.longitude,
               sta.elevation,
               # Guard start_date the same way as end_date in case it is unset.
               sta.start_date.datetime if sta.start_date else None,
               sta.end_date.datetime if sta.end_date else None,
               sta.total_number_of_channels,
               sta.is_active(time=now),
               f'https://ds.iris.edu/mda/{net.code}/{sta.code}'
        )
        nsll_rows[tup] = None

df_seis = pd.DataFrame(list(nsll_rows), columns=seis_columns)
df_seis.head()

In [None]:
# Report the number of rows in the seismic station table.
print(f"{len(df_seis)} active seismic stations in WA State")

In [None]:
# Simplestyle properties for the seismic stations: small magenta markers
# using the 'defibrillator' symbol.
seis_style = {
    'marker-color': '#FF00FF',  # magenta
    'marker-size': 'small',
    'marker-symbol': 'defibrillator',
}
for style_key, style_value in seis_style.items():
    df_seis[style_key] = style_value

In [None]:
# Build point geometries from the longitude/latitude columns and wrap the
# table in a GeoDataFrame with an explicit EPSG:4326 CRS.
seis_points = gpd.points_from_xy(x=df_seis['longitude'], y=df_seis['latitude'])
gf_seis = gpd.GeoDataFrame(df_seis, geometry=seis_points, crs='EPSG:4326')
#gf_seis.explore()

In [None]:
# Drop the raw coordinate columns and the is_active flag before writing the
# styled GeoJSON; the coordinates remain in the geometry column.
unneeded_cols = ['latitude', 'longitude', 'is_active']
gf_seis = gf_seis.drop(columns=unneeded_cols)
gf_seis.to_file('seismic-stations-wa-styled.geojson', driver='GeoJSON')

In [None]:
# Preview the seismic table. Ideally every data provider would end up with
# the same column schema.
gf_seis.head(n=5)

## GNSS

In [None]:
# llh.out has no time info, so download the variant sorted by add date instead
# and peek at its first lines. (wget -nc skips the download when the file is
# already present.)
#!wget -nc https://geodesy.unr.edu/gps_timeseries/IGS20/llh/llh.out
!wget -nc https://geodesy.unr.edu/gps_timeseries/IGS20/llh/llh.out_sorted_by_add_date
!head llh.out_sorted_by_add_date

In [None]:
# Parse the whitespace-delimited station list; the file is read without a
# header row, so column names are supplied here.
gnss_columns = ['station','lat','lon','elevation','datestr']
df = pd.read_csv(
    'llh.out_sorted_by_add_date',
    sep=r'\s+',
    names=gnss_columns,
)

In [None]:
# Parse datestr as year + day-of-year ('%Y_%j') into a datetime column.
gnss_add_date = pd.to_datetime(df['datestr'], format='%Y_%j')
df['start_datetime'] = gnss_add_date
df.head()

In [None]:
# Wrap longitudes below -180 (the file uses a -360..0 range) into -180..180
# by adding 360; all other values are left unchanged.
raw_lon = df['lon']
df['lon'] = raw_lon.mask(raw_lon < -180, raw_lon + 360)

# Create GeoDataFrame
gnss_points = gpd.points_from_xy(x=df['lon'], y=df['lat'])
gf_gnss = gpd.GeoDataFrame(df, geometry=gnss_points, crs='EPSG:4326')
gf_gnss.head()

In [None]:
# Add station url plain text
#gf_gnss['station_info'] = gf_gnss['station'].apply(lambda x: f'https://geodesy.unr.edu/NGLStationPages/stations/{x}.sta')

In [None]:
# Add an HTML hyperlink to each station's data page.
# NOTE: doesn't render as a link in Jupyter Notebook, but does work in exported HTML
def _station_link(x):
    """Return an HTML anchor for station code x that opens in a new tab."""
    return f'<a href="https://geodesy.unr.edu/NGLStationPages/stations/{x}.sta" target="_blank">{x}</a>'


gf_gnss['station_link'] = gf_gnss['station'].map(_station_link)

In [None]:
# Spot-check the generated link markup on the last row.
gf_gnss['station_link'].iloc[-1]

In [None]:
# Load the WA outline (same URL as in the seismic section) and clip the
# global GNSS table to its first geometry.
aoi_url = "https://raw.githubusercontent.com/unitedstates/districts/refs/heads/gh-pages/states/WA/shape.geojson"
aoi = gpd.read_file(aoi_url)

#gf_gnss.to_file('gnss.geojson') # 9MB
wa_outline = aoi.geometry[0]
clipped = gf_gnss.clip(wa_outline)
clipped.to_file('wa_gnss.geojson') #126 KB

In [None]:
# Confirm the clipped file was written and check its size.
!ls -ltrh wa_gnss.geojson

In [None]:

# Interactive map of the clipped GNSS stations; popup=True is passed so that
# clicking a marker shows that station's attributes.
m = clipped.explore(popup=True)
m

In [None]:
# Save the interactive map to an HTML file.
m.save('gnss-stations-wa.html')