In [1]:
import obspy
import matplotlib.pyplot as plt
import numpy as np
import glob
import pandas as pd
import time; import datetime
from obspy.core.utcdatetime import UTCDateTime
from obspy.clients.fdsn.client import Client
from collections import defaultdict
import geopandas as gpd
from obspy import read_inventory
import pyarrow

In [2]:
# Load the station metadata from the local StationXML file.
sta_metadata = read_inventory(
    'alaska_stations.xml',
    format='STATIONXML',
)

## Convert to a pandas DataFrame indexed by station/channel ID plus the channel start date.

In [37]:
# Collapse the inventory into one record per channel group.
# A group key is network.station.location.<channel code minus its last character>,
# followed by the channel start date as year + day-of-year (7 characters), so that
# groups starting on different days get separate rows.
records_by_sid = {}
for network in sta_metadata:
    for station in network:
        for chn in station:
            epoch_tag = chn.start_date.strftime('%Y%j')
            sid = f"{network.code}.{station.code}.{chn.location_code}.{chn.code[:-1]}{epoch_tag}"

            sensitivity = chn.response.instrument_sensitivity
            orientation = f"{chn.code[-1]}"      # last character of the channel code
            gain = f"{sensitivity.value:.2f}"    # sensitivity value, two decimals, as text

            if sid in records_by_sid:
                # Group already seen: append this channel to the comma-separated lists.
                # Coordinates, unit and dates stay those of the first channel in the group.
                records_by_sid[sid]["component"] += f",{orientation}"
                records_by_sid[sid]["response"] += f",{gain}"
                continue

            # Channels without an end date get a far-future placeholder instead of None.
            end_date = chn.end_date
            if end_date is None:
                end_date = UTCDateTime(2100, 1, 1)

            # Key order here determines the DataFrame column order.
            records_by_sid[sid] = {
                "longitude": chn.longitude,
                "latitude": chn.latitude,
                "elevation(m)": chn.elevation,
                "component": orientation,
                "response": gain,
                "unit": sensitivity.input_units.lower(),
                "start_date": chn.start_date,
                "end_date": end_date,
                "network": network.code,
                "station": station.code,
            }

# One row per group key; the key is kept both as the index and as an "id" column.
station_locs = pd.DataFrame.from_dict(records_by_sid, orient='index')
station_locs["id"] = station_locs.index

### Let's do a bit of clean-up... 

In [38]:
# Drop stations with < 3 components.
# "component" holds single-character codes joined by commas, so three
# components make a string of length 5 (e.g. "a,b,c").
# .copy() makes the filtered result an independent frame, so the in-place
# edits in the following cells do not raise SettingWithCopyWarning.

station_locs = station_locs[station_locs['component'].str.len() >= 5].copy()

In [39]:
# Fix the one station that comes in weird: its row lists more than three
# components, so keep only entries 3-5 of the comma-separated "component"
# and "response" lists.

odd_sid = 'XO.ET18..HH2018161'
odd_comp = station_locs.loc[odd_sid, 'component']
odd_resp = station_locs.loc[odd_sid, 'response']

# Only trim while the row still has extra entries. Without this guard a second
# run of the cell would slice the already-fixed 3-component string down to a
# single component and silently corrupt the row.
if len(odd_comp.split(',')) > 3:
    new_comp = odd_comp[4:9]  # characters 4-8 == entries 3-5 of single-character codes
    new_resp = ','.join(odd_resp.split(',')[2:5])
    station_locs.loc[odd_sid, 'component'] = new_comp
    station_locs.loc[odd_sid, 'response'] = new_resp

#### Last thing: remove the date from "id"

In [40]:
# Strip the 7-character start-date suffix (YYYYjjj) from the ID.
# The value is derived from the index, which still carries the full key,
# rather than from the "id" column itself. Re-running the cell therefore
# gives the same result instead of chopping off another 7 characters.
station_locs['id'] = station_locs.index.str.slice(stop=-7)

#### And convert the time columns to strings

In [41]:
# Render both date columns as text of the form YYYY-MM-DDTHH:MM:SS.mmm.
# %f writes six fractional digits; dropping the last three characters leaves milliseconds.
iso_format = "%Y-%m-%dT%H:%M:%S.%f"
for date_col in ('start_date', 'end_date'):
    station_locs[date_col] = [
        stamp.strftime(iso_format)[:-3] for stamp in station_locs[date_col]
    ]

### All done! Now let's save it as a Parquet file.

In [42]:
# Write the cleaned station table to disk; `version` is passed through to the Parquet writer.
station_locs.to_parquet(
    'alaska_stations.parquet',
    version='2.6',
)