## Quick code to read in station information from all picks in the Alaska QuakeML files, and save it to xml for much faster reading later.

In [7]:
import obspy
import matplotlib.pyplot as plt
import numpy as np
import glob
import pandas as pd
import time; import datetime
from obspy.core.utcdatetime import UTCDateTime
from obspy.clients.fdsn.client import Client
from collections import defaultdict
import geopandas as gpd
from obspy import read_inventory
import pyarrow

### Read in information from all quakeml files

In [6]:
ml_files = glob.glob('catalog_css/**/XO_*.quakeml')
print(ml_files)

['catalog_css/2018_12/XO_2018_12.quakeml', 'catalog_css/2019_06/XO_2019_06.quakeml', 'catalog_css/2019_01/XO_2019_01.quakeml', 'catalog_css/2019_07/XO_2019_07.quakeml', 'catalog_css/2018_07/XO_2018_07.quakeml', 'catalog_css/2018_09/XO_2018_09.quakeml', 'catalog_css/2018_08/XO_2018_08.quakeml', 'catalog_css/2018_06/XO_2018_06.quakeml', 'catalog_css/2018_11/XO_2018_11.quakeml', 'catalog_css/2018_10/XO_2018_10.quakeml', 'catalog_css/2019_02/XO_2019_02.quakeml', 'catalog_css/2019_05/XO_2019_05.quakeml', 'catalog_css/2019_04/XO_2019_04.quakeml', 'catalog_css/2019_03/XO_2019_03.quakeml', 'catalog_css/2018_05/XO_2018_05.quakeml']


### THIS TAKES ~ hour

In [36]:
count = 0
cat = obspy.core.event.Catalog()
for file in ml_files:
    count += 1
    print(count)
    cat.extend(obspy.core.event.read_events(file))


1
2
3
4
5
6
7
8




9




10




11




12
13




14
15


### Get a list of all station-channel pairs that have picks

In [None]:
picks = [p.picks for p in cat.events]
picks = sum(picks,[])

networks = [p.waveform_id.network_code for p in picks]
stations = [p.waveform_id.station_code for p in picks]
channels = [p.waveform_id.channel_code[0:2] + '*' for p in picks]
# Toss pressure channels:
channelToRemove = 'HD*'
channels = [value for value in channels if value != channelToRemove]

sta_list = [f"{n}.{s}..{c[0:2]}" for n, s, c in zip(networks,stations,channels)]
sta_list = np.unique(sta_list)



In [54]:
from obspy import read_inventory
sta_metadata.write("alaska_stations.xml",
                format="STATIONXML") 

### Get all the info for those stations from IRIS

In [None]:
network = ",".join((np.unique(networks)).tolist())
channel = ",".join((np.unique(channels)).tolist())
station = ",".join((np.unique(stations)).tolist())

origins = [p.origins for p in cat.events]
times = [p[0].time for p in origins]
starttime = np.min(times)
endtime = np.max(times)

sta_metadata = Client("iris").get_stations(starttime=starttime,endtime=endtime,network=network,channel=channel,station=station,location='',level='response')
sta_dict = {'network':network,'channel':channel,'station':station}

In [42]:
origins = [p.origins for p in cat.events]
times = [p[0].time for p in origins]
starttime = np.min(times)
endtime = np.max(times)

sta_metadata = Client("iris").get_stations(starttime=starttime,endtime=endtime,network=network,channel=channel,station=station,location='',level='response')

### Write the station inventory to xml file for safekeeping

In [51]:
sta_metadata.write("alaska_stations.xml",
                format="STATIONXML") 

Unnamed: 0,longitude,latitude,elevation(m),component,response,unit,id
AK.ANM..BH,-165.373200,64.564600,338.0,"E,N,Z","488760000.00,488760000.00,488760000.00",m/s,AK.ANM..BH
AK.ATKA..BH,-174.197495,52.201599,55.0,"E,N,Z","488760000.00,488760000.00,488760000.00",m/s,AK.ATKA..BH
AK.ATKA..BN,-174.197495,52.201599,55.0,"E,N,Z","213760.00,213760.00,213760.00",m/s**2,AK.ATKA..BN
AK.ATKA..HN,-174.197495,52.201599,55.0,"E,N,Z","213760.00,213760.00,213760.00",m/s**2,AK.ATKA..HN
AK.BAGL..BH,-142.091507,60.489601,1470.0,"E,N,Z","488760000.00,488760000.00,488760000.00",m/s,AK.BAGL..BH
...,...,...,...,...,...,...,...
XV.FAPT..HH,-149.083100,64.549800,111.2,"E,N,Z","503939000.00,503939000.00,503939000.00",m/s,XV.FAPT..HH
XV.FNN1..HH,-149.217800,64.571600,110.0,"E,N,Z","503939000.00,503939000.00,503939000.00",m/s,XV.FNN1..HH
XV.FNN2..HH,-149.445600,64.575600,134.6,"E,N,Z","503939000.00,503939000.00,503939000.00",m/s,XV.FNN2..HH
XV.FPAP..HH,-149.099200,64.613000,105.5,"E,N,Z","503939000.00,503939000.00,503939000.00",m/s,XV.FPAP..HH


### Read it back in...

In [2]:
# round trip
%time
sta_metadata = read_inventory('alaska_stations.xml',format='STATIONXML')

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.25 µs


In [4]:
station_locs = defaultdict(dict)
for network in sta_metadata:
    for station in network:
        for chn in station:
            sid = f"{network.code}.{station.code}.{chn.location_code}.{chn.code[:-1]}" + chn.start_date.strftime('%Y%j')
            if sid in station_locs:
                station_locs[sid]["component"] += f",{chn.code[-1]}"
                station_locs[sid]["response"] += f",{chn.response.instrument_sensitivity.value:.2f}"
            else:
                component = f"{chn.code[-1]}"
                response = f"{chn.response.instrument_sensitivity.value:.2f}"
                dtype = chn.response.instrument_sensitivity.input_units.lower()
                tmp_dict = {}
                tmp_dict["longitude"], tmp_dict["latitude"], tmp_dict["elevation(m)"] = (
                    chn.longitude,
                    chn.latitude,
                    chn.elevation,
                )
                tmp_dict["component"], tmp_dict["response"], tmp_dict["unit"] = component, response, dtype
                tmp_dict["start_date"], tmp_dict["end_date"] = chn.start_date,chn.end_date
                tmp_dict["network"], tmp_dict["station"] = network.code, station.code
                station_locs[sid] = tmp_dict

station_locs = pd.DataFrame.from_dict(station_locs,orient='index')
station_locs["id"] = station_locs.index

### Let's do a bit of clean-up... 

In [5]:
# Drop stations with < 3 components

station_locs = station_locs[(station_locs['component'].str.len()>=5)]

In [6]:
# Fix the one station that comes in weird

new_comp = station_locs.loc['XO.ET18..HH2018161','component'][4:9]
new_resp = ','.join(station_locs.loc['XO.ET18..HH2018161','response'].split(',')[2:5])
station_locs.loc['XO.ET18..HH2018161','component'] = new_comp
station_locs.loc['XO.ET18..HH2018161','response'] = new_resp
        

#### Last thing: remove the date from "id"

In [7]:
station_locs['id']=station_locs['id'].str.slice(stop=-7)

### All done! Now let's save it as a parquet.

In [9]:
station_locs.to_parquet()

ImportError: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.