We want to create an initial set of template waveforms, one template being a three-component seismogram.

These are the criteria each template should satisfy on all three channels:
- Windows with both P- and S- pick
- Only one earthquake in the window
- Raw SNR > 2 for P-wave
- Raw SNR > 3 for S-wave
- Prioritize higher magnitudes first

To-do next:
- For each earthquake, loop over all 3 channels and check P and S SNR in each
- If both SNRs are high enough for all 3 channels (6 checks), the event gets saved in a list

In [97]:
import obspy
import matplotlib.pyplot as plt
import numpy as np
import glob
import pandas as pd
import time; import datetime
from obspy.core.utcdatetime import UTCDateTime
from obspy.clients.fdsn.client import Client

In [98]:
def read_files(base_folder):
    ''' Reads in earthquake catalog files from CSS format into useful pandas dataframes
    
        Inputs:
        base_folder = string containing name of year and month of interest, e.g. '2019_01'
                      (files are looked up under 'catalog_css/<base_folder>/')
        
        Outputs dataframes, returned as the tuple (assoc, arrivals, origin):
        assoc    = columns 0-3 renamed to arrivalid, originid, stationcode, phase
        arrivals = columns 0, 1, 2, 6, 7 renamed to stationcode, epochtime,
                   arrivalid, channel, phase
        origin   = columns 3, 4, 20 renamed to epochtime, originid, magnitude
        All other columns keep their integer position as their name.
        
        Raises:
        ValueError if no file of one of the three kinds is found in the folder
    '''
    
    base_dir = 'catalog_css/'+base_folder+'/'
    
    def load_table(suffix):
        # Sort the paths: glob returns them in arbitrary order, and the row
        # order of the concatenated table should not depend on the filesystem.
        paths = sorted(glob.glob(base_dir + 'catalog_XO_*' + suffix))
        if not paths:
            # pd.concat([]) would also raise ValueError, but without saying
            # which files were missing.
            raise ValueError('No catalog_XO_*' + suffix + ' files found in ' + base_dir)
        # sep=r'\s+' is the supported spelling of the deprecated
        # delim_whitespace=True (columns separated by runs of whitespace).
        return pd.concat([pd.read_csv(p, header=None, sep=r'\s+') for p in paths])
    
    # Read data into pandas dataframes
    arrivals = load_table('arrival')
    assoc = load_table('assoc')
    origin = load_table('origin')

    # Rename some columns for clarity:
    assoc = assoc.rename(columns={0: "arrivalid", 1: "originid", 2: "stationcode", 3: "phase"})
    origin = origin.rename(columns={4: 'originid', 3: 'epochtime', 20: 'magnitude'})
    arrivals = arrivals.rename(columns={2: "arrivalid", 6: "channel", 0: "stationcode",
                                        1: 'epochtime', 7: 'phase'})
    
    return(assoc,arrivals,origin)

In [100]:
def calc_snr(stream,phase,pick_offset=2*60,signal_window=2,noise_window=10):
    ''' Calculate the signal-to-noise ratio (SNR) of a P or S arrival
    
        SNR = (peak absolute amplitude in the signal window starting at the pick)
              / (standard deviation of the noise window ending at the pick)
    
        Inputs:
        stream = indexable collection of traces, each with .data and
                 .stats.sampling_rate; trace 0 is used for 'P', trace 1 for 'S'
        phase = 'P' or 'S'
        pick_offset = seconds between the start of the trace and the pick
                      (default 120, i.e. the trace starts 2 minutes before the pick)
        signal_window = length in seconds of the window after the pick (default 2)
        noise_window = length in seconds of the window before the pick (default 10)
        
        Outputs:
        snr = the ratio described above
        
        Raises:
        ValueError if phase is not 'P' or 'S'
    '''
    
    trace_index = {'P': 0, 'S': 1}
    if phase not in trace_index:
        # Previously an unknown phase fell through both ifs and crashed with
        # an UnboundLocalError on the first use of the index.
        raise ValueError("phase must be 'P' or 'S', got %r" % (phase,))
    
    trace = stream[trace_index[phase]]
    data = trace.data
    sr = int(trace.stats.sampling_rate)
    
    # Index of the sample that contains the pick.
    # NOTE(review): the -1 is kept from the original; confirm whether the pick
    # is meant to sit at pick_offset*sr or one sample earlier.
    sampleind = int(pick_offset*sr)-1
    
    signal = data[sampleind:(sampleind+int(signal_window*sr))]
    noise = data[(sampleind-int(noise_window*sr)):sampleind]
    
    snr_num = np.max(np.abs(signal))
    snr_denom = np.std(noise)
    snr = snr_num/snr_denom
    return(snr)

In [116]:
# Load catalog info for one month
month = '2019_03'
(assoc,arrivals,origin) = read_files(month)

# Sort events by magnitude, descending, so the largest events are handled first
origin = origin.sort_values(by=['magnitude'],ascending=False)
origin_ids = origin['originid']

# Loop through events (the loop variable is not called `id`, which would
# shadow the builtin of the same name)
for origin_id in origin_ids:

    # Get arrivals associated with this earthquake
    subset = assoc.loc[assoc['originid']==origin_id]
    # arrivalid is used as a 1-based row position into arrivals.
    # NOTE(review): this assumes row k-1 of arrivals holds arrivalid k - confirm
    # this still holds when several arrival files are concatenated.
    arrival_subset = arrivals.iloc[subset['arrivalid']-1].reset_index(drop=True)
    
    # Get station names
    stations = arrival_subset['stationcode']

    # Get list of stations that have both P and S pick, i.e. more than one
    # distinct phase among their arrivals. One grouped pass replaces
    # re-filtering the whole table once per arrival row.
    phases_per_station = arrival_subset.groupby('stationcode')['phase'].nunique()
    repeats = phases_per_station[phases_per_station>1].index.tolist()
            
    # Keep only the S-wave picks at those stations and show them
    repeat_subset = arrival_subset.loc[(arrival_subset['stationcode'].isin(repeats))]
    repeat_subset = repeat_subset.reset_index(drop=True)
    repeat_subset = repeat_subset.loc[(repeat_subset['phase']=='S')]
    print(repeat_subset)


   stationcode     epochtime  arrivalid        3  4      5 channel phase  8  \
35        Q18K  1.551997e+09       8821  2019066 -1  15121     BHE     S  -   
38        ET19  1.551997e+09       8825  2019066 -1     -1     HHE     S  -   
39        ET18  1.551997e+09       8826  2019066 -1     -1     HHE     S  -   
40        KAHG  1.551997e+09       8827  2019066 -1   2336     BHE     S  -   
41        KAKN  1.551997e+09       8828  2019066 -1   2341     BHE     S  -   
42        EP15  1.551997e+09       8829  2019066 -1     -1     HHE     S  -   
43         ACH  1.551997e+09       8830  2019066 -1   2103     BHN     S  -   
44        ET20  1.551997e+09       8831  2019066 -1     -1     HHE     S  -   
45        ANCK  1.551997e+09       8832  2019066 -1   2157     BHE     S  -   
48        ET17  1.551997e+09       8836  2019066 -1     -1     HHE     S  -   
50        P18K  1.551997e+09       8840  2019066 -1  14672     BHE     S  -   
52        Q16K  1.551997e+09       8845  2019066 -1 

KeyboardInterrupt: 

In [None]:
# Loop through P-wave picks
    # This cell is indented as a continuation of the per-event loop body: it
    # reads arrival_subset and repeats, which that loop sets for each event.
    #
    # Keep one row per station that has more than one phase picked.
    # drop_duplicates keeps the first row found for each station.
    # NOTE(review): nothing here filters on phase == 'P', so the retained row
    # is only the P pick if P comes first for that station - confirm.
    repeat_subset = arrival_subset.loc[(arrival_subset['stationcode'].isin(repeats))]
    repeat_subset = repeat_subset.drop_duplicates(subset=['stationcode'])
    repeat_subset.reset_index(drop=True,inplace=True)

    for i in range(0,len(repeat_subset)):
        pick = repeat_subset.iloc[i]
        
        # Skip global array stations that have low sample rates and complicated location codes
        # NOTE(review): the test is purely on code length, so every station
        # whose code is longer than 3 characters is skipped - confirm that
        # this is the intended filter.
        if len(pick['stationcode'])>3:
            continue
            
        # Arrival time of this pick (epoch seconds), converted to a naive UTC datetime:
        pick_epoch = pick['epochtime']
        basetime = datetime.datetime.utcfromtimestamp(pick_epoch)
        
        # We want the stream from 2 minutes before to 10 minutes after the pick time.
        # calc_snr locates the pick 2 minutes after the start of the trace, so
        # the 2-minute lead here has to stay in step with it.
        t1 = UTCDateTime(basetime)-datetime.timedelta(minutes = 2)
        t2 = UTCDateTime(basetime)+datetime.timedelta(minutes = 10)

        # Now we query the data from IRIS for this station over the window t1 to t2.
        # NOTE(review): a new client is created on every loop iteration.
        client = Client("IRIS")

        # Download the stream INCLUDING the instrument response (attach_response=True).
        # channel_base is the first two characters of the pick's channel code;
        # only the 'Z' channel of that band is requested.
        channel_base = pick['channel'][0:2]
        # st = client.get_waveforms("XO,AK,AV",pick['stationcode'], "--",channel_base+'*', t1, t2,attach_response=True)
        st = client.get_waveforms("XO,AK,AV",pick['stationcode'], "--",channel_base+'Z', t1, t2,attach_response=True)  
    
        # Plot the raw waveform from 12 s before to 3 s after the pick.
        # (%matplotlib inline is an IPython magic, so this cell only runs in
        # IPython/Jupyter.)
        %matplotlib inline
        fig = plt.figure()
        t11 = UTCDateTime(basetime)-datetime.timedelta(seconds = 12)
        t22 = UTCDateTime(basetime)+datetime.timedelta(seconds = 3)
        st.plot(fig = fig,starttime=t11,endtime=t22) 
        # Red line: the pick. Green lines: 10 s before and 2 s after the pick,
        # the same noise and signal window lengths that calc_snr uses.
        plt.axvline(x=UTCDateTime(basetime),linewidth=2, color='r')
        plt.axvline(x=UTCDateTime(basetime)-datetime.timedelta(seconds = 10),linewidth=2,color='g')
        plt.axvline(x=UTCDateTime(basetime)+datetime.timedelta(seconds = 2),linewidth=2,color='g')
        plt.show()
        # st.spectrogram(log=True, title=str(st[0].stats.starttime))
        
        # Calculate SNR of arrival (phase 'P' selects the first trace of the stream)
        snr = calc_snr(st,'P')
        print(snr)


        # Pause so each plot and SNR value can be inspected before the next station
        input("Press Enter to continue...")