## Make detections on individual emergent signals on continuous data in parallel using STA/LTA triggering
- In parallel over days using dask
- Runs on one channel on one station at a time
- NOTE THAT THE 3-10 HZ FILTER IS HARD CODED IN

In [1]:
import datetime
from obspy.clients.fdsn.client import Client
import obspy
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
import dask
from dask.diagnostics import ProgressBar
matplotlib.rcParams['font.family']=['Arial']
client = Client('IRIS')
import pickle

In [2]:
from obspy.signal.trigger import classic_sta_lta
from obspy.signal.trigger import trigger_onset

### STA/LTA parameters

In [1]:
# Define sampling rate and step we want to chunk
sr = 200
step = 10 # seconds

# Define sta and lta window lengths
# Below chosen to optimize emergent signal detection!
sta_win = 10 # seconds, short term window
lta_win = 1000 # seconds, long term window

### Time endpoints and station

In [4]:
t1 = obspy.UTCDateTime("2018-07-01T00:00:00.000")
t2 = obspy.UTCDateTime("2019-07-01T00:00:00.000")

network = 'OO'
station = 'HYS14'
channel = 'HHN'

### Define functions

In [5]:
def trigger(t1,data_time_length,network,station,channel,sr,step,sta_win,lta_win):
    """
    t1 = start of time period to process, UTCDateTime
    data_time_length = overall length of time period, in seconds
    network = network code of desired station, string
    station = station code, string
    channel = channel code, string
    sr = sampling rate of desired channel
    step = duration of length of time to step through data with (speeds up processing), seconds
    sta_win = duration of short-term window, seconds
    lta_win = duration of long-term window, seconds
    
    Outputs:
    ontimes = list of on-times of all detection windows
    offtimes = list of off-times of all detection windows
    """
    
    t2 = t1 + data_time_length + 1000 # to account for signals at end
    t1 = t1 - 1000 # to account for signals at beginning
    
    
    # Pull in data
    try:
        st1 = client.get_waveforms(network,station, "*",channel, t1-5, t2+5);
        st1.resample(sr).merge(fill_value=0)
        st1.taper(0.05,max_length=5)
        st1.filter('bandpass',freqmin=3,freqmax=10)
        st1.trim(starttime=t1,endtime=t2)
        
        # Check to make sure there is data for the whole time period of t1 + data_time_length
        # If not, adjust some values to reflect that
        t1,data_time_length = check_data(st1,t1,sr,step,data_time_length)

        # Run STA/LTA triggering
        data = np.abs(st1[0].data)
        stalta,times = calc_stalta(data,sr,step,data_time_length,sta_win,lta_win)

        # Get triggers and the seconds they correspond to
        triggers = trigger_onset(stalta,2,1)
        ons = [times[tr[0]] for tr in triggers]
        offs = [times[tr[1]] for tr in triggers]


        # Get start and stop times of windows
        ontimes = [t1+o for o in ons]
        offtimes = [t1+o for o in offs]

        return(ontimes,offtimes)

    except:
        print(['No data for ',str(t1)])
        return([])
    

In [6]:
def check_data(stream,t1,sr,step,data_time_length):
    
    data = stream[0].data
    data_sample_length = sr * data_time_length

    
    # Catch for if data stream is less than specified
    if len(data) < data_sample_length:
        data_sample_length = int(len(data) - (len(data)%(sr*step)))
        data_time_length = int(data_sample_length / sr)
        
    # Catch for if start time is not as specified
    if stream[0].stats.starttime != t1:
        t1 = stream[0].stats.starttime
    
    return(t1,data_time_length)

In [7]:
def calc_stalta(data,sr,step,data_time_length,sta_win,lta_win):
    
    # Matricize the data as step s chunks
    data_sample_length = sr * data_time_length
    
    
    chunked = np.reshape(data[0:data_sample_length],[int(data_sample_length/(sr*step)),int(sr*step)])
    chunked_medians = [np.median(chunked[i,:]) for i in range(np.shape(chunked)[0])]
    chunked_times = np.linspace(0,data_time_length,len(chunked_medians))
    
    # Step through and calculate sta & lta every step s
    sta = []
    lta = []
    for i,vec in enumerate(chunked_medians):

        # STA is median of the next window
        nwin = int(sta_win / step)
        sta.append(np.median(chunked_medians[i:i+nwin]))


        # LTA is median of the past window
        nwin = int(lta_win / step)
        lta.append(np.median(chunked_medians[i-nwin:i]))
    
    stalta = np.array(sta)/np.array(lta)
    
    return(stalta,chunked_times)

### Bin data by days and process in parallel

In [8]:
time_bins = pd.date_range(start=t1.datetime, end=t2.datetime, freq='d')
data_time_length = 24 * 60 * 60

In [9]:
@dask.delayed
def loop_days(t1,data_time_length,network,station,channel,sr,step,sta_win,lta_win):
    t1 = obspy.UTCDateTime(t1)
    return trigger(t1,data_time_length,network,station,channel,sr,step,sta_win,lta_win)

In [10]:
lazy_results = [loop_days(t,data_time_length,network,station,channel,sr,step,sta_win,lta_win) for t in time_bins]

In [11]:
with ProgressBar():
    results = dask.compute(lazy_results)

[                                        ] | 0% Completed | 22.1s

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


[                                        ] | 0% Completed | 22.7s

  stalta = np.array(sta)/np.array(lta)


[####                                    ] | 11% Completed |  2min 45.4s['No data for ', '2019-06-12T23:43:20.000000Z']
[#####                                   ] | 13% Completed |  2min 57.5s['No data for ', '2019-06-15T23:43:20.000000Z']
[######                                  ] | 15% Completed |  3min 32.6s['No data for ', '2019-05-26T23:43:20.000000Z']
[#######                                 ] | 18% Completed |  4min  8.0s['No data for ', '2019-06-20T23:43:20.000000Z']
[###########                             ] | 29% Completed |  6min 51.9s['No data for ', '2019-06-16T23:43:20.000000Z']
[############                            ] | 30% Completed |  7min  9.6s['No data for ', '2018-12-18T23:43:20.000000Z']
[############                            ] | 31% Completed |  7min 30.3s['No data for ', '2019-06-14T23:43:20.000000Z']
[##############                          ] | 35% Completed |  8min 13.6s['No data for ', '2019-06-17T23:43:20.000000Z']
[##############                         

### Concat results and save

In [12]:
save_dir='emergent_detections/'
pickle_name='HYS14_2018_2019_HHN.pickle'
ons = []
offs = []
for ap in results[0]:
    if len(ap) > 0:
        ons.extend(ap[0])
        offs.extend(ap[1])
sort_ind = np.argsort(ons)
ons = [ons[i] for i in sort_ind]
offs = [offs[i] for i in sort_ind]
triggers = [[ons[i],offs[i]] for i in range(len(ons))]


# Toss any with durations less than 30 s
durations = [t[1]-t[0] for t in triggers]
dur_keep = [i for i,e in enumerate(durations) if e >= 30]
triggers = [triggers[i] for i in dur_keep]


pickle.dump(triggers,open(pickle_name, 'wb'))

In [13]:
len(triggers)

12348