Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pick detect integration #11

Merged
merged 4 commits into from
Apr 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions src/yews/cpic/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from .utils import compute_probs
from .utils import sliding_window_view
from .utils import chunks

def find_nonzero_runs(a):
# source: https://stackoverflow.com/
Expand All @@ -18,11 +19,30 @@ def find_nonzero_runs(a):


def detect(waveform, fs, wl, model, transform, g, threshold=0.5,
batch_size=None):
probs = compute_probs(model, transform, waveform,
shape=[3, fs * wl],
step=[1, int(g * fs)],
batch_size=batch_size)
batch_size=None, size_limit=None):
"""size_limit is the maximum number of waveform array elements in the
long dimension to be processed at a time. Can be used when working with
memory constraints. Should be an integer multiple of fs*wl"""
if size_limit:
if not (isinstance(size_limit, int)):
raise TypeError("size_limit must be type integer")
if size_limit % (fs*wl) != 0:
raise ValueError("size_limit must be integer multiple of fs*wl")
probs_list = []
offset = int(fs*(wl - g))
for chunk in chunks(waveform, size_limit, offset):
probs = compute_probs(model, transform, chunk,
shape=[3, fs * wl],
step=[1, int(g * fs)],
batch_size=batch_size)
probs_list.append(probs)
probs = np.concatenate(probs_list, axis=1)

else:
probs = compute_probs(model, transform, waveform,
shape=[3, fs * wl],
step=[1, int(g * fs)],
batch_size=batch_size)

probs[probs < threshold] = 0
p_prob, s_prob = probs[1:]
Expand Down
137 changes: 137 additions & 0 deletions src/yews/cpic/integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import numpy as np
import pandas as pd
from yews.cpic import pick

def detects2table(results_dict, wl, g, include_all=False, starttime=None):
'''
Converts dictionary of results from detect() function to pandas DataFrame
object. Columns include starttime, endtime, p probability and s probability
for each window. If include_all==True, all windows will be included in the
table, (not just those detected probability > threshold). Default is False.
If starttime argument is specified, the starttime and endtime columns will
contain datetime strings. Else, these columns contain values which are the
number of seconds since the start of the array.

Inputs:
results_dict: dictionary of results from detect function
detected_windows_only: Boolean
starttime: obspy UTCDateTime object

Output:
df: pandas data frame with probabilities of detection for each window
'''
data = []
cols = ('window start', 'window end', 'p prob', 's prob')
for i in range(len(results_dict['detect_p'])):
if results_dict['detect_p'][i] or results_dict['detect_s'][i] \
or include_all:
# log row in data table
if starttime:
window_start = str(starttime + i*g) # UTCDateTime object
window_end = str(starttime + i*g + wl)
else:
window_start = i*g # time in seconds since start of array
window_end = window_start + wl
p_prob = results_dict['detect_p'][i]
s_prob = results_dict['detect_s'][i]
row_entry = (window_start, window_end, p_prob, s_prob)
data.append(dict(zip(cols, row_entry)))
df = pd.DataFrame(data)
df = df[list(cols)]
return df

def find_runs_with_gaps(results_dict, max_gap):
'''
Find runs within results_dict from detect function where either the
detection probability for p or s is above the probability threshold,
allowing for max_gap 0's in between detected windows.

Inputs:
results_dict: dictionary of results from yews detect function
max_gap: max number of consecutive 0's allowable within runs

Output:
run_indices: list of pairs describing start and end of run windows
'''
scan_for_start = True
zero_count = 0
run_indices = []
for i in range(len(results_dict['detect_p'])):
if scan_for_start:
if results_dict['detect_p'][i] or results_dict['detect_s'][i]:
start_index = i
most_recent_nonzero = i
scan_for_start = False
else:
if results_dict['detect_p'][i] or results_dict['detect_s'][i]:
most_recent_nonzero = i
zero_count = 0
else:
if zero_count == max_gap:
run_indices.append([start_index, most_recent_nonzero])
zero_count = 0
scan_for_start = True
else:
zero_count += 1
return run_indices

def yield_pick_windows(array, fs, wl, g, results_dict, max_gap, buffer):
'''
Yield windows to run cpic picker over given detection results.
'''
run_indices = find_runs_with_gaps(results_dict, max_gap)
for run in run_indices:
start_index = int(fs*(run[0]*g - buffer))
if start_index < 0:
start_index = 0
end_index = int(fs*(run[1]*g + wl + buffer))
if end_index > (len(array[0]) - 1):
end_index = len(array[0]) - 1
yield start_index, array[:, start_index:end_index]

def generate_picks(array, model, transform, fs, wl, g_detect, g_pick,
results_dict, max_gap, buffer):
p_picks = []
p_confs = []
s_picks = []
s_confs = []
for start_index, window in yield_pick_windows(array, fs, wl, g_detect,
results_dict, max_gap, buffer):
pick_results = pick(window, fs, wl, model, transform, g_pick)
if type(pick_results['p']) == np.ndarray:
for i in range(len(pick_results['p'])):
picktime = pick_results['p'][i]
p_picks.append(start_index/fs + picktime)
p_confs.append(pick_results['p_conf'][i])
if type(pick_results['s']) == np.ndarray:
for i in range(len(pick_results['s'])):
picktime = pick_results['s'][i]
s_picks.append(start_index/fs + picktime)
s_confs.append(pick_results['s_conf'][i])
return {'p picks': p_picks, 'p confs': p_confs, 's picks': s_picks,
's confs': s_confs}

def picks2table(picks, starttime=None):
data = []
cols = ('phase', 'pick time', 'confidence')
for i in range(len(picks['p picks'])):
phase = 'p'
if starttime:
picktime = str(starttime + picks['p picks'][i])
else:
picktime = picks['p picks'][i]
conf = picks['p confs'][i]
row_entry = (phase, picktime, conf)
data.append(dict(zip(cols, row_entry)))
for i in range(len(picks['s picks'])):
phase = 's'
if starttime:
picktime = str(starttime + picks['s picks'][i])
else:
picktime = picks['s picks'][i]
conf = picks['s confs'][i]
row_entry = (phase, picktime, conf)
data.append(dict(zip(cols, row_entry)))
df = pd.DataFrame(data)
df = df[list(cols)]
return df
14 changes: 10 additions & 4 deletions src/yews/cpic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,16 @@ def probs2cfs(probs, sigma=3):

return cf_p, cf_s

def chunks(l, n):
"""Yield successive n-sized chunks from l."""
for i in range(0, len(l), n):
yield l[i:i + n]
def chunks(array, size, offset=0):
"""Yield successive n-sized chunks from array, starting at an offset before
the end of the previous chunk."""
if not (isinstance(size, int) and isinstance(offset, int)):
raise TypeError("Arguments 'size' and 'offset' must be type integer")
slice_axis = array.ndim - 1
axis_length = array.shape[slice_axis]
for i in range(0, axis_length - offset, size - offset):
range_end = min(i + size, axis_length)
yield array.take(indices=range(i, range_end), axis=slice_axis)

def compute_probs(model, transform, waveform, shape, step, batch_size=None):
model.eval()
Expand Down