In [1]:
# Zachary Katz
# zachary_katz@mines.edu
# 16 April 2024

# Load all events and pick event start and end times
# Try classifying via k-means

# Imports
import os

import numpy as np
import pandas as pd
import scipy
import scipy.signal

In [2]:
# Load every event file into a dict of parallel lists:
#   data["event"]      - one DataFrame per tab-separated event file
#   data["trace_time"] - the file name without its extension
data = {"event": [], "trace_time": []}
folders = [
    "2007_2009Events",
    "2010_2011Events",
    "2012_2013Events",
    "2014_2015Events",
    "2016_2019Events",
]
for folder in folders:
    # os.listdir returns entries in arbitrary order; sort so the event order
    # (and everything derived from it) is the same on every run and machine.
    for file in sorted(os.listdir(folder)):
        df = pd.read_csv(os.path.join(folder, file), sep="\t")
        data["event"].append(df)
        data["trace_time"].append(os.path.splitext(file)[0])

In [3]:
def derivative(time, x_col, order, crit, spacing, padlen=50):
    """
    Compute the first derivative of a smoothed time series.

    The series is demeaned, low-pass filtered forwards and backwards with a
    Butterworth filter (scipy.signal.filtfilt), then differentiated with
    np.gradient.

    Parameters
    time - event with times (unused; kept so existing calls keep working)
    x_col - column of x values of which to take the derivative of
    order - order of butterworth filter
    crit - critical value of butterworth filter (passed to butter as Wn)
    spacing - spacing of gradient
    padlen - number of samples filtfilt pads onto each end of the series;
             reduced automatically when the series is too short for it
    Returns
    grad - First derivative of the filtered series [numpy array]
    """

    y_data = x_col - np.mean(x_col)

    # filtfilt raises ValueError unless the series is longer than padlen, so
    # clamp the padding for short series instead of failing on them.
    pad = max(min(padlen, len(y_data) - 1), 0)

    # Smooth, then take the 1st derivative
    b, a = scipy.signal.butter(order, crit)
    filtered = scipy.signal.filtfilt(b, a, y_data, padlen=pad)
    grad = np.gradient(filtered, spacing)
    return grad


def derivative2(time, grad, order, crit, spacing, padlen=50):
    """
    Compute the second derivative from an existing first derivative.

    The first derivative is low-pass filtered forwards and backwards with a
    Butterworth filter (scipy.signal.filtfilt), then differentiated with
    np.gradient. Unlike `derivative`, the input is not demeaned first.

    Parameters
    time - event with times (unused; kept so existing calls keep working)
    grad - first derivative of the series
    order - order of butterworth filter
    crit - critical value of butterworth filter (passed to butter as Wn)
    spacing - spacing of gradient
    padlen - number of samples filtfilt pads onto each end of the series;
             reduced automatically when the series is too short for it
    Returns
    grad2 - Second derivative [numpy array]
    """
    # filtfilt raises ValueError unless the series is longer than padlen, so
    # clamp the padding for short series instead of failing on them.
    pad = max(min(padlen, len(grad) - 1), 0)

    # Smooth the 1st derivative, then take the 2nd derivative
    b, a = scipy.signal.butter(order, crit)
    grad_filtered = scipy.signal.filtfilt(b, a, grad, padlen=pad)
    grad2 = np.gradient(grad_filtered, spacing)

    return grad2

In [4]:
# Compute the average second derivative across all x traces of each event.
# Every trace goes through `derivative` twice: the first pass differentiates
# the trace, the second pass differentiates that result. The positional
# arguments are (order, crit, spacing): Butterworth order 4, critical value
# 0.1 on the first pass and 0.05 on the second, gradient spacing 15.
avg_grad2s = []
for event in data["event"]:
    # Trace columns are the ones whose name ends in "x"
    x_cols = [col for col in event if col.endswith("x")]
    grad2s = []
    for x_col in x_cols:
        grad = derivative(event["time"], event[x_col], 4, 0.1, 15)
        grad2 = derivative(event["time"], grad, 4, 0.05, 15)
        grad2s.append(grad2)
    # Average sample-by-sample across traces, ignoring NaN entries
    avg_grad2s.append(np.nanmean(grad2s, axis=0))
data["grad2"] = avg_grad2s

# Compute index of max of the averaged second derivative for each event
# NOTE(review): if an averaged series is entirely NaN, argmax cannot give a
# meaningful pick - confirm the input traces upstream.
max_index = [np.argmax(i) for i in data["grad2"]]
data["grad2maxIndex"] = max_index

# Calculate event start times based on 2nd derivative.
# argmax yields a position, so look the time up positionally with .iloc
# rather than by index label.
data["ev_time"] = [
    event["time"].iloc[idx]
    for event, idx in zip(data["event"], data["grad2maxIndex"])
]

KeyboardInterrupt: 

In [51]:
# Collect the picked start times in a one-column frame and write it out as a
# tab-separated text file without the index column
df = pd.DataFrame(data={"EventStartTime": data["ev_time"]})
df.to_csv(path_or_buf="EventStartTime.txt", sep="\t", index=False)