In [None]:
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt
import numpy as np
import datetime
import pandas as pd
import obspy
import glob 
import matplotlib.dates as mdates 
from obspy import UTCDateTime
from obspy.clients.fdsn import Client
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import matplotlib.ticker as mticker


In [None]:
# Load the raw pick table (MORADAS_20230827_10.csv) into `df`; the next cell
# selects and cleans its station/arrival columns.
picks_catalog_path = '/home/velgueta/notebooks/RainierDas/Notebooks/Phase-picking/csv/MORADAS_20230827_10.csv'
df = pd.read_csv(picks_catalog_path)

In [None]:

# Split the pick table into one CSV per minute.
#
# Input : `df` (loaded in the previous cell) with columns 'station_code',
#         'trace_s_arrival' and 'trace_p_arrival'.
# Output: one file per minute in `output_dir`, plus the globals `selected_df`,
#         `cleaned_df` and `grouped_dfs`.

# Keep only the station identifier and the two arrival-time columns.
selected_df = df[['station_code', 'trace_s_arrival', 'trace_p_arrival']]

# Drop rows that have no pick at all (both arrivals missing). The explicit
# .copy() makes cleaned_df an independent frame, so the column assignments
# below modify it directly instead of raising SettingWithCopyWarning.
cleaned_df = selected_df.dropna(subset=['trace_s_arrival', 'trace_p_arrival'], how='all').copy()

# Remove the literal text "das" from the station codes. regex=False requests a
# plain substring replacement regardless of the pandas version's default.
cleaned_df['station_code'] = cleaned_df['station_code'].str.replace('das', '', regex=False)

# Parse both arrival columns; values that cannot be parsed become NaT.
for arrival_column in ('trace_s_arrival', 'trace_p_arrival'):
    cleaned_df[arrival_column] = pd.to_datetime(cleaned_df[arrival_column], errors='coerce')

# Minute bucket of each row = earliest of its two arrivals, floored to the
# minute. 'min' is the supported alias; 'T' is deprecated since pandas 2.2.
cleaned_df['minute'] = cleaned_df[['trace_s_arrival', 'trace_p_arrival']].min(axis=1).dt.floor('min')

# One DataFrame per minute (rows whose minute is NaT are left out by groupby).
grouped_dfs = {minute: group.drop(columns='minute') for minute, group in cleaned_df.groupby('minute')}

# Write each group to its own CSV. Create the target directory first so that
# to_csv does not fail on a fresh checkout.
output_dir = 'pickerbyhour_2023-08-27_10'
os.makedirs(output_dir, exist_ok=True)
for minute, df_group in grouped_dfs.items():
    minute_str = minute.strftime('%Y-%m-%d_%H-%M')
    df_group.to_csv(os.path.join(output_dir, f'file_{minute_str}.csv'), index=False)

# Show the head of every per-minute DataFrame.
for minute, df_group in grouped_dfs.items():
    print(f"Minute: {minute}")
    print(df_group.head())
    print("\n")


In [None]:
# Plot one band-pass-filtered DAS record (time vs. channel) and overlay the
# P/S picks stored in the matching per-minute CSV.

# Channel window applied to RawData as [:, chan_min:chan_max].
chan_min=0
chan_max=-1

# DAS recording (HDF5). The context manager closes the file even when one of
# the reads raises.
h5_path = '/1-fnp/petasaur/p-wd07/rainier/decimator_2023-12-10_12.19.00_UTC.h5'
with h5py.File(h5_path, 'r') as data_file:
    this_data = np.array(data_file['Acquisition/Raw[0]/RawData'][:, chan_min:chan_max])
    this_time = np.array(data_file['Acquisition/Raw[0]/RawDataTime'])
    attrs = dict(data_file['Acquisition'].attrs)

# Number of channels actually loaded (chan_max - chan_min would be -1 here).
channel_number = this_data.shape[1]
low_cut1 = 2.5
hi_cut1 = 10
# Sampling rate taken as twice the 'MaximumFrequency' attribute.
fs = attrs['MaximumFrequency'] * 2
print(fs)

# 2nd-order Butterworth band-pass, applied forward and backward along axis 0.
b, a = butter(2, (low_cut1, hi_cut1), 'bp', fs=fs)
data_filt = filtfilt(b, a, this_data, axis=0)
date_format = mdates.DateFormatter('%H:%M:%S')
# Matplotlib date numbers for every time sample.
x_lims = mdates.date2num(this_time)

fig, ax = plt.subplots(figsize=(15, 10))

# Time on x, channel index on y (channel 0 at the top).
ax.imshow(data_filt.T, cmap='seismic', aspect='auto', vmin=-0.05, vmax=0.05,
          extent=[x_lims[0], x_lims[-1], data_filt.shape[1], 0])
ax.set_xlabel("Time UTC", fontsize=25)
ax.set_ylabel(" DAS channels ", fontsize=25)

ax.xaxis.set_major_formatter(date_format)
ax.xaxis_date()
ax.tick_params(axis='both', labelsize=20)

# Picks to overlay, read from the per-minute CSV.
picks = 'pickerbyhour_2023-12-10_12/file_2023-12-10_12-19.csv'
selected_df = pd.read_csv(picks)

# station_code is used directly as the y coordinate (channel index). Codes that
# are missing or not numeric cannot be placed on the axis and are skipped.
channel_index = pd.to_numeric(selected_df['station_code'], errors='coerce')
n_bad_codes = int(channel_index.isna().sum())
if n_bad_codes:
    print(f"Skipping {n_bad_codes} pick row(s) with a non-numeric station_code")

# One plot call per phase (P in red, S in blue) gives exactly one legend entry
# each without per-row bookkeeping.
for column, marker, phase in (('trace_p_arrival', 'ro', 'P-wave'),
                              ('trace_s_arrival', 'bo', 'S-wave')):
    usable = selected_df[column].notna() & channel_index.notna()
    if not usable.any():
        continue
    # The time strings are handed to date2num as a list, as the per-row version did.
    pick_dates = mdates.date2num(selected_df.loc[usable, column].tolist())
    ax.plot(pick_dates, channel_index[usable].to_numpy(), marker, markersize=3, label=phase)

# Only build a legend when at least one phase was drawn.
if ax.get_legend_handles_labels()[0]:
    ax.legend(fontsize=20)
fig.savefig('./picks_2023-12-10-12-19.png')
plt.show()


In [None]:
# Clean the avalanche pick table and save it as a new CSV.

# Load the raw pick table.
df = pd.read_csv('/home/velgueta/notebooks/RainierDas/Notebooks/Phase-picking/MORADAS_avalanche.csv')

# Keep only the station identifier and the two arrival-time columns.
selected_df = df[['station_code', 'trace_s_arrival', 'trace_p_arrival']]

# Drop rows that have NaN in both arrival columns. The explicit .copy() makes
# cleaned_df an independent frame, so the column assignments below modify it
# directly instead of raising SettingWithCopyWarning.
cleaned_df = selected_df.dropna(subset=['trace_s_arrival', 'trace_p_arrival'], how='all').copy()

# Remove the literal text "das" from the station codes. regex=False requests a
# plain substring replacement regardless of the pandas version's default.
cleaned_df['station_code'] = cleaned_df['station_code'].str.replace('das', '', regex=False)

# Parse both arrival columns; values that cannot be parsed become NaT.
for arrival_column in ('trace_s_arrival', 'trace_p_arrival'):
    cleaned_df[arrival_column] = pd.to_datetime(cleaned_df[arrival_column], errors='coerce')

# Minute of each row = earliest of its two arrivals, floored to the minute.
# 'min' is the supported alias; 'T' is deprecated since pandas 2.2.
cleaned_df['minute'] = cleaned_df[['trace_s_arrival', 'trace_p_arrival']].min(axis=1).dt.floor('min')

# Save the cleaned frame (including the 'minute' column) to a new CSV file.
cleaned_df.to_csv('/home/velgueta/notebooks/RainierDas/Notebooks/Phase-picking/MORADAS_avalanche_cleaned.csv', index=False)

# Preview of the cleaned frame (disabled).
#print(cleaned_df.head())


In [None]:
# Plot the band-pass-filtered DAS record of the avalanche (time vs. optical
# distance) and overlay the cleaned P/S picks.

# Channel window applied to RawData as [:, chan_min:chan_max].
chan_min = 0
chan_max = -1

# DAS recording (HDF5). The context manager closes the file even when one of
# the reads raises.
h5_path = '/home/velgueta/notebooks/RainierDas/Notebooks/Phase-picking/decimator_2023-12-10_12.18.00_UTC.h5'
with h5py.File(h5_path, 'r') as data_file:
    this_data = np.array(data_file['Acquisition/Raw[0]/RawData'][:, chan_min:chan_max])
    this_time = np.array(data_file['Acquisition/Raw[0]/RawDataTime'])
    attrs = dict(data_file['Acquisition'].attrs)

# Number of channels actually loaded (chan_max - chan_min would be -1 here).
channel_number = this_data.shape[1]
low_cut1 = 2
hi_cut1 = 10
# Sampling rate taken as twice the 'MaximumFrequency' attribute.
fs = attrs['MaximumFrequency'] * 2
print(fs)

# 2nd-order Butterworth band-pass, applied forward and backward along axis 0.
b, a = butter(2, (low_cut1, hi_cut1), 'bp', fs=fs)
data_filt = filtfilt(b, a, this_data, axis=0)
date_format = mdates.DateFormatter('%H:%M:%S')
# Matplotlib date numbers for every time sample.
x_lims = mdates.date2num(this_time)

# The y axis is labelled in km, so channel indices are scaled by the channel
# spacing, the same conversion plot_data_first uses.
# NOTE(review): assumes 'SpatialSamplingInterval' is in metres -- confirm.
spacing_m = attrs['SpatialSamplingInterval']
x_max = data_filt.shape[1] * spacing_m / 1000

fig, ax = plt.subplots(figsize=(15, 10))

# Time on x, optical distance on y (first channel at the top).
ax.imshow(data_filt.T, cmap='seismic', aspect='auto', vmin=-0.05, vmax=0.05,
          extent=[x_lims[0], x_lims[-1], x_max, 0])
ax.set_xlabel("Time UTC", fontsize=20)
ax.set_ylabel("Optical distance (km)", fontsize=20)

ax.xaxis.set_major_formatter(date_format)
ax.xaxis_date()
ax.tick_params(axis='both', labelsize=20)

# Picks to overlay, read from the cleaned CSV.
picks = '/home/velgueta/notebooks/RainierDas/Notebooks/Phase-picking/MORADAS_avalanche_cleaned.csv'
selected_df = pd.read_csv(picks)

# station_code is the channel index of each pick. Codes that are missing or not
# numeric cannot be placed on the axis and are skipped.
channel_index = pd.to_numeric(selected_df['station_code'], errors='coerce')
n_bad_codes = int(channel_index.isna().sum())
if n_bad_codes:
    print(f"Skipping {n_bad_codes} pick row(s) with a non-numeric station_code")

# One plot call per phase (P in red, S in blue) gives exactly one legend entry
# each without per-row bookkeeping.
for column, marker, phase in (('trace_p_arrival', 'ro', 'P-wave'),
                              ('trace_s_arrival', 'bo', 'S-wave')):
    usable = selected_df[column].notna() & channel_index.notna()
    if not usable.any():
        continue
    # The time strings are handed to date2num as a list, as the per-row version did.
    pick_dates = mdates.date2num(selected_df.loc[usable, column].tolist())
    pick_km = channel_index[usable].to_numpy() * spacing_m / 1000
    ax.plot(pick_dates, pick_km, marker, markersize=1, label=phase)

# Only build a legend when at least one phase was drawn.
if ax.get_legend_handles_labels()[0]:
    ax.legend(fontsize=20)
# Separate file name: the earlier plot cell already writes
# './picks_2023-12-10-12-19.png', which this cell used to overwrite.
fig.savefig('./picks_avalanche_2023-12-10-12-18.png')
plt.show()


In [None]:
# Settings read as globals by plot_data_first / plot_data_lastones.
# Channel window applied to RawData as [:, chan_min:chan_max].
# NOTE(review): an end index of -1 excludes the final channel -- confirm intended.
chan_min, chan_max = 0, -1
# Band-pass corner frequencies handed to butter() together with fs
# (presumably Hz -- confirm against the acquisition attributes).
low_cut1, hi_cut1 = 2, 9

    
def _plot_das_panel(ax, file_path, zoom_start_str, zoom_end_str, picks_path, label, show_y_axis):
    """Draw one filtered DAS panel with its P/S picks on ``ax``.

    Shared implementation of ``plot_data_first`` and ``plot_data_lastones``,
    which differ only in whether the y axis is labelled.

    The function reads 'Acquisition/Raw[0]/RawData' (columns
    ``chan_min:chan_max``), 'Acquisition/Raw[0]/RawDataTime' and the
    'Acquisition' attributes from the HDF5 file. It band-pass filters the data
    along axis 0 (2nd-order Butterworth between the module globals
    ``low_cut1`` and ``hi_cut1``, applied with ``filtfilt``) and shows it with
    time in seconds since the first sample on x and distance in km on y.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    file_path : path of the HDF5 recording.
    zoom_start_str, zoom_end_str : date strings parsed with
        ``mdates.datestr2num``; they define the x window, clamped to the time
        range of the file.
    picks_path : CSV with columns 'station_code', 'trace_p_arrival' and
        'trace_s_arrival'.
    label : text written in the upper-left corner of the panel.
    show_y_axis : if True, add the y label and y tick labels; if False, hide
        the y tick labels.

    Notes
    -----
    Assumes RawDataTime holds integer microseconds (it is scaled by 1e-6 to get
    seconds) and 'SpatialSamplingInterval' is in metres (divided by 1000 to get
    km) -- TODO confirm against the acquisition metadata.
    """
    # Load the data; the context manager closes the file even if a read fails.
    with h5py.File(file_path, 'r') as data_file:
        this_data = np.array(data_file['Acquisition/Raw[0]/RawData'][:, chan_min:chan_max])
        this_time = np.array(data_file['Acquisition/Raw[0]/RawDataTime'])
        attrs = dict(data_file['Acquisition'].attrs)

    # Filter the data; the sampling rate is twice the 'MaximumFrequency' attribute.
    fs = attrs['MaximumFrequency'] * 2
    b, a = butter(2, (low_cut1, hi_cut1), 'bp', fs=fs)
    data_filt = filtfilt(b, a, this_data, axis=0)

    spacing_m = attrs['SpatialSamplingInterval']
    x_max = this_data.shape[1] * spacing_m / 1000  # km

    # Time of every sample in seconds since the first sample.
    ts = (this_time - this_time[0]).astype(np.float64) * 1e-6
    # Matplotlib date number of the first sample, used as the time origin.
    t0_num = mdates.date2num(this_time[0])

    # Zoom window in seconds since the first sample, kept inside the record.
    zoom_start = max(0, (mdates.datestr2num(zoom_start_str) - t0_num) * 24 * 3600)
    zoom_end = min(ts[-1], (mdates.datestr2num(zoom_end_str) - t0_num) * 24 * 3600)

    # Draw the panel (first channel at the top).
    ax.imshow(data_filt.T, cmap='seismic', aspect='auto', vmin=-0.1, vmax=0.1,
              extent=[ts[0], ts[-1], x_max, 0])

    ax.set_xlim(zoom_start, zoom_end)
    ax.set_xlabel("Time (sec)", fontsize=25)
    ax.tick_params(axis='x', labelsize=16)
    if show_y_axis:
        ax.set_ylabel("Optical distance (km)", fontsize=25)
        ax.tick_params(axis='y', labelsize=16)
    else:
        # Hide the y tick labels explicitly instead of shrinking them to size 0.
        ax.tick_params(axis='y', labelleft=False)
    ax.xaxis.set_major_locator(mticker.MultipleLocator(2))
    ax.text(0.02, 0.98, label, transform=ax.transAxes, fontsize=18,
            verticalalignment='top', horizontalalignment='left')

    # Overlay the picks from the CSV.
    picks_df = pd.read_csv(picks_path)
    # station_code is the channel index; rows whose code is missing or not
    # numeric cannot be placed on the axis and are skipped.
    channel_index = pd.to_numeric(picks_df['station_code'], errors='coerce')

    # One plot call per phase (P in red, S in blue) gives exactly one legend
    # entry each without per-row bookkeeping.
    for column, marker, phase in (('trace_p_arrival', 'ro', 'P-wave'),
                                  ('trace_s_arrival', 'bo', 'S-wave')):
        usable = picks_df[column].notna() & channel_index.notna()
        if not usable.any():
            continue
        # The time strings are handed to date2num as a list, as the per-row version did.
        pick_seconds = (mdates.date2num(picks_df.loc[usable, column].tolist()) - t0_num) * 24 * 3600
        pick_km = channel_index[usable].to_numpy() * spacing_m / 1000
        ax.plot(pick_seconds, pick_km, marker, markersize=2, label=phase)

    # Only build a legend when at least one phase was drawn.
    if ax.get_legend_handles_labels()[0]:
        ax.legend(fontsize=15, loc='upper right')


def plot_data_first(ax, file_path, zoom_start_str, zoom_end_str, picks_path,label):
    """Draw a filtered DAS panel with picks, including the y label and y tick labels.

    Intended for the left-most panel of a row. See ``_plot_das_panel`` for the
    meaning of the arguments.
    """
    _plot_das_panel(ax, file_path, zoom_start_str, zoom_end_str, picks_path, label,
                    show_y_axis=True)


def plot_data_lastones(ax, file_path, zoom_start_str, zoom_end_str, picks_path, label):
    """Draw a filtered DAS panel with picks, without y label or y tick labels.

    Intended for the panels to the right of the first one. See
    ``_plot_das_panel`` for the meaning of the arguments.
    """
    _plot_das_panel(ax, file_path, zoom_start_str, zoom_end_str, picks_path, label,
                    show_y_axis=False)



In [None]:
# paper figures: one row of four panels, one recording per panel
fig, axs = plt.subplots(1, 4, figsize=(18, 7))

# One entry per panel:
#   (HDF5 recording, zoom window start, zoom window end, picks CSV)
# NOTE(review): the third window runs 10:33:45 -> 10:34:55 (70 s) while the
# other three span 10 s; the plotting functions clamp the window to the time
# range of the file. Confirm '10:34:55' is not a typo for '10:33:55'.
files = [
    ('/1-fnp/petasaur/p-jbod1/rainier/2023/08/27/decimator_2023-08-27_10.10.00_UTC.h5',
     '2023-08-27 10:10:22', '2023-08-27 10:10:32', 'pickerbyhour_2023-08-27_10/file_2023-08-27_10-10.csv'),

    ('/1-fnp/petasaur/p-jbod1/rainier/2023/08/27/decimator_2023-08-27_10.11.00_UTC.h5',
     '2023-08-27 10:11:00', '2023-08-27 10:11:10', 'pickerbyhour_2023-08-27_10/file_2023-08-27_10-11.csv'),

    ('/1-fnp/petasaur/p-jbod1/rainier/2023/08/27/decimator_2023-08-27_10.33.00_UTC.h5',
     '2023-08-27 10:33:45', '2023-08-27 10:34:55', 'pickerbyhour_2023-08-27_10/file_2023-08-27_10-33.csv'),

    ('/1-fnp/petasaur/p-jbod1/rainier/2023/08/27/decimator_2023-08-27_10.57.00_UTC.h5',
     '2023-08-27 10:57:36', '2023-08-27 10:57:46', 'pickerbyhour_2023-08-27_10/file_2023-08-27_10-57.csv')
]

# Panels are tagged 'a)', 'b)', 'c)', 'd)'. Only the first panel gets the
# y-axis labelling; the others use the variant without it.
for i, panel in enumerate(files):
    file_path, start, end, picks = panel
    label = chr(ord('a') + i) + ")"
    plotter = plot_data_first if i == 0 else plot_data_lastones
    plotter(axs[i], file_path, start, end, picks, label)

# Tighten the layout, write the figure to disk, then display it.
plt.tight_layout()
plt.savefig('phasepick-plot.png', dpi=300, bbox_inches='tight')
plt.show()