In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [None]:
# Event catalogue; the path is resolved relative to the notebook's working directory.
df = pd.read_csv('./3firstmonths_newdistance.csv')

# Subset for the top row of panels: keep only events whose 'distance_km'
# is below 60 or above 70 (i.e. drop the closed 60-70 band).
df_filtered = df.loc[df['distance_km'].lt(60) | df['distance_km'].gt(70)]

# (panel title, dataframe) pairs -- one entry per figure row.
datasets = [
    ("Excluding Mt St Helens and Mt Adams", df_filtered),
    (" ", df),
]

# Depth bins as (min, max) pairs -- one entry per figure column -- together
# with the colour and line style used for the decision boundary of each bin.
depth_ranges = [(0, 5), (5, 15)]
colors = ['black'] * 2
linestyles = ['-'] * 2

# Number of random train/test resamplings used to estimate the LDA boundary.
num_iterations = 1000

# Seeded generator shared by every resampling below, so the reported slope
# statistics and the figure are reproducible after Restart & Run All.
# (scikit-learn accepts a RandomState instance as `random_state`; each call
# advances it, so successive splits still differ from one another.)
rng = np.random.RandomState(42)

# create figure with 2 rows (datasets) and 2 columns (depth ranges)
fig, axes = plt.subplots(2, 2, figsize=(18, 14), sharey=True)

labels = ['a)', 'b)', 'c)', 'd)']  # subplot labels, row-major order

for j, (title, data) in enumerate(datasets):
    for i, (depth_min, depth_max) in enumerate(depth_ranges):
        ax = axes[j, i]

        # add label in upper-left corner
        ax.text(0.02, 0.98, labels[j * len(depth_ranges) + i], transform=ax.transAxes,
                fontsize=22, fontweight='normal',
                verticalalignment='top', horizontalalignment='left')

        # keep events whose depth lies in the half-open range [depth_min, depth_max)
        in_depth_range = (data['depth'] >= depth_min) & (data['depth'] < depth_max)
        df_depth_range = data[in_depth_range].copy()

        # compute hypocentral distance and its log10; rows whose log is
        # non-finite (zero distance) or whose magnitude is missing are dropped
        df_depth_range['hypocentral_distance_km'] = np.sqrt(
            df_depth_range['distance_km'] ** 2 + df_depth_range['depth'] ** 2)
        df_depth_range['log_hypocentral_distance_km'] = np.log10(df_depth_range['hypocentral_distance_km'])
        df_depth_range = (
            df_depth_range
            .replace([np.inf, -np.inf], np.nan)
            .dropna(subset=['log_hypocentral_distance_km', 'magnitude'])
        )

        # convert 'DAS_visibility' to binary labels: 'n' -> 0, anything else
        # (including missing values) -> 1
        df_depth_range['visibility_label'] = np.where(df_depth_range['DAS_visibility'] == 'n', 0, 1)

        X = df_depth_range[['log_hypocentral_distance_km', 'magnitude']].values
        y = df_depth_range['visibility_label'].values

        # Refit LDA on repeated random 80% subsets. Each fit gives the boundary
        #   coef[0] * log10(d) + coef[1] * M + intercept = 0
        # i.e.  M = (-coef[0] / coef[1]) * log10(d) + (-intercept / coef[1]).
        # Slope AND line intercept are recorded per fit so the plotted line is
        # the average over all fits, not a mix of the mean slope with the
        # intercept of whichever fit happened to run last.
        slopes = []
        coefs = []
        line_intercepts = []
        for _ in range(num_iterations):
            X_train, _X_holdout, y_train, _y_holdout = train_test_split(
                X, y, test_size=0.2, random_state=rng)
            lda = LDA()
            lda.fit(X_train, y_train)
            coef = lda.coef_[0]
            slopes.append(-coef[0] / coef[1])
            line_intercepts.append(-lda.intercept_[0] / coef[1])
            coefs.append(coef)

        slope_mean = np.mean(slopes)
        slope_std = np.std(slopes)

        coefs_mean = np.mean(coefs, axis=0)
        coefs_std = np.std(coefs, axis=0)

        intercept_line = np.mean(line_intercepts)

        # decision boundary sampled in log10(distance), padded by 0.1 dex on
        # either side of the full data range
        xx = np.linspace(X[:, 0].min() - 0.1, X[:, 0].max() + 0.1, 100)
        yy = slope_mean * xx + intercept_line

        # plot decision boundary
        # The first fragment is a raw f-string so the LaTeX backslashes are not
        # parsed as (invalid) Python escapes; the newline is kept in a normal
        # string so the rendered label is unchanged.
        # NOTE(review): 'a' reports the mean raw LDA weight on log-distance
        # (coef_[0]), not the intercept of the plotted line -- confirm this is
        # the quantity intended for the legend.
        label = (
            rf'$\mathbf{{Depth={depth_min}-{depth_max}\ km}}$' '\n'
            f'b = {slope_mean:.2f} ± {slope_std:.2f}\n'
            f'a = {coefs_mean[0]:.2f} ± {coefs_std[0]:.2f}'
        )
        ax.plot(10**xx, yy, color=colors[i], linestyle=linestyles[i], label=label, linewidth=2)

        # plot every event in this panel's subset (not just the training part
        # of the final random split, which would omit a random 20% of events)
        not_visible = y == 0
        ax.scatter(10**X[not_visible, 0], X[not_visible, 1], color='navy', label='not visible',
                   alpha=0.7, edgecolor='w', s=120, marker='o')
        ax.scatter(10**X[~not_visible, 0], X[~not_visible, 1], color='darkorange', label='visible',
                   alpha=0.7, edgecolor='w', s=120, marker='s')

        # configure log scale for x-axis and tick labels
        ax.set_xscale('log')
        ax.set_xticks([1, 10, 100])
        ax.get_xaxis().set_major_formatter(plt.ScalarFormatter())
        ax.set_xlabel('hypocentral distance [km]', fontsize=20)
        ax.set_title(title, fontsize=16)
        ax.tick_params(axis="x", labelsize=20)
        ax.tick_params(axis="y", labelsize=20)

# Label the y-axis on the left-hand panel of each row.
for ax in axes[:, 0]:
    ax.set_ylabel('magnitude', fontsize=20)

# Give every panel the same semi-transparent legend and dashed grid.
for ax in axes.ravel():
    ax.legend(
        loc='upper left',
        fontsize=15,
        frameon=True,
        framealpha=0.1,
        fancybox=True,
        borderpad=1,
    )
    ax.grid(True, linestyle='--', alpha=0.5)

# Tighten the layout, write the figure to disk, then display it.
plt.tight_layout()
plt.savefig('lda_paperfourpanels.png', dpi=300)
plt.show()
