In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dense, Flatten, Dropout, Input, LSTM, Bidirectional
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [2]:
# Load seismic data from the CSV file
def load_seismic_data(file_path):
    data = pd.read_csv(file_path)
    return data

# Preprocess the velocity data
def preprocess_data(velocity_data):
    # 1. Normalize the velocity data (zero mean, unit variance)
    normalized_data = (velocity_data - np.mean(velocity_data)) / np.std(velocity_data)
    
    # 2. Reshape to add the feature dimension (required by Conv1D)
    # reshaped_data = np.expand_dims(normalized_data, axis=-1)  # Shape: (timesteps, 1 feature)
    
    return normalized_data

def preprocess_derivative(derivative_data):
    normalized_derivative = (derivative_data - np.mean(derivative_data)) / np.std(derivative_data)
    return normalized_derivative

def calculate_derivative(velocity_data):
    derivative = np.diff(velocity_data, prepend=velocity_data[0])  # Calculate the derivative
    return derivative

# Model architecture: 1D Convolutional Neural Network
def build_model_with_lstm(input_shape):
    model = Sequential()

    # Input layer
    model.add(Input(shape=input_shape))
    
    # Conv1D layers for initial feature extraction
    model.add(Conv1D(64, kernel_size=5, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))

    # Add a Bidirectional LSTM to capture temporal dependencies
    model.add(Bidirectional(LSTM(128, return_sequences=False)))

    # Dense and output layers
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))  # Output the predicted relative time (in seconds)
    
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    
    return model

# Load and process the catalog
cat_directory = './data/lunar/training/catalogs/'
cat_file = cat_directory + 'apollo12_catalog_GradeA_final.csv'
cat = pd.read_csv(cat_file)

# Prepare training data (seismic data + labels for quake start detection)
X_train = []
y_train = []

# Define a max length for padding/truncating
MAX_TIMESTEPS = 60000  # You can adjust this based on your data

# Loop through the catalog
for i, (file_name, _, start_time, _, quake_type) in cat.iterrows():
    file_path = f"./data/lunar/training/data/S12_GradeA/{file_name}.csv"
    if not os.path.isfile(file_path):
        continue

    # Load seismic data
    data_chunk = load_seismic_data(file_path)
    velocity = data_chunk['velocity(m/s)'].values
    time = data_chunk['time_rel(sec)'].values

    velocity_derivative = calculate_derivative(velocity)
    processed_velocity = preprocess_data(velocity)
    processed_derivative = preprocess_derivative(velocity_derivative)

    # Stack both velocity and its derivative as input features
    stacked_data = np.stack((processed_velocity, processed_derivative), axis=-1)  # Shape: (timesteps, 2 features)

    # Labeling: Create a label with the exact time index of the quake start
    start_index = np.argmin(np.abs(time - start_time))  # Closest index to the start time
    
    # Append processed data and the start index to the training set
    X_train.append(stacked_data)
    y_train.append(start_index)  # The label is the index of the quake start

# Convert lists to numpy arrays with consistent time series length using padding
X_train_padded = pad_sequences(X_train, maxlen=MAX_TIMESTEPS, dtype='float32', padding='post', truncating='post')

# Convert lists to numpy arrays for model training
X_train = np.array(X_train_padded)
y_train = np.array(y_train)  # The labels are now the start indices

# Check the shape of X_train and y_train
print("X_train shape:", X_train.shape)  # Should be (samples, timesteps, 1)
print("y_train shape:", y_train.shape)  # Should be (samples,)
print(X_train[0])

# Split data into train and test sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

#normalize to 0-1
y_train = y_train / MAX_TIMESTEPS  # MAX_TIMESTEPS is 60000 or the max length of the sequence
y_val = y_val / MAX_TIMESTEPS

# Build the model
# Build the updated model
input_shape = (X_train.shape[1], X_train.shape[2])
model = build_model_with_lstm(input_shape)

X_train shape: (75, 60000, 2)
y_train shape: (75,)
[[ 2.2174739e-03 -1.2078283e-10]
 [ 2.1736217e-03 -6.3413107e-05]
 [ 2.1539365e-03 -2.8466082e-05]
 ...
 [-1.2574996e-01 -2.6085263e-01]
 [-2.6779351e-01 -2.0540342e-01]
 [-1.6692266e-01  1.4586525e-01]]


2024-10-05 22:56:37.000958: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2024-10-05 22:56:37.001010: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-10-05 22:56:37.001019: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-10-05 22:56:37.001045: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-10-05 22:56:37.001068: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [3]:
# Train the model
history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_val, y_val),verbose=2)

Epoch 1/30


2024-10-05 22:56:47.460688: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


2/2 - 16s - 8s/step - loss: 29.9265 - mae: 4.6440 - val_loss: 23.8741 - val_mae: 4.2196
Epoch 2/30
2/2 - 15s - 7s/step - loss: 28.2550 - mae: 4.4706 - val_loss: 21.9593 - val_mae: 3.9804
Epoch 3/30
2/2 - 15s - 8s/step - loss: 26.2404 - mae: 4.2965 - val_loss: 18.6631 - val_mae: 3.6301
Epoch 4/30
2/2 - 14s - 7s/step - loss: 22.5816 - mae: 3.9723 - val_loss: 12.9555 - val_mae: 3.0311
Epoch 5/30
2/2 - 14s - 7s/step - loss: 15.6430 - mae: 3.3053 - val_loss: 16.4728 - val_mae: 3.3451
Epoch 6/30
2/2 - 14s - 7s/step - loss: 16.5644 - mae: 3.3614 - val_loss: 8.2889 - val_mae: 2.3865
Epoch 7/30
2/2 - 17s - 8s/step - loss: 11.6307 - mae: 3.0241 - val_loss: 7.5794 - val_mae: 2.2741
Epoch 8/30
2/2 - 14s - 7s/step - loss: 10.6500 - mae: 2.8534 - val_loss: 7.1653 - val_mae: 2.2205
Epoch 9/30
2/2 - 13s - 7s/step - loss: 10.1811 - mae: 2.7537 - val_loss: 6.7059 - val_mae: 2.1720
Epoch 10/30
2/2 - 14s - 7s/step - loss: 9.0707 - mae: 2.6211 - val_loss: 6.3449 - val_mae: 2.1256
Epoch 11/30
2/2 - 14s - 7s

In [4]:
# Evaluate the model
loss, mae = model.evaluate(X_val, y_val)
print(f"Validation MAE: {mae:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 5.0824 - mae: 1.9157
Validation MAE: 1.9157


In [5]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Function to load seismic data
def load_seismic_data(file_path):
    data = pd.read_csv(file_path)
    return data

# Define the directory for test files
test_directory = "./data/lunar/test/data/S12_GradeB/"
output_plot_dir = "./predictions_plots_v2/"  # Directory to save plots

# Ensure output directory exists
os.makedirs(output_plot_dir, exist_ok=True)

# Function to predict moonquake start and save plot for each test file
def predict_and_plot_for_all_files(model, test_directory, max_timesteps=60000):
    predictions = []  # Store the results here
    
    # Iterate through all the CSV files in the test directory
    for file_name in os.listdir(test_directory):
        if file_name.endswith(".csv"):  # Only process CSV files
            file_path = os.path.join(test_directory, file_name)
            print(f"Processing file: {file_name}")  # For debugging/logging
            
            # Load seismic data from the file
            data_chunk = load_seismic_data(file_path)
            velocity = data_chunk['velocity(m/s)'].values
            time = data_chunk['time_rel(sec)'].values
            
            velocity_derivative = calculate_derivative(velocity)
            processed_velocity = preprocess_data(velocity)
            processed_derivative = preprocess_derivative(velocity_derivative)

            # Stack both velocity and its derivative as input features
            stacked_data = np.stack((processed_velocity, processed_derivative), axis=-1)  # Shape: (timesteps, 2 features)
            # Preprocess the velocity data
            stacked_data = pad_sequences([stacked_data], maxlen=max_timesteps, dtype='float32', padding='post', truncating='post')

            # Make prediction
            predicted_index = model.predict(stacked_data)
            predicted_index = int(predicted_index[0] * MAX_TIMESTEPS)  # Scale back to original range
            
            # Get the predicted start time using the index
            predicted_start_time = time[predicted_index]

            # Plotting the seismic data and the predicted moonquake start
            plt.figure(figsize=(10, 6))
            plt.plot(time, velocity, label="Seismic Velocity Data", color='blue')
            
            # Add a red line for the predicted quake start time
            plt.axvline(x=predicted_start_time, color='red', linestyle='--', label=f"Predicted Quake Start: {predicted_start_time:.2f}s")
            
            plt.title(f"Moonquake Prediction for {file_name}")
            plt.xlabel("Time (seconds)")
            plt.ylabel("Velocity (m/s)")
            plt.legend()
            
            # Save the plot to a file
            plot_filename = os.path.join(output_plot_dir, f"{file_name}_prediction_plot.png")
            plt.savefig(plot_filename)
            plt.close()  # Close the plot to avoid displaying it during processing

            # Store the result (file_name, predicted_start_time)
            predictions.append((file_name, predicted_start_time))
    
    return predictions

# Example usage
predictions = predict_and_plot_for_all_files(model, test_directory)

# Output the predictions
for file_name, predicted_start_time in predictions:
    print(f"File: {file_name}, Predicted Moonquake Start Time: {predicted_start_time}")

Processing file: xa.s12.00.mhz.1970-05-23HR00_evid00027.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 579ms/step
Processing file: xa.s12.00.mhz.1977-04-11HR00_evid00915.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 349ms/step
Processing file: xa.s12.00.mhz.1970-07-18HR00_evid00036.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 349ms/step
Processing file: xa.s12.00.mhz.1971-11-24HR00_evid00156.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 339ms/step
Processing file: xa.s12.00.mhz.1972-12-06HR00_evid00342.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 342ms/step
Processing file: xa.s12.00.mhz.1974-03-14HR00_evid00506.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 348ms/step
Processing file: xa.s12.00.mhz.1973-11-22HR00_evid00475.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 349ms/step
Processing file: xa.s12.00.mhz.1971-04-08HR01_evid00083.csv
[1m1/1[

In [8]:
# Define the directory for test files
test_directory = "./data/lunar/test/data/S12_GradeB/"

# Function to iterate through the test files and make predictions
def predict_moonquake_starts_for_all_files(model, test_directory, max_timesteps=60000):
    predictions = []  # Store the results here
    
    # Iterate through all the CSV files in the test directory
    for file_name in os.listdir(test_directory):
        if file_name.endswith(".csv"):  # Only process CSV files
            file_path = os.path.join(test_directory, file_name)
            print(f"Processing file: {file_name}")  # For debugging/logging
            
            # Load seismic data from the file
            data_chunk = load_seismic_data(file_path)
            velocity = data_chunk['velocity(m/s)'].values
            time = data_chunk['time_rel(sec)'].values
            
            velocity_derivative = calculate_derivative(velocity)
            processed_velocity = preprocess_data(velocity)
            processed_derivative = preprocess_derivative(velocity_derivative)

            # Stack both velocity and its derivative as input features
            stacked_data = np.stack((processed_velocity, processed_derivative), axis=-1)  # Shape: (timesteps, 2 features)

            # Preprocess the velocity data
            processed_velocity = preprocess_data(velocity)
            processed_velocity = pad_sequences([processed_velocity], maxlen=max_timesteps, dtype='float32', padding='post', truncating='post')

            # Make prediction
            predicted_index = model.predict(processed_velocity)
            predicted_index = int(predicted_index[0])  # Convert prediction to integer index
            
            # Get the predicted start time using the index
            predicted_start_time = time[predicted_index]

            # Store the result (file_name, predicted_start_time)
            predictions.append((file_name, predicted_start_time))
    
    return predictions

# Example usage
predictions = predict_moonquake_starts_for_all_files(model, test_directory)

# Output the predictions
for file_name, predicted_start_time in predictions:
    print(f"File: {file_name}, Predicted Moonquake Start Time: {predicted_start_time}")

Processing file: xa.s12.00.mhz.1970-05-23HR00_evid00027.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 911ms/step
Processing file: xa.s12.00.mhz.1977-04-11HR00_evid00915.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Processing file: xa.s12.00.mhz.1970-07-18HR00_evid00036.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Processing file: xa.s12.00.mhz.1971-11-24HR00_evid00156.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Processing file: xa.s12.00.mhz.1972-12-06HR00_evid00342.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Processing file: xa.s12.00.mhz.1974-03-14HR00_evid00506.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Processing file: xa.s12.00.mhz.1973-11-22HR00_evid00475.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Processing file: xa.s12.00.mhz.1971-04-08HR01_evid00083.csv
[1m1/1[0m [3

In [6]:

# Evaluate the model
loss, mae = model.evaluate(X_val, y_val)
print(f"Validation MAE: {mae:.4f}")

# Predict moonquake start on new data
def predict_moonquake_start(file_path):
    data_chunk = load_seismic_data(file_path)
    velocity = data_chunk['velocity(m/s)'].values
    processed_velocity = preprocess_data(velocity)
    processed_velocity = pad_sequences([processed_velocity], maxlen=MAX_TIMESTEPS, dtype='float32', padding='post', truncating='post')

    # Make prediction
    predicted_index = model.predict(processed_velocity)
    predicted_index = int(predicted_index[0])  # Convert prediction to integer index
    
    start_time = data_chunk['time_rel(sec)'][predicted_index]
    return start_time

# Example prediction on new test data
test_file_path = "./data/lunar/test/data/S12_GradeB/test_file.csv"
predicted_start_time = predict_moonquake_start(test_file_path)
print(f"Predicted Moonquake Start Time: {predicted_start_time}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 457ms/step - loss: 50491600896.0000 - mae: 188528.8594
Validation MAE: 188528.8594


FileNotFoundError: [Errno 2] No such file or directory: './data/lunar/test/S12_GradeA/test_file.csv'