In [2]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dense, Flatten, Dropout, Input, LSTM, Bidirectional
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [3]:
# Load seismic data from the CSV file
def load_seismic_data(file_path):
    """Read one seismic trace CSV into a DataFrame.

    Args:
        file_path: Path (or file-like object) handed straight to `pd.read_csv`.

    Returns:
        The parsed `pandas.DataFrame`, with whatever columns the CSV declares.
    """
    return pd.read_csv(file_path)

# Preprocess the velocity data
def preprocess_data(velocity_data):
    """Standardize a velocity trace and append a trailing feature axis.

    Args:
        velocity_data: Array-like of velocity samples (a 1-D trace in this
            notebook).

    Returns:
        numpy.ndarray with one extra trailing axis of size 1, i.e.
        (timesteps, 1) for a 1-D input. Values have zero mean and, unless the
        trace is constant, unit variance. A constant trace comes back as all
        zeros rather than NaN.
    """
    samples = np.asarray(velocity_data)
    centered = samples - np.mean(samples)

    # A flat trace has zero standard deviation; dividing by it would turn every
    # sample into NaN. Fall back to a divisor of 1 so the centered (all-zero)
    # trace is returned unchanged.
    spread = np.std(samples)
    if spread == 0:
        spread = 1.0
    normalized_data = centered / spread

    # Conv1D expects a feature dimension after the time axis.
    return np.expand_dims(normalized_data, axis=-1)

# Model architecture: two Conv1D/MaxPooling1D stages, then an LSTM and a dense regression head
def build_complex_model(input_shape):
    """Assemble and compile the Conv1D + LSTM regression network.

    Layer order: Input, Conv1D(64, k=5, relu), MaxPooling1D(2),
    Conv1D(128, k=5, relu), MaxPooling1D(2), LSTM(128, final state only),
    Flatten, Dense(128, relu), Dropout(0.5), Dense(1).

    Args:
        input_shape: Shape of a single sample (no batch axis), forwarded to
            `Input(shape=...)`.

    Returns:
        The `Sequential` model, compiled with the Adam optimizer, mean squared
        error loss and mean absolute error as a metric.
    """
    network = Sequential()

    layer_stack = [
        Input(shape=input_shape),
        # Convolutional stage 1
        Conv1D(64, kernel_size=5, activation='relu'),
        MaxPooling1D(pool_size=2),
        # Convolutional stage 2
        Conv1D(128, kernel_size=5, activation='relu'),
        MaxPooling1D(pool_size=2),
        # Recurrent summary of the pooled feature sequence; only the last
        # output is kept (return_sequences=False)
        LSTM(128, return_sequences=False),
        # Regression head; the single linear unit emits the predicted
        # (normalized) onset index
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(1),
    ]
    for layer in layer_stack:
        network.add(layer)

    network.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return network

# Load and process the catalog
cat_directory = './data/lunar/training/catalogs/'
cat_file = cat_directory + 'apollo12_catalog_GradeA_final.csv'
cat = pd.read_csv(cat_file)

# Prepare training data (seismic traces + quake-onset sample index as the label)
X_train = []
y_train = []

# Every trace is padded/truncated to this many samples before reaching the model
MAX_TIMESTEPS = 60000  # You can adjust this based on your data

# Counts events whose labelled onset index is >= MAX_TIMESTEPS. Those onsets
# sit in the part of the trace that `truncating='post'` cuts off below, so the
# model never sees them and their normalized label is >= 1.
onsets_outside_window = 0

# Loop through the catalog. Rows are unpacked positionally: the catalog must
# have exactly five columns, with the file name first and the quake start time
# (same units as the trace's 'time_rel(sec)' column) third.
for i, (file_name, _, start_time, _, quake_type) in cat.iterrows():
    file_path = f"./data/lunar/training/data/S12_GradeA/{file_name}.csv"
    if not os.path.isfile(file_path):
        # Catalog entries without a matching trace file are skipped
        continue

    # Load seismic data
    data_chunk = load_seismic_data(file_path)
    velocity = data_chunk['velocity(m/s)'].values
    time = data_chunk['time_rel(sec)'].values

    # Preprocess velocity data
    processed_velocity = preprocess_data(velocity)  # Shape: (timesteps, 1 feature)

    # Labeling: index of the sample whose relative time is closest to the
    # catalogued start time (index into the full, untruncated trace)
    start_index = np.argmin(np.abs(time - start_time))
    if start_index >= MAX_TIMESTEPS:
        onsets_outside_window += 1

    # Append processed data and the start index to the training set
    X_train.append(processed_velocity)
    y_train.append(start_index)  # The label is the index of the quake start

# Fail early with an actionable message instead of an unrelated error from
# train_test_split further down
if not X_train:
    raise ValueError(
        "No training traces were found under ./data/lunar/training/data/S12_GradeA/ "
        f"for the {len(cat)} entries listed in {cat_file}"
    )

if onsets_outside_window:
    print(
        f"WARNING: {onsets_outside_window} of {len(y_train)} quake onsets fall at or beyond "
        f"MAX_TIMESTEPS={MAX_TIMESTEPS} and are not visible in the truncated input"
    )

# Give every trace the same length: shorter ones are zero-padded at the end,
# longer ones are cut at the end
X_train_padded = pad_sequences(X_train, maxlen=MAX_TIMESTEPS, dtype='float32', padding='post', truncating='post')

# pad_sequences already returns an ndarray, so asarray avoids a second copy
X_train = np.asarray(X_train_padded)
y_train = np.array(y_train)  # The labels are now the start indices

# Check the shape of X_train and y_train
print("X_train shape:", X_train.shape)  # Should be (samples, timesteps, 1)
print("y_train shape:", y_train.shape)  # Should be (samples,)

# Split data into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Scale the labels by the window length; the prediction step multiplies the
# model output by the same constant to get back a sample index
y_train = y_train / MAX_TIMESTEPS
y_val = y_val / MAX_TIMESTEPS

# Build the model
input_shape = (X_train.shape[1], X_train.shape[2])
model = build_complex_model(input_shape)

X_train shape: (75, 60000, 1)
y_train shape: (75,)


2024-10-05 23:19:07.752221: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2024-10-05 23:19:07.752252: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-10-05 23:19:07.752262: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-10-05 23:19:07.752282: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-10-05 23:19:07.752297: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [4]:
# Train the model
history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_val, y_val))

Epoch 1/30


2024-10-05 23:19:09.651992: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 5s/step - loss: 29.7001 - mae: 4.6256 - val_loss: 23.8660 - val_mae: 4.2098
Epoch 2/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step - loss: 31.0195 - mae: 4.7731 - val_loss: 21.5260 - val_mae: 3.9409
Epoch 3/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step - loss: 24.5215 - mae: 4.1095 - val_loss: 17.3962 - val_mae: 3.5267
Epoch 4/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3s/step - loss: 20.3688 - mae: 3.7269 - val_loss: 11.8777 - val_mae: 2.8626
Epoch 5/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4s/step - loss: 14.9464 - mae: 3.2293 - val_loss: 7.4398 - val_mae: 2.2511
Epoch 6/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4s/step - loss: 10.7810 - mae: 2.8680 - val_loss: 7.7991 - val_mae: 2.3013
Epoch 7/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3s/step - loss: 8.6311 - mae: 2.5374 - val_lo

In [None]:
# Evaluate the model
loss, mae = model.evaluate(X_val, y_val)
print(f"Validation MAE: {mae:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 647ms/step - loss: 4.8298 - mae: 1.8516
Validation MAE: 1.8516


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Function to load seismic data
def load_seismic_data(file_path):
    """Load a seismic trace CSV.

    Args:
        file_path: Location of the CSV (anything `pd.read_csv` accepts).

    Returns:
        A `pandas.DataFrame` holding the file's contents.
    """
    return pd.read_csv(file_path)

# Function to preprocess the velocity data
def preprocess_data(velocity_data):
    """Normalize a velocity trace to zero mean / unit variance and add a feature axis.

    Args:
        velocity_data: Array-like of velocity samples (a 1-D trace in this
            notebook).

    Returns:
        numpy.ndarray with a trailing axis of size 1 appended, i.e.
        (timesteps, 1) for a 1-D input. If every sample is identical the
        result is all zeros instead of NaN.
    """
    trace = np.asarray(velocity_data)
    deviation = trace - np.mean(trace)

    # Guard against a zero standard deviation (constant trace): 0 / 0 would
    # produce NaN for every sample, so divide by 1 and keep the zeros.
    scale = np.std(trace)
    if scale == 0:
        scale = 1.0
    normalized_data = deviation / scale

    # Trailing feature dimension required by Conv1D
    return np.expand_dims(normalized_data, axis=-1)

# Directory scanned for test trace CSVs by predict_and_plot_for_all_files
test_directory = "./data/lunar/test/data/S12_GradeB/"
output_plot_dir = "./predictions_plots_v1/"  # Directory the prediction plots (PNG) are written to

# Create the plot directory up front; exist_ok=True makes re-running this cell harmless
os.makedirs(output_plot_dir, exist_ok=True)

# Function to predict moonquake start and save plot for each test file
def predict_and_plot_for_all_files(model, test_directory, max_timesteps=60000, output_dir=None):
    """Predict the quake onset for every CSV in a directory and save one plot per file.

    Args:
        model: Trained model exposing `predict`; its output is interpreted as
            the onset sample index divided by `max_timesteps`.
        test_directory: Directory whose `*.csv` files are processed (in sorted
            name order). Each file must provide 'velocity(m/s)' and
            'time_rel(sec)' columns.
        max_timesteps: Length every trace is padded/truncated to before
            prediction, and the factor used to turn the model output back into
            a sample index. Must match the value used at training time.
        output_dir: Directory for the PNG plots. Defaults to the module-level
            `output_plot_dir`.

    Returns:
        List of `(file_name, predicted_start_time)` tuples, one per processed
        file. Empty files and files with a non-finite prediction are skipped.
    """
    if output_dir is None:
        output_dir = output_plot_dir
    os.makedirs(output_dir, exist_ok=True)

    predictions = []  # Store the results here

    # Sorted so the processing order (and the returned list) is reproducible
    for file_name in sorted(os.listdir(test_directory)):
        if not file_name.endswith(".csv"):  # Only process CSV files
            continue

        file_path = os.path.join(test_directory, file_name)
        print(f"Processing file: {file_name}")  # For debugging/logging

        # Load seismic data from the file
        data_chunk = load_seismic_data(file_path)
        velocity = data_chunk['velocity(m/s)'].values
        rel_time = data_chunk['time_rel(sec)'].values
        if len(rel_time) == 0:
            print(f"Skipping {file_name}: file contains no samples")
            continue

        # Same preprocessing and padding/truncation as the training data
        model_input = pad_sequences(
            [preprocess_data(velocity)],
            maxlen=max_timesteps,
            dtype='float32',
            padding='post',
            truncating='post',
        )

        # Pull out a plain Python float; calling int() directly on the
        # 1-element prediction array is deprecated in recent NumPy releases
        raw_prediction = float(np.ravel(model.predict(model_input))[0])
        if not np.isfinite(raw_prediction):
            print(f"Skipping {file_name}: model returned a non-finite prediction")
            continue

        # Scale back to a sample index using this call's window length, then
        # clamp to the trace: the regression output is unbounded, so without
        # this a large value raises IndexError and a negative one silently
        # wraps around to the end of the trace
        predicted_index = int(raw_prediction * max_timesteps)
        predicted_index = min(max(predicted_index, 0), len(rel_time) - 1)

        # Get the predicted start time using the index
        predicted_start_time = rel_time[predicted_index]

        # Plot the raw trace with the predicted onset marked by a dashed red line
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.plot(rel_time, velocity, label="Seismic Velocity Data", color='blue')
        ax.axvline(
            x=predicted_start_time,
            color='red',
            linestyle='--',
            label=f"Predicted Quake Start: {predicted_start_time:.2f}s",
        )
        ax.set_title(f"Moonquake Prediction for {file_name}")
        ax.set_xlabel("Time (seconds)")
        ax.set_ylabel("Velocity (m/s)")
        ax.legend()

        # Save the plot and release the figure so figures do not pile up in memory
        plot_filename = os.path.join(output_dir, f"{file_name}_prediction_plot.png")
        fig.savefig(plot_filename)
        plt.close(fig)

        # Store the result (file_name, predicted_start_time)
        predictions.append((file_name, predicted_start_time))

    return predictions

# Run inference over the whole test directory.
# `model` must already exist in the kernel (it is built and trained in the earlier cells).
predictions = predict_and_plot_for_all_files(model, test_directory)

# Report one line per processed file
for entry in predictions:
    file_name, predicted_start_time = entry
    print(f"File: {file_name}, Predicted Moonquake Start Time: {predicted_start_time}")

NameError: name 'model' is not defined