In [13]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dense, Flatten, Dropout, Input
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [14]:
# Load seismic data from the CSV file
def load_seismic_data(file_path):
    data = pd.read_csv(file_path)
    return data

# Preprocess the velocity data
def preprocess_data(velocity_data):
    # 1. Normalize the velocity data (zero mean, unit variance)
    normalized_data = (velocity_data - np.mean(velocity_data)) / np.std(velocity_data)
    
    # 2. Reshape to add the feature dimension (required by Conv1D)
    reshaped_data = np.expand_dims(normalized_data, axis=-1)  # Shape: (timesteps, 1 feature)
    # print(normalized_data.shape, reshaped_data.shape)
    
    return reshaped_data

def build_model(input_shape):
    model = Sequential()
    
    # Use the Input layer explicitly
    model.add(Input(shape=input_shape))
    
    # Then proceed with the rest of the layers
    model.add(Conv1D(32, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(64, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification (quake start or not)
    
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Load and process the catalog
cat_directory = './data/lunar/training/catalogs/'
cat_file = cat_directory + 'apollo12_catalog_GradeA_final.csv'
cat = pd.read_csv(cat_file)

# Prepare training data (seismic data + labels for quake start detection)
X_train = []
y_train = []

# Define a max length for padding/truncating
MAX_TIMESTEPS = 60000  # You can adjust this based on your data

# Loop through the catalog
for i, (file_name, _, start_time, _, quake_type) in cat.iterrows():
    file_path = f"./data/lunar/training/data/S12_GradeA/{file_name}.csv"
    if not os.path.isfile(file_path):
        continue

    # Load seismic data
    data_chunk = load_seismic_data(file_path)
    velocity = data_chunk['velocity(m/s)'].values
    time = data_chunk['time_rel(sec)'].values

    # Preprocess velocity data
    processed_velocity = preprocess_data(velocity)  # Shape: (timesteps, 1 feature)

    # Labeling: Create a binary label for detecting quake start
    start_index = np.argmin(np.abs(time - start_time))  # Closest index to the start time
    labels = np.zeros(len(time))
    labels[start_index] = 1  # Label the start of the moonquake

    # Append processed data and labels to training set
    X_train.append(processed_velocity)
    y_train.append(labels)

# Convert lists to numpy arrays with consistent time series length using padding
X_train_padded = pad_sequences(X_train, maxlen=MAX_TIMESTEPS, dtype='float32', padding='post', truncating='post')
y_train_padded = pad_sequences(y_train, maxlen=MAX_TIMESTEPS, dtype='float32', padding='post', truncating='post')

# Convert lists to numpy arrays for model training
X_train = np.array(X_train_padded)
y_train = np.array(y_train_padded)

In [15]:
# Check the shape of X_train and y_train
print("X_train shape:", X_train.shape)  # DEBUGGING STEP
print("y_train shape:", y_train.shape)  # DEBUGGING STEP

# Split data into train and test sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Build the model
input_shape = (X_train.shape[1], X_train.shape[2])  # Shape: (timesteps, features)
model = build_model(input_shape)

X_train shape: (75, 60000, 1)
y_train shape: (75, 60000)


In [16]:
# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val), verbose=2)

Epoch 1/20


ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 60000), output.shape=(None, 1)

In [None]:
# Evaluate the model
loss, accuracy = model.evaluate(X_val, y_val)
print(f"Validation Accuracy: {accuracy:.4f}")

# Predict moonquake start on new data
def predict_moonquake_start(file_path):
    data_chunk = load_seismic_data(file_path)
    velocity = data_chunk['velocity(m/s)'].values
    processed_velocity = preprocess_data(velocity)
    processed_velocity = np.expand_dims(processed_velocity, axis=0)  # Add batch dimension

    # Make prediction
    predictions = model.predict(processed_velocity)
    start_time_index = np.argmax(predictions)  # Get the index with the highest prediction
    start_time = data_chunk['time_rel(sec)'].values[start_time_index]

    return start_time

# Example prediction on new test data
test_file_path = "./data/lunar/test/S12_GradeA/test_file.csv"
predicted_start_time = predict_moonquake_start(test_file_path)
print(f"Predicted Moonquake Start Time: {predicted_start_time}")