In [1]:
import os
# Both variables are set before TensorFlow is imported below.
# Level '3' silences TensorFlow's native (C++) INFO, WARNING and ERROR log output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Default to GPU index 1, but keep a CUDA_VISIBLE_DEVICES value that was already
# exported (shell, scheduler, ...) instead of overwriting it with a hard-coded index.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "1")

import sys
import logging
import numpy as np
import pandas as pd
import tensorflow as tf
import warnings
from config import DATASET_CONFIGS
from utils import azimuthal_equidistant_projection, quadrilateral_area
from models import KernelPointProcess

# Switch TensorFlow to graph mode (eager execution off); this is an execution-mode
# change, not a logging/warning setting.
tf.compat.v1.disable_eager_execution()

# Silence every Python-level warning (all categories, not only TensorFlow's)
warnings.filterwarnings(action="ignore")

# Configure logging: INFO and above, formatted as "timestamp - level - message"
logger = logging.getLogger(__name__)
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)



In [2]:

# Dataset to fit. datasets_id should be one of:
#   "ComCat", "SaltonSea", "SanJac", "WHITE", "SCEDC_20", "SCEDC_25", "SCEDC_30"
datasets_id = "SCEDC_30"

# Kernel selection: each of the three components is either "empirical" or "neural".
#
# ETAS -> all three components empirical:
#   temporal_id = "empirical"
#   spatial_id = "empirical"
#   kappa_id = "empirical"
#
# NKF -> all three components neural (the active setting):
temporal_id, spatial_id, kappa_id = "neural", "neural", "neural"

# You can also try different combinations, such as:
#   temporal_id = "neural"
#   spatial_id = "neural"
#   kappa_id = "empirical"

# Look up the configuration entry of the selected dataset; stop the cell on an unknown id.
if datasets_id in DATASET_CONFIGS:
    config = DATASET_CONFIGS[datasets_id]
else:
    logger.error(f"Unknown dataset: {datasets_id}")
    sys.exit(1)

# Read the event catalog (CSV at config["catalog_path"]) and the shape array
# (NumPy file at config["shape_path"]); stop the cell if either file is missing.
try:
    raw_catalog = pd.read_csv(config["catalog_path"])
    cat_shape = np.load(config["shape_path"])
except FileNotFoundError as missing_file:
    logger.error(f"Failed to load dataset files: {missing_file}")
    sys.exit(1)

# SCEDC variants: the number after the underscore, divided by 10, is the smallest
# magnitude kept (e.g. "SCEDC_30" -> 3.0). The index is renumbered after filtering.
if datasets_id in ("SCEDC_20", "SCEDC_25", "SCEDC_30"):
    min_magnitude = float(datasets_id.rsplit("_", 1)[1]) / 10
    raw_catalog = raw_catalog.loc[raw_catalog['magnitude'].ge(min_magnitude)].reset_index(drop=True)

# Split data by time.
# The auxiliary/training/validation windows are half-open [start, next_start); the
# testing window additionally includes events at exactly config["testing_end"].
event_times = raw_catalog['time_days']

# The split arrays and the result labels are taken from the catalog by row position
# (row 0 is treated as the first auxiliary event). The per-window counts computed here
# are therefore only valid offsets when rows are in chronological order and no event
# precedes the auxiliary window. Abort instead of training on misaligned splits.
if not event_times.is_monotonic_increasing:
    logger.error(f"Catalog for {datasets_id} is not sorted by time_days (or contains missing times)")
    sys.exit(1)

early_num = int((event_times < config["auxiliary_start"]).sum())
if early_num > 0:
    logger.error(f"{datasets_id}: {early_num} events precede auxiliary_start; "
                 f"position-based splits would be misaligned")
    sys.exit(1)


def _count_events(start, stop, include_stop=False):
    """Count catalog events with start <= time_days < stop (<= stop when include_stop)."""
    before_stop = (event_times <= stop) if include_stop else (event_times < stop)
    # int(...) keeps the counts plain Python ints, as len(...) produced before.
    return int(((event_times >= start) & before_stop).sum())


auxiliary_num = _count_events(config["auxiliary_start"], config["training_start"])
training_num = _count_events(config["training_start"], config["validation_start"])
validation_num = _count_events(config["validation_start"], config["testing_start"])
testing_num = _count_events(config["testing_start"], config["testing_end"], include_stop=True)

logger.info(f"{datasets_id}: auxiliary={auxiliary_num}, training={training_num}, "
            f"validation={validation_num}, testing={testing_num}")

def _event_arrays(first_row, stop_row):
    """Return the (time_days, magnitude, x, y) value arrays for rows first_row..stop_row-1."""
    return tuple(
        raw_catalog[column].values[first_row:stop_row]
        for column in ('time_days', 'magnitude', 'x', 'y')
    )


# Each split's slice starts `input_dim_*` rows before the split's first event
# (presumably as conditioning history for the model - confirm in KernelPointProcess).
input_dim_train = input_dim_val = input_dim_test = auxiliary_num

# Row positions (exclusive) at which each split ends
_train_stop = auxiliary_num + training_num
_val_stop = _train_stop + validation_num
_test_stop = _val_stop + testing_num

data_t_train, data_m_train, data_x_train, data_y_train = _event_arrays(
    auxiliary_num - input_dim_train, _train_stop
)
data_t_val, data_m_val, data_x_val, data_y_val = _event_arrays(
    _train_stop - input_dim_val, _val_stop
)
data_t_test, data_m_test, data_x_test, data_y_test = _event_arrays(
    _val_stop - input_dim_test, _test_stop
)

# Projection centre: mean latitude/longitude of the catalog as it stands at this point
# (i.e. after the SCEDC magnitude filter, when that filter applied).
center_latitude = raw_catalog['latitude'].mean()
center_longitude = raw_catalog['longitude'].mean()
# Project the shape vertices around that centre.
# NOTE(review): columns 0 and 1 of cat_shape are passed in the same order as
# (center_latitude, center_longitude), so they are presumably (lat, lon) - confirm
# against the helper's signature.
cat_shape_x, cat_shape_y = azimuthal_equidistant_projection(
    cat_shape[:, 0], cat_shape[:, 1], center_latitude, center_longitude
)
# Pair the projected coordinates along axis 1 (one (x, y) row per vertex if both are 1-D)
cat_shape_xy = np.stack((cat_shape_x, cat_shape_y), axis=1)
# Area of the projected shape; handed to the model as `area`.
# NOTE(review): the helper's name suggests it expects exactly four vertices - confirm.
obj_area = quadrilateral_area(cat_shape_xy)

# Constructor arguments: per-split input lengths, kernel choices, dataset constants
# and network size settings.
model_options = dict(
    time_step_train=input_dim_train,
    time_step_val=input_dim_val,
    time_step_test=input_dim_test,
    temporal_id=temporal_id,
    spatial_id=spatial_id,
    kappa_id=kappa_id,
    global_m0=config["global_m0"],
    area=obj_area,
    size_layer=5,
    size_nn=32,
)
weights_path = f"weights/{datasets_id}_{temporal_id}_{spatial_id}_{kappa_id}"

# Fluent pipeline: attach the three data splits, build and compile, train, evaluate
# every split, then save the weights. `model` is bound to what the last call returns
# and is read afterwards for the LL_* / lam_* / Int_lam_* result attributes.
model = (
    KernelPointProcess(**model_options)
    .set_train_data(data_t_train, data_m_train, data_x_train, data_y_train)
    .set_val_data(data_t_val, data_m_val, data_x_val, data_y_val)
    .set_test_data(data_t_test, data_m_test, data_x_test, data_y_test)
    .set_model()
    .compile()
    .fit_eval(epochs=1000, batch_size=128)
    .eval_train()
    .eval_val()
    .eval_test()
    .save_weights(weights_path)
)

# Report the average test log-likelihoods (joint, temporal, spatial)
test_summary = (
    f"Test results: LL_ts={model.LL_ts_average_test}, LL_t={model.LL_t_average_test}, "
    f"LL_s={model.LL_s_average_test}"
)
logger.info(test_summary)

# Append one comma-separated line per run: the four identifiers followed by the
# LL_ts / LL_t / LL_s averages of the train, val and test splits (in that order).
try:
    with open("ll_seismic_model.log", "a") as log_file:
        run_fields = [datasets_id, temporal_id, spatial_id, kappa_id]
        for split_name in ("train", "val", "test"):
            for metric in ("LL_ts", "LL_t", "LL_s"):
                run_fields.append(getattr(model, f"{metric}_average_{split_name}"))
        # Each field goes through f-string formatting (default format spec), not str().
        log_file.write(", ".join(f"{field}" for field in run_fields) + "\n")
except IOError as write_error:
    logger.error(f"Failed to write to log file: {write_error}")

# Annotate a copy of the catalog so raw_catalog itself is left unchanged
results_catalog = raw_catalog.copy()

# Inclusive (first, last) row labels of each split, laid out back to back after the
# first `auxiliary_num` rows. Rows outside these ranges keep NaN in the new columns.
split_rows = {}
next_row = auxiliary_num
for split_name, split_size in (('train', training_num), ('val', validation_num), ('test', testing_num)):
    split_rows[split_name] = (next_row, next_row + split_size - 1)
    next_row += split_size

for split_name, (first_row, last_row) in split_rows.items():
    results_catalog.loc[first_row:last_row, 'step'] = split_name

# Per-event model outputs of every split, keyed by the output column they fill.
# The two intensity outputs (lam_t, lam_ts) are stored as natural logarithms.
split_outputs = {
    split_name: {
        'logli_t': getattr(model, f"LL_t_{split_name}"),
        'logli_s': getattr(model, f"LL_s_{split_name}"),
        'logli_ts': getattr(model, f"LL_ts_{split_name}"),
        'loglam_t': np.log(getattr(model, f"lam_t_{split_name}")),
        'loglam_ts': np.log(getattr(model, f"lam_ts_{split_name}")),
        'intlam': getattr(model, f"Int_lam_{split_name}"),
    }
    for split_name in split_rows
}

for split_name, column_values in split_outputs.items():
    first_row, last_row = split_rows[split_name]
    for column_name, values in column_values.items():
        # Outputs are flattened to 1-D so they line up with the split's rows
        results_catalog.loc[first_row:last_row, column_name] = values.flatten()

# Persist the annotated catalog. The output directory is created on demand: without
# it, to_csv raises an OSError that is only logged here, and the per-event results of
# the whole training run would be lost on a fresh checkout that has no csv/ folder.
results_path = f"csv/{datasets_id}_{temporal_id}_{spatial_id}_{kappa_id}.csv"
try:
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    results_catalog.to_csv(
        results_path,
        index=False,
        encoding='utf-8'
    )
    logger.info(f"Results saved to {results_path}")
except IOError as e:
    logger.error(f"Failed to save results to CSV: {e}")


2025-07-23 18:28:32,394 - INFO - SCEDC_30: auxiliary=1142, training=6815, validation=3135, testing=1898
2025-07-23 18:28:36,178 - INFO - Model and session initialized






























Train on 6815 samples, validate on 3135 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/

2025-07-23 18:32:03,518 - INFO - Weights saved to weights/SCEDC_30_neural_neural_neural
2025-07-23 18:32:03,519 - INFO - Test results: LL_ts=-5.940391540527344, LL_t=1.7972055673599243, LL_s=-7.7375969886779785
2025-07-23 18:32:03,662 - INFO - Results saved to csv/SCEDC_30_neural_neural_neural.csv
