In [1]:
# %% [code] {"jupyter":{"outputs_hidden":false}}
import gc

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# check the tensorflow and GPU

# print(f"Using Tensorflow {tf.__version__}")
# device_name = tf.test.gpu_device_name()
# if device_name != "/device:GPU:0":
#     raise SystemError("GPU device not found")
# print(
#     f"Found GPU and CPU.\nTensorFlow has access to the following devices:\n{tf.config.list_physical_devices()}"
# )


# import the datasets and check the files

import os

# path = "./phys591000-2023-final-project/"  # run on the local machine
# path = "/content/drive/Shareddrives/2023AI_final/2023AI_final/phys591000-2023-final-project/" # run on the google colab
path = "/kaggle/input/dune-neutrino/"  # Kaggle

# Fail fast unless BOTH archives exist as regular files.
# The training file name has to match the one handed to np.load further down
# ("neutrino_training_data.npz"); the check must also be
# `not (train and test)` -- `not test or train` would raise precisely when
# the training file is present.
train_file = path + "neutrino_training_data.npz"
test_file = path + "neutrino_test_data.npz"
if not (os.path.isfile(train_file) and os.path.isfile(test_file)):
    raise FileNotFoundError("test/train data was not found or is a directory")

# take out data from the datasets

# Open both .npz archives; individual arrays are read when indexed by key.
data_train = np.load(path + "neutrino_training_data.npz")  # ideal data
data_test = np.load(path + "neutrino_test_data.npz")  # pseudo-exp data

# Key lists, in the order the archives report them.
name_train = data_train.files
name_test = data_test.files

# NOTE: the unpacking below is positional -- it relies on the order of
# `.files` matching the order of the names on the left-hand side.
(
    ve_train,
    vebar_train,
    vu_train,
    vubar_train,
    theta23_train,
    delta_train,
    ldm_train,
) = (data_train[key] for key in name_train)
ve_test, vebar_test, vu_test, vubar_test = (data_test[key] for key in name_test)


# create train and test data
# The four spectra become the last axis of X; the three labels become the
# last axis of Y.
X_train = np.stack([ve_train, vebar_train, vu_train, vubar_train], axis=-1)
Y_train = np.stack([theta23_train, delta_train, ldm_train], axis=-1)
# X_test = np.stack((ve_test, vebar_test, vu_test, vubar_test), axis=-1)

# [X] normalize training data to [0,1]
# Only the samples with ldm_train > 0 are kept (the subset the variable names
# call "NH"). A single global min/max over all kept samples and all four
# channels is used for the scaling.
x_train_NH = X_train[ldm_train > 0]
print(f"Before normalized, the shape of x_train: {x_train_NH.shape}")
# Keep the feature scaling factors under their own names so they are not
# overwritten by the label factors below; the same (x_min, x_max) are needed
# to scale any other input (e.g. the test spectra) consistently.
x_max = np.max(x_train_NH)
x_min = np.min(x_train_NH)
print(f"X-train normalized factors (v_max, v_min) = ({x_max}, {x_min})")
x_train_NH_norm = (x_train_NH - x_min) / (x_max - x_min)
print(f"After normalized, the shape of x_train: {x_train_NH_norm.shape}")
# [Y] normalize training label to [0,1]
# Column 0 of Y_train is theta23 (first array passed to np.stack).
y_train_NH = Y_train[:, 0][ldm_train > 0]
print(f"Before normalized, the shape of y_train: {y_train_NH.shape}")
# Label scaling factors; needed to map network outputs back to theta23.
y_max = np.max(y_train_NH)
y_min = np.min(y_train_NH)
y_train_NH_norm = (y_train_NH - y_min) / (y_max - y_min)
print(f"Y-train normalized factors (v_max, v_min) = ({y_max}, {y_min})")
print(f"After normalized, the shape of y_train: {y_train_NH_norm.shape}")
# Backward compatibility: v_max / v_min previously ended up holding the
# label factors, so keep them bound to the same values.
v_max, v_min = y_max, y_min

# clear unused variables
# Drop the stacked arrays, the raw per-channel training arrays, the raw labels
# and the raw test arrays, then ask the garbage collector to run so their
# memory can be reclaimed before the train/validation split.
del X_train, Y_train
del ve_train, vebar_train, vu_train, vubar_train
del theta23_train, delta_train, ldm_train
del ve_test, vebar_test, vu_test, vubar_test
gc.collect()

# create validation data
from sklearn.model_selection import train_test_split

# split the training dataset into training and validation, with test_size = 0.2
# tf.random.set_seed only seeds TensorFlow's own random ops (weight init,
# shuffling inside fit); it has no effect on scikit-learn. The split is made
# reproducible by passing the same seed explicitly as random_state.
tf.random.set_seed(2023)
x_train, x_val, y_train, y_val = train_test_split(
    x_train_NH_norm,
    y_train_NH_norm,
    test_size=0.2,
    shuffle=True,
    random_state=2023,
)
# clear unused variables
# The split returned new arrays, so the unsplit normalized arrays can go.
del x_train_NH_norm, y_train_NH_norm

from tensorflow.keras import Input, Model
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.layers import (
    AveragePooling2D,
    BatchNormalization,
    Conv2D,
    Conv2DTranspose,
    Dense,
    Dropout,
    Flatten,
    Lambda,
    Layer,
    LeakyReLU,
    Reshape,
)


def create_model_cvae(input_dim, latent_dim):
    """Build the convolutional regression network used by this notebook.

    Two convolution blocks (64 then 32 filters, kernel size 4) feed a
    flattened stack of L2-regularised ELU dense layers (256, 64, 16 units)
    and a final single-unit ReLU output. The inner network is named "dnn"
    and its summary is printed; it is then wrapped in an outer ``Model``
    built on a fresh ``Input`` of the same shape.

    Args:
        input_dim: Shape tuple forwarded to ``Input(shape=...)``.
        latent_dim: Accepted for interface compatibility; the body does not
            read it.

    Returns:
        The outer, uncompiled ``Model`` whose single layer is the "dnn"
        network.
    """

    def _conv_block(tensor, filters, kernel_size):
        # Two identical Conv2D -> BatchNormalization -> LeakyReLU stages,
        # then a 2x2 average pooling.
        for _ in range(2):
            tensor = Conv2D(
                filters, kernel_size=kernel_size, strides=1, padding="same"
            )(tensor)
            tensor = BatchNormalization()(tensor)
            tensor = LeakyReLU(alpha=0.2)(tensor)
        return AveragePooling2D(pool_size=(2, 2))(tensor)

    # Encoder
    encoder_in = Input(shape=input_dim)
    features = encoder_in
    for n_filters in (64, 32):
        features = _conv_block(features, filters=n_filters, kernel_size=4)
    features = Flatten()(features)
    # Each dense layer gets its own L2 regularizer instance.
    for width in (256, 64, 16):
        features = Dense(
            width, kernel_regularizer=regularizers.l2(0.002), activation="elu"
        )(features)
    prediction = Dense(1, activation="relu")(features)
    inner = Model(encoder_in, prediction, name="dnn")
    inner.summary()

    # CVAE + DNN
    # Wrap the inner network behind a new input tensor and return the wrapper.
    outer_in = Input(shape=input_dim)
    return Model(inputs=outer_in, outputs=inner(outer_in))


from tensorflow.keras.optimizers import Adam

# Declare the model
# The input shape is axes 1 and 2 of x_train followed by a trailing axis of
# size 1.
cvae = create_model_cvae(
    input_dim=tuple(x_train.shape[axis] for axis in (1, 2)) + (1,),
    latent_dim=2,
)

# Compile the model
# Huber loss, Adam optimizer with a learning rate of 5e-6.
cvae.compile(loss="huber", optimizer=Adam(learning_rate=5e-6))


from tensorflow.keras.callbacks import EarlyStopping

# train
# min_delta has to be small compared with the loss values actually reached.
# The validation loss of this model drops to the 1e-4 range within a few
# dozen epochs; with a threshold of 0.005 no epoch could count as an
# improvement once val_loss fell below 0.005 (the loss cannot go negative),
# so training always stopped `patience` epochs later while still improving.
# restore_best_weights=True makes the model keep the weights of the best
# validation epoch when early stopping fires, instead of those of the last
# (possibly worse) epoch.
early_stopping = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-5,
    patience=30,
    mode="auto",
    baseline=None,
    restore_best_weights=True,
)

# The per-epoch losses are available afterwards via cvae.history.history.
cvae.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=256,
    batch_size=64,
    callbacks=[early_stopping],
    shuffle=True,
    verbose=2,
)

# check the loss function
# Draw the training and validation loss curves recorded by the last fit()
# call, write the figure to disk, then close it.
fig = plt.figure(figsize=(8, 5), dpi=120)
ax = fig.add_subplot(111)
history = cvae.history.history
ax.plot(history["loss"], lw=2.5, label="Train", alpha=0.8)
ax.plot(history["val_loss"], lw=2.5, label="Validation", alpha=0.8)
ax.set_title("Epoch vs Huber loss")
ax.set_xlabel("epoch")
ax.set_ylabel("Loss (Huber)")
ax.legend(loc="best")
fig.savefig("CVAE_loss.png")
plt.close(fig)

# save model
# HDF5 file written to the current working directory.
cvae.save("./CVAE_Theta23.h5")


caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


Before normalized, the shape of x_train: (500027, 65, 4)
X-train normalized factors (v_max, v_min) = (1919.03, 2.74423)


After normalized, the shape of x_train: (500027, 65, 4)
Before normalized, the shape of y_train: (500027,)
Y-train normalized factors (v_max, v_min) = (51.1, 38.9)
After normalized, the shape of y_train: (500027,)


Model: "dnn"


_________________________________________________________________


 Layer (type)                Output Shape              Param #   




 input_1 (InputLayer)        [(None, 65, 4, 1)]        0         


                                                                 


 conv2d (Conv2D)             (None, 65, 4, 64)         1088      


                                                                 


 batch_normalization (BatchN  (None, 65, 4, 64)        256       


 ormalization)                                                   


                                                                 


 leaky_re_lu (LeakyReLU)     (None, 65, 4, 64)         0         


                                                                 


 conv2d_1 (Conv2D)           (None, 65, 4, 64)         65600     


                                                                 


 batch_normalization_1 (Batc  (None, 65, 4, 64)        256       


 hNormalization)                                                 


                                                                 


 leaky_re_lu_1 (LeakyReLU)   (None, 65, 4, 64)         0         


                                                                 


 average_pooling2d (AverageP  (None, 32, 2, 64)        0         


 ooling2D)                                                       


                                                                 


 conv2d_2 (Conv2D)           (None, 32, 2, 32)         32800     


                                                                 


 batch_normalization_2 (Batc  (None, 32, 2, 32)        128       


 hNormalization)                                                 


                                                                 


 leaky_re_lu_2 (LeakyReLU)   (None, 32, 2, 32)         0         


                                                                 


 conv2d_3 (Conv2D)           (None, 32, 2, 32)         16416     


                                                                 


 batch_normalization_3 (Batc  (None, 32, 2, 32)        128       


 hNormalization)                                                 


                                                                 


 leaky_re_lu_3 (LeakyReLU)   (None, 32, 2, 32)         0         


                                                                 


 average_pooling2d_1 (Averag  (None, 16, 1, 32)        0         


 ePooling2D)                                                     


                                                                 


 flatten (Flatten)           (None, 512)               0         


                                                                 


 dense (Dense)               (None, 256)               131328    


                                                                 


 dense_1 (Dense)             (None, 64)                16448     


                                                                 


 dense_2 (Dense)             (None, 16)                1040      


                                                                 


 dense_3 (Dense)             (None, 1)                 17        


                                                                 




Total params: 265,505


Trainable params: 265,121


Non-trainable params: 384


_________________________________________________________________


Epoch 1/256


6251/6251 - 1082s - loss: 0.7611 - val_loss: 0.5876 - 1082s/epoch - 173ms/step


Epoch 2/256


6251/6251 - 1090s - loss: 0.4549 - val_loss: 0.3419 - 1090s/epoch - 174ms/step


Epoch 3/256


6251/6251 - 1096s - loss: 0.2635 - val_loss: 0.1988 - 1096s/epoch - 175ms/step


Epoch 4/256


6251/6251 - 1070s - loss: 0.1544 - val_loss: 0.1173 - 1070s/epoch - 171ms/step


Epoch 5/256


6251/6251 - 1089s - loss: 0.0901 - val_loss: 0.0669 - 1089s/epoch - 174ms/step


Epoch 6/256


6251/6251 - 1093s - loss: 0.0502 - val_loss: 0.0363 - 1093s/epoch - 175ms/step


Epoch 7/256


6251/6251 - 1074s - loss: 0.0268 - val_loss: 0.0192 - 1074s/epoch - 172ms/step


Epoch 8/256


6251/6251 - 1059s - loss: 0.0141 - val_loss: 0.0101 - 1059s/epoch - 169ms/step


Epoch 9/256


6251/6251 - 1132s - loss: 0.0076 - val_loss: 0.0056 - 1132s/epoch - 181ms/step


Epoch 10/256


6251/6251 - 1150s - loss: 0.0044 - val_loss: 0.0035 - 1150s/epoch - 184ms/step


Epoch 11/256


6251/6251 - 1212s - loss: 0.0029 - val_loss: 0.0027 - 1212s/epoch - 194ms/step


Epoch 12/256


6251/6251 - 1132s - loss: 0.0022 - val_loss: 0.0019 - 1132s/epoch - 181ms/step


Epoch 13/256


6251/6251 - 1094s - loss: 0.0018 - val_loss: 0.0017 - 1094s/epoch - 175ms/step


Epoch 14/256


6251/6251 - 1117s - loss: 0.0015 - val_loss: 0.0013 - 1117s/epoch - 179ms/step


Epoch 15/256


6251/6251 - 1093s - loss: 0.0013 - val_loss: 0.0013 - 1093s/epoch - 175ms/step


Epoch 16/256


6251/6251 - 1096s - loss: 0.0012 - val_loss: 0.0011 - 1096s/epoch - 175ms/step


Epoch 17/256


6251/6251 - 1075s - loss: 0.0011 - val_loss: 0.0010 - 1075s/epoch - 172ms/step


Epoch 18/256


6251/6251 - 1104s - loss: 9.8677e-04 - val_loss: 8.6567e-04 - 1104s/epoch - 177ms/step


Epoch 19/256


6251/6251 - 1145s - loss: 9.2128e-04 - val_loss: 9.2994e-04 - 1145s/epoch - 183ms/step


Epoch 20/256


6251/6251 - 1101s - loss: 8.6726e-04 - val_loss: 7.6858e-04 - 1101s/epoch - 176ms/step


Epoch 21/256


6251/6251 - 1100s - loss: 8.2519e-04 - val_loss: 7.6447e-04 - 1100s/epoch - 176ms/step


Epoch 22/256


6251/6251 - 1086s - loss: 7.8836e-04 - val_loss: 7.5724e-04 - 1086s/epoch - 174ms/step


Epoch 23/256


6251/6251 - 1095s - loss: 7.6208e-04 - val_loss: 7.5286e-04 - 1095s/epoch - 175ms/step


Epoch 24/256


6251/6251 - 1088s - loss: 7.3436e-04 - val_loss: 6.7998e-04 - 1088s/epoch - 174ms/step


Epoch 25/256


6251/6251 - 1110s - loss: 7.0997e-04 - val_loss: 7.8089e-04 - 1110s/epoch - 178ms/step


Epoch 26/256


6251/6251 - 1098s - loss: 6.8803e-04 - val_loss: 6.2929e-04 - 1098s/epoch - 176ms/step


Epoch 27/256


6251/6251 - 1123s - loss: 6.6875e-04 - val_loss: 7.6695e-04 - 1123s/epoch - 180ms/step


Epoch 28/256


6251/6251 - 1105s - loss: 6.4729e-04 - val_loss: 6.9505e-04 - 1105s/epoch - 177ms/step


Epoch 29/256


6251/6251 - 1109s - loss: 6.3147e-04 - val_loss: 5.6598e-04 - 1109s/epoch - 177ms/step


Epoch 30/256


6251/6251 - 1105s - loss: 6.2000e-04 - val_loss: 9.2291e-04 - 1105s/epoch - 177ms/step


Epoch 31/256


6251/6251 - 1078s - loss: 6.0630e-04 - val_loss: 5.4555e-04 - 1078s/epoch - 172ms/step


Epoch 32/256


6251/6251 - 1105s - loss: 5.9816e-04 - val_loss: 5.2419e-04 - 1105s/epoch - 177ms/step


Epoch 33/256


6251/6251 - 1153s - loss: 5.8282e-04 - val_loss: 5.2336e-04 - 1153s/epoch - 184ms/step


Epoch 34/256


6251/6251 - 1110s - loss: 5.7545e-04 - val_loss: 5.1152e-04 - 1110s/epoch - 178ms/step


Epoch 35/256


6251/6251 - 1090s - loss: 5.6436e-04 - val_loss: 4.9572e-04 - 1090s/epoch - 174ms/step


Epoch 36/256


6251/6251 - 1100s - loss: 5.5281e-04 - val_loss: 4.9337e-04 - 1100s/epoch - 176ms/step


Epoch 37/256


6251/6251 - 1116s - loss: 5.4566e-04 - val_loss: 4.8195e-04 - 1116s/epoch - 179ms/step


Epoch 38/256


6251/6251 - 1114s - loss: 5.3671e-04 - val_loss: 4.7612e-04 - 1114s/epoch - 178ms/step


Epoch 39/256


6251/6251 - 1108s - loss: 5.3063e-04 - val_loss: 4.6848e-04 - 1108s/epoch - 177ms/step
