<a href="https://colab.research.google.com/github/wilmi94/MasterThesis-AE/blob/main/notebooks/sdo_e2e_ConvLSTM_171.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SDO/AIA-171A End-to-End ConvLSTM Model

> This notebook is part of the Master Thesis *Predicting Coronal Mass Ejections using Machine Learning methods* by Wilmar Ender, FH Wiener Neustadt, 2023.

**Objective:** \\
This notebook aims to perform simple data exploration tasks on  SDO/AIA dataset.

**Solar event list:** \\
*Liu et al. 2020, Predicting Coronal Mass Ejections Using SDO/HMI Vector Magnetic Data Products and Recurrent Neural Networks*

**Dataset:** \\
*Ahmadzadeh et al. 2019, A Curated Image Parameter Data Set from the Solar Dynamics Observatory Mission*. \\
Accessed via *sdo-cli* (https://github.com/i4Ds/sdo-cli)

*Predicting Solar Flares Using a Long Short-term Memory Network. Liu, H., Liu, C., Wang, J. T. L., Wang, H., ApJ., 877:121, 2019.*


...

## Setting up the Notebook

In [1]:
%%capture
pip install -U sdo-cli

In [2]:
import pandas as pd
import csv
import sys
import numpy as np
import os

from keras.utils import np_utils
from keras.models import *
from keras.layers import *

import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
try :
    import tensorflow as tf
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
except Exception as e:
    print('turn off loggins is not supported')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import class_weight

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [3]:
# mount Google drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Change present working directory
%cd /content/drive/MyDrive/Academia/MSc. Aerospace Engineering - FH Wiener Neustadt/4. Master Thesis/03-Work/

/content/drive/MyDrive/Academia/MSc. Aerospace Engineering - FH Wiener Neustadt/4. Master Thesis/03-Work


In [5]:
# check content
!ls -a

 00_Dataset				    save_at_14.keras   save_at_24.keras
 01_sdo_data_exploration		    save_at_15.keras   save_at_25.keras
 02_sdo_binclass			    save_at_16.keras   save_at_2.keras
 03_sdo_ConvLSTM			    save_at_17.keras   save_at_3.keras
 04_Tests				    save_at_18.keras   save_at_4.keras
'Master Thesis-ML-Project-Checklist.gdoc'   save_at_19.keras   save_at_5.keras
 model.png				    save_at_1.keras    save_at_6.keras
 save_at_10.keras			    save_at_20.keras   save_at_7.keras
 save_at_11.keras			    save_at_21.keras   save_at_8.keras
 save_at_12.keras			    save_at_22.keras   save_at_9.keras
 save_at_13.keras			    save_at_23.keras   .sdo-cli


## Helper Functions

In [6]:
def compare_filenames_with_dataframe(directory, dataframe, wavelength):
    file_end = '_' + str(wavelength) + '.jpeg'
    # Get list of filenames from the directory
    directory_filenames = [filename for filename in os.listdir(directory) if filename.endswith(file_end)]

    # Get list of names from the DataFrame
    df_check = dataframe.copy()
    df_check['Timestamp'] = pd.to_datetime(df_check['Timestamp']).dt.strftime('%Y-%m-%dT%H%M%S__171.jpeg') # Warning! wavelength is now hard coded

    dataframe_names = df_check['Timestamp'].tolist()  # the name of the image should correspond to the timestamp

    # Compare filenames
    common_filenames = set(directory_filenames) & set(dataframe_names)
    missing_filenames = set(dataframe_names) - set(directory_filenames)
    extra_filenames = set(directory_filenames) - set(dataframe_names)

    df_missing = pd.DataFrame (data = missing_filenames,  columns=['Timestamp'])
    df_missing['Timestamp'] = df_missing['Timestamp'].str.replace(r'__171.jpeg', '', regex=True)
    df_missing['Timestamp'] = pd.to_datetime(df_missing['Timestamp']).dt.strftime('%Y-%m-%dT%H:%M:%S')

    # Calculate statistics
    total_directory_files = len(directory_filenames)
    total_dataframe_names = len(dataframe_names)
    total_common_files = len(common_filenames)
    total_missing_files = len(missing_filenames)
    total_extra_files = len(extra_filenames)
    print('Total Directory Files: ', total_directory_files)
    print('Total DataFrame Names: ', total_dataframe_names)
    print('Common Files: ', total_common_files)
    print('Missing Files: ', total_missing_files)
    print('Extra Files: ', total_extra_files)

    statistics = {
        'Total Directory Files': total_directory_files,
        'Total DataFrame Names': total_dataframe_names,
        'Common Files': total_common_files,
        'Missing Files': total_missing_files,
        'Extra Files': total_extra_files,
        #'Common File Names': common_filenames,
        'Missing File Names': missing_filenames,
        'Extra File Names': extra_filenames
    }

    return statistics, df_missing

In [7]:
def load_data(datafile, flare_label, series_len, start_feature, n_features, mask_value):
    df = pd.read_csv(datafile)
    df_values = df.values
    X = []
    y = []
    tmp = []
    for k in range(start_feature, start_feature + n_features):
        tmp.append(mask_value)
    for idx in range(0, len(df_values)):
        each_series_data = []
        row = df_values[idx]
        label = row[1][0]
        if flare_label == 'C' and (label == 'X' or label == 'M'):
            label = 'C'
        if flare_label == 'C' and label == 'B':
            label = 'N'
        has_zero_record = False
        # if at least one of the 25 physical feature values is missing, then discard it.
        if flare_label == 'C':
            if float(row[5]) == 0.0:
                has_zero_record = True
            if float(row[7]) == 0.0:
                has_zero_record = True
            for k in range(9, 13):
                if float(row[k]) == 0.0:
                    has_zero_record = True
                    break
            for k in range(14, 16):
                if float(row[k]) == 0.0:
                    has_zero_record = True
                    break
            if float(row[18]) == 0.0:
                has_zero_record = True

        if has_zero_record is False:
            cur_noaa_num = int(row[3])
            each_series_data.append(row[start_feature:start_feature + n_features].tolist())
            itr_idx = idx - 1
            while itr_idx >= 0 and len(each_series_data) < series_len:
                prev_row = df_values[itr_idx]
                prev_noaa_num = int(prev_row[3])
                if prev_noaa_num != cur_noaa_num:
                    break
                has_zero_record_tmp = False
                if flare_label == 'C':
                    if float(row[5]) == 0.0:
                        has_zero_record_tmp = True
                    if float(row[7]) == 0.0:
                        has_zero_record_tmp = True
                    for k in range(9, 13):
                        if float(row[k]) == 0.0:
                            has_zero_record_tmp = True
                            break
                    for k in range(14, 16):
                        if float(row[k]) == 0.0:
                            has_zero_record_tmp = True
                            break
                    if float(row[18]) == 0.0:
                        has_zero_record_tmp = True

                if len(each_series_data) < series_len and has_zero_record_tmp is True:
                    each_series_data.insert(0, tmp)

                if len(each_series_data) < series_len and has_zero_record_tmp is False:
                    each_series_data.insert(0, prev_row[start_feature:start_feature + n_features].tolist())
                itr_idx -= 1

            while len(each_series_data) > 0 and len(each_series_data) < series_len:
                each_series_data.insert(0, tmp)

            if len(each_series_data) > 0:
                X.append(np.array(each_series_data).reshape(series_len, n_features).tolist())
                y.append(label)
    X_arr = np.array(X)
    y_arr = np.array(y)
    print(X_arr.shape)
    return X_arr, y_arr


def data_transform(data):
    encoder = LabelEncoder()
    encoder.fit(data)
    encoded_Y = encoder.transform(data)
    converteddata = np_utils.to_categorical(encoded_Y)
    return converteddata


def attention_3d_block(hidden_states, series_len):
    hidden_size = int(hidden_states.shape[2])
    hidden_states_t = Permute((2, 1), name='attention_input_t')(hidden_states)
    hidden_states_t = Reshape((hidden_size, series_len), name='attention_input_reshape')(hidden_states_t)
    score_first_part = Dense(series_len, use_bias=False, name='attention_score_vec')(hidden_states_t)
    score_first_part_t = Permute((2, 1), name='attention_score_vec_t')(score_first_part)
    h_t = Lambda(lambda x: x[:, :, -1], output_shape=(hidden_size, 1), name='last_hidden_state')(hidden_states_t)
    score = dot([score_first_part_t, h_t], [2, 1], name='attention_score')
    attention_weights = Activation('softmax', name='attention_weight')(score)
    context_vector = dot([hidden_states_t, attention_weights], [2, 1], name='context_vector')
    context_vector = Reshape((hidden_size,))(context_vector)
    h_t = Reshape((hidden_size,))(h_t)
    pre_activation = concatenate([context_vector, h_t], name='attention_output')
    attention_vector = Dense(hidden_size, use_bias=False, activation='tanh', name='attention_vector')(pre_activation)
    return attention_vector


def lstm(nclass, n_features, series_len):
    inputs = Input(shape=(series_len, n_features,))
    lstm_out = LSTM(10, return_sequences=True, dropout=0.5)(inputs)
    attention_mul = attention_3d_block(lstm_out, series_len)
    layer1_out = Dense(200, activation='relu')(attention_mul)
    layer2_out = Dense(500, activation='relu')(layer1_out)
    output = Dense(nclass, activation='softmax', activity_regularizer=regularizers.l2(0.0001))(layer2_out)
    model = Model(input=[inputs], output=output)
    return model


### try 1: DS creation from Chat GPT


In [8]:
import os
import numpy as np
import cv2

def load_images_from_folder(folder_path):
    images = []
    for filename in os.listdir(folder_path):
        img = cv2.imread(os.path.join(folder_path, filename))
        if img is not None:
            images.append(img)
    return images

def create_dataset(data_folder, sequence_length, target_time_steps):
    dataset = []
    labels = []

    for event_folder in os.listdir(data_folder):
        event_path = os.path.join(data_folder, event_folder)
        event_images = load_images_from_folder(event_path)

        # Create sequences for the event images
        event_sequences = create_sequences(event_images, sequence_length)

        # Create labels for the event sequences
        event_labels = create_labels(event_sequences, target_time_steps)

        # Append the event sequences and labels to the main dataset
        dataset.extend(event_sequences)
        labels.extend(event_labels)

    return np.array(dataset), np.array(labels)

# Function to create sequences of AIA images with a fixed time duration
def create_sequences(images, sequence_length):
    sequences = []
    num_images = len(images)
    for i in range(0, num_images - sequence_length + 1, sequence_length):
        sequence = images[i:i + sequence_length]
        sequences.append(sequence)
    return sequences

# Function to create target labels for sequences
def create_labels(sequences, target_time_steps):
    labels = []
    for sequence in sequences:
        # Check if a CME occurs within the next target_time_steps
        cme_occurs = any(sequence[i]['CME_occurs'] for i in range(target_time_steps))
        labels.append(int(cme_occurs))
    return labels

# Assuming the AIA image data is organized in separate event folders within the 'data_folder'
data_folder = '/path/to/data_folder'
sequence_length = 6  # Choose the number of images in each sequence
target_time_steps = 12  # Choose the time steps (intervals) for predicting CME occurrence

# Create the dataset and labels
dataset, labels = create_dataset(data_folder, sequence_length, target_time_steps)


FileNotFoundError: ignored

### try 2: DS creation from Chat GPT

In [None]:
def load_images_from_directory(directory):
    images = []
    timestamps = []
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(".jpeg"):
            img = load_img(os.path.join(directory, filename))
            img_array = img_to_array(img)
            images.append(img_array)
            timestamp = filename.split("__")[0]
            timestamps.append(timestamp)
    return images, timestamps

def create_conv_lstm_dataset(data_dir, T, test_size=0.2):
    pos_dir = os.path.join(data_dir, 'pos')
    neg_dir = os.path.join(data_dir, 'neg')

    pos_images, pos_timestamps = load_images_from_directory(pos_dir)
    neg_images, neg_timestamps = load_images_from_directory(neg_dir)

    all_images = np.array(pos_images + neg_images)
    all_timestamps = pos_timestamps + neg_timestamps
    labels = np.array([1] * len(pos_images) + [0] * len(neg_images))

    X = []
    y = []

    for i in range(len(all_images) - T):
        sequence = all_images[i:i + T]
        label = labels[i + T]
        X.append(sequence)
        y.append(label)

    X = np.array(X)
    y = np.array(y)

    X_train, X_test, y_train, y_test, timestamps_train, timestamps_test = train_test_split(
        X, y, all_timestamps[T:], test_size=test_size, random_state=42
    )

    return X_train, y_train, X_test, y_test, timestamps_train, timestamps_test

data_dir = '/data'
T = 5  # Number of time steps for the ConvLSTM sequence
X_train, y_train, X_test, y_test, timestamps_train, timestamps_test = create_conv_lstm_dataset(data_dir, T)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

# Define your ConvLSTM model using Keras here
# ...

# Compile and train the model
# model.compile(...)
# model.fit(...)


## Create the Dataset

### Load Event-Lists

Same list from *Liu et al. 2020*, however the events were sampled for predicting CMEs within the next x hours (x = 12, 24, 36, 48 or 60).

> N means there is a >=M class flare within the next x hours but this flare is not associated with a CME. \\
> P means there is a >=M class flare within the next x hours and this flare is associated with a CME. \\
> The second column is titled Timestamp. \\
> The third column and fourth column are titled NOAA active region number and HARP number, respectively. \\
> Starting from the fifth column, you can see physical parameters of data samples, which include 18 SHARP parameters:
TOTUSJH, TOTPOT, TOTUSJZ, ABSNJZH, SAVNCPP, USFLUX, AREA_ACR, MEANPOT, R_VALUE, SHRGT45, MEANGAM, MEANJZH, MEANGBT, MEANGBZ, MEANJZD, MEANGBH, MEANSHR, MEANALP.

#### Base Event List

In [None]:
# load list with both (neg, pos) labels
df_cme_list = pd.read_csv(r'00_Dataset/event_lists/all_cme_events.csv', delimiter =';')

# convert time stamp such that sdo-cli can read them
df_cme_list['Start Time'] = pd.to_datetime(df_cme_list['Start Time']).dt.strftime('%Y-%m-%dT%H:%M:%S')
df_cme_list['Peak Time'] = pd.to_datetime(df_cme_list['Peak Time']).dt.strftime('%Y-%m-%dT%H:%M:%S')
df_cme_list['End Time'] = pd.to_datetime(df_cme_list['End Time']).dt.strftime('%Y-%m-%dT%H:%M:%S')
df_cme_list.head()

#### Training List

In [None]:
# load list with both (neg, pos) labels of training list for t= 12 h prediction window
df_cme_training_12 = pd.read_csv(r'00_Dataset/event_lists/Liu2020_CME_data_samples/normalized_training_12.csv', delimiter =',')
df_cme_training_12.shape
df_cme_training_12.head()

In [None]:
# reduce dataframe to relevant columns
df_cme_training_12 = df_cme_training_12.drop(['TOTUSJH',	'TOTPOT',	'TOTUSJZ',	'ABSNJZH',	'SAVNCPP',
                     'USFLUX', 'AREA_ACR', 'MEANPOT', 'R_VALUE', 'SHRGT45',
                     'MEANGAM', 'MEANJZH', 'MEANGBT', 'MEANGBZ', 'MEANJZD',
                     'MEANGBH', 'MEANSHR', 'MEANALP'], axis=1)


In [None]:
df_cme_training_12['Timestamp'] = pd.to_datetime(df_cme_training_12['Timestamp']).dt.strftime('%Y-%m-%dT%H:%M:%S')
df_cme_training_12.tail()

In [None]:
df_cme_training_12 = df_cme_training_12.tail(df_cme_training_12.shape[0] -282)
df_cme_training_12

In [None]:
# get negative samples
df_cme_training_12_neg = df_cme_training_12.loc[df_cme_training_12['Label'] == 'N']
df_cme_training_12_neg = df_cme_training_12_neg.reset_index(drop=True)
print('There are', df_cme_training_12_neg.shape[0], 'negative samples in the training set.\n')
df_cme_training_12_neg.tail()

In [None]:
# get positive samples
df_cme_training_12_pos = df_cme_training_12.loc[df_cme_training_12['Label'] == 'P']
df_cme_training_12_pos = df_cme_training_12_pos.reset_index(drop = True)
print('There are', df_cme_training_12_pos.shape[0], 'positive samples in the training set.\n')
df_cme_training_12_pos.tail()

#### Testing List

In [None]:
# load list with both (neg, pos) labels of testing list for t= 12 h
df_cme_test_12 = pd.read_csv(r'00_Dataset/event_lists/Liu2020_CME_data_samples/normalized_testing_12.csv', delimiter =',')
df_cme_test_12.head()

In [None]:
# reduce dataframe to relevant columns
df_cme_test_12 = df_cme_test_12.drop(['TOTUSJH',	'TOTPOT',	'TOTUSJZ',	'ABSNJZH',	'SAVNCPP',
                     'USFLUX', 'AREA_ACR', 'MEANPOT', 'R_VALUE', 'SHRGT45',
                     'MEANGAM', 'MEANJZH', 'MEANGBT', 'MEANGBZ', 'MEANJZD',
                     'MEANGBH', 'MEANSHR', 'MEANALP'], axis=1)
df_cme_test_12.head()

In [None]:
# convert time stamp
# convert time stamp such that sdo-cli can read them
df_cme_test_12['Timestamp'] = pd.to_datetime(df_cme_test_12['Timestamp']).dt.strftime('%Y-%m-%dT%H:%M:%S')
df_cme_test_12.tail()

In [None]:
df_cme_test_12_neg = df_cme_test_12.loc[df_cme_test_12['Label'] == 'N']
df_cme_test_12_neg = df_cme_test_12_neg.reset_index(drop=True)
print('There are', df_cme_test_12_neg.shape[0], 'negative samples in the test set.\n')
df_cme_test_12_neg.tail()

In [None]:
df_cme_test_12_pos = df_cme_test_12.loc[df_cme_test_12['Label'] == 'P']
df_cme_test_12_pos = df_cme_test_12_pos.reset_index(drop=True)
print('There are', df_cme_test_12_pos.shape[0], 'positive samples in the test set.\n')
df_cme_test_12_pos.tail()

By now we have four event lists from the sampled dataset:


*   df_cme_test_12_neg >> 762
*   df_cme_test_12_pos >> 550
*   df_cme_training_12_neg >> 16678
*   df_cme_training_12_pos >> 3387 samples


### Download and Check the Image-Data

#### Training - negative

In [None]:
# create_sdo_aia_dataset(output_dir = './data/Liu2020_sampled/training_12/neg/', start_idx=0,  event_list = df_cme_training_12_neg, dt = '10min', wavelength = '171')

In [None]:
directory_path_training_12_neg = '00_Dataset/Liu2020_sampled/training_12/neg/'
result_training_12_neg, missing_files_training_neg = compare_filenames_with_dataframe(directory_path_training_12_neg, df_cme_training_12_neg, 171)

#### Training - positive

In [None]:
# create_sdo_aia_dataset(output_dir = './data/Liu2020_sampled/training_12/pos/', start_idx=0, event_list = df_cme_training_12_pos, dt = '10min', wavelength = '171')

In [None]:
directory_path_training_12_pos = '00_Dataset/Liu2020_sampled/training_12/pos/'
result_training_12_pos, missing_files_training_pos = compare_filenames_with_dataframe(directory_path_training_12_pos, df_cme_training_12_pos, 171)

#### Testing - negative

In [None]:
# create_sdo_aia_dataset(output_dir = './data/Liu2020_sampled/test_12/neg/', start_idx=0, event_list = df_cme_test_12_neg, dt = '10min', wavelength = '171')

In [None]:
directory_path_test_12_neg = '00_Dataset/Liu2020_sampled/test_12/neg/'
result_test_12_neg, missing_files_test_neg = compare_filenames_with_dataframe(directory_path_test_12_neg, df_cme_test_12_neg, 171)

#### Testing - positive

In [None]:
# create_sdo_aia_dataset(output_dir = './data/Liu2020_sampled/test_12/pos/', start_idx= 0, event_list = df_cme_test_12_pos, dt = '10min', wavelength = '171')

In [28]:
directory_path_test_12_pos = '00_Dataset/Liu2020_sampled/test_12/pos/'
result_test_12_neg, missing_test_pos = compare_filenames_with_dataframe(directory_path_test_12_pos, df_cme_test_12_pos, 171)

Total Directory Files:  550
Total DataFrame Names:  550
Common Files:  549
Missing Files:  1
Extra Files:  1


### Display a Sample Image-Series

In [None]:
# display a sequence???



## Build the ConvLSTM Model

In [None]:
if __name__ == '__main__':
    flare_label = sys.argv[1]
    train_again = int(sys.argv[2])
    filepath = './'
    n_features = 0
    if flare_label == 'C':
        n_features = 14
    start_feature = 5
    mask_value = 0
    series_len = 10
    epochs = 7
    batch_size = 256
    nclass = 2
    result_file = './output.csv'

    if train_again == 1:
        # Train
        X_train_data, y_train_data = load_data(datafile=filepath + 'normalized_training.csv',
                                               flare_label=flare_label, series_len=series_len,
                                               start_feature=start_feature, n_features=n_features,
                                               mask_value=mask_value)

        X_train = np.array(X_train_data)
        y_train = np.array(y_train_data)
        y_train_tr = data_transform(y_train)

        class_weights = class_weight.compute_class_weight('balanced',
                                                          np.unique(y_train), y_train)
        class_weight_ = {0: class_weights[0], 1: class_weights[1]}
        # print(class_weight_)

        model = lstm(nclass, n_features, series_len)
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])

        history = model.fit(X_train, y_train_tr,
                            epochs=epochs, batch_size=batch_size,
                            verbose=False, shuffle=True, class_weight=class_weight_)
        model.save('./model.h5')
    else:
        model = load_model('./model.h5')

    # Test
    X_test_data, y_test_data = load_data(datafile=filepath + 'normalized_testing.csv',
                                         flare_label=flare_label, series_len=series_len,
                                         start_feature=start_feature, n_features=n_features,
                                         mask_value=mask_value)
    X_test = np.array(X_test_data)
    y_test = np.array(y_test_data)
    y_test_tr = data_transform(y_test)

    classes = model.predict(X_test, batch_size=batch_size, verbose=0, steps=None)

    with open(result_file, 'w', encoding='UTF-8') as result_csv:
        w = csv.writer(result_csv)
        with open(filepath + 'normalized_testing.csv', encoding='UTF-8') as data_csv:
            reader = csv.reader(data_csv)
            i = -1
            for line in reader:
                if i == -1:
                    line.insert(0, 'Predicted Label')
                else:
                    if classes[i][0] >= 0.5:
                        line.insert(0, 'Positive')
                    else:
                        line.insert(0, 'Negative')
                i += 1
                w.writerow(line)

### try: ChatGPT

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, ConvLSTM2D, MaxPooling2D, Flatten, Dense

# Assuming you have already created the 'dataset' and 'labels' using the provided code

# Normalize the pixel values in the dataset to a range [0, 1]
dataset = dataset.astype('float32') / 255.0

# Reshape the dataset to match the input shape expected by ConvLSTM
# Assuming the images have shape (image_height, image_width, num_channels)
# and 'sequence_length' images per sequence
sequence_length, image_height, image_width, num_channels = dataset.shape
input_shape = (sequence_length, image_height, image_width, num_channels)

# Build the ConvLSTM model
model = Sequential()
model.add(ConvLSTM2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=input_shape, padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Split the dataset into training and testing sets
train_samples = int(0.8 * len(dataset))  # 80% for training
x_train, y_train = dataset[:train_samples], labels[:train_samples]
x_test, y_test = dataset[train_samples:], labels[train_samples:]

# Train the ConvLSTM model
epochs = 10  # Adjust the number of epochs based on your dataset and model complexity
batch_size = 32
model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_test, y_test))


## Evaluate the Model