In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from utils.data import *
from utils.datetime import convert_to_pd_timestamp
from utils.maps import (location_map, zurich_map, penumbra_map, compactness_map,
                        xray_class_map)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import (RandomForestRegressor, GradientBoostingRegressor,
                              RandomForestClassifier, GradientBoostingClassifier)
from sklearn.svm import SVR, SVC
from sklearn.metrics import (accuracy_score, mean_squared_error, r2_score, mean_absolute_error,
                             precision_score, recall_score, f1_score, confusion_matrix)

import seaborn as sns

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import GridSearchCV

2024-08-13 01:43:29.997455: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-13 01:43:30.005172: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-13 01:43:30.082486: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-13 01:43:30.198209: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-13 01:43:30.317727: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been 

In [2]:
df = pd.read_csv("data/final-merged.csv")
df['Date'] = df['Date'].apply(convert_to_pd_timestamp)
df['Is Flare'] = df['Flares Count'].apply(lambda _x: 1 if _x > 0 else 0)
# Encoding categorical columns: 'Zurich Class', 'Penumbra Class', 'Compactness Class'
label_encoders = {}
for col in ['Zurich Class', 'Penumbra Class', 'Compactness Class', 'NS', 'EW', 'X-ray class']:
  le = LabelEncoder()
  df[col] = le.fit_transform(df[col].astype(str))
  label_encoders[col] = le

def reverse_labelling(_df):
  # Reverse the label encoding
  for _col, _le in label_encoders.items():
    _df[_col] = _le.inverse_transform(_df[_col])

In [3]:
columns_for_x = ['Date', 'Total Sunspot', 'Max Size',
                 'Zurich Class', 'Penumbra Class', 'Compactness Class',
                 'NS', 'Lat', 'EW', 'Lan']

X = df[columns_for_x]
y_is_flare = df['Is Flare']

X_train, X_test, y_is_flare_train, y_is_flare_test = train_test_split(
  X, y_is_flare, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
models_dict = {
  "LR": LinearRegression,
  "LgR": LogisticRegression,  # classifier
  "DTR": DecisionTreeRegressor,
  "DTC": DecisionTreeClassifier,  # classifier
  "RFR": RandomForestRegressor,
  "RFC": RandomForestClassifier,  # classifier
  "SVR": SVR,
  "SVM": SVC,  # classifier
  "GBR": GradientBoostingRegressor,
  "GBC": GradientBoostingClassifier,  # classifier
}

def train_model(_model_key, _X_train, _y_train, **kwargs):
  if _model_key not in models_dict.keys():
    raise ValueError(f"Model for key `{_model_key}` not found! Available keys: {', '.join(models_dict.keys())}")
  
  model = models_dict[_model_key](**kwargs)
  model.fit(_X_train, _y_train)
  return model

def evaluate_model(_model, _X_test, _y_test):
  _predictions = _model.predict(_X_test)
  _mse = mean_squared_error(_y_test, _predictions)
  _mae = mean_absolute_error(_y_test, _predictions)
  _r2 = r2_score(_y_test, _predictions)
  print(f"MSE: {_mse}, MAE: {_mae}, R2 Score: {_r2}")
  return _mse, _mae, _r2

def evaluate_classification_model(_model, _X_test, _y_test):
  _predictions = _model.predict(_X_test)
  _accuracy = accuracy_score(_y_test, _predictions)
  _precision = precision_score(_y_test, _predictions, zero_division=1)
  _recall = recall_score(_y_test, _predictions)
  _f1 = f1_score(_y_test, _predictions)
  _conf_matrix = confusion_matrix(_y_test, _predictions)
  return _accuracy, _precision, _recall, _f1, _conf_matrix

In [5]:
# Parameter grid to intentionally cause overfitting
param_grid = {
  'n_estimators': [200, 500, 1000],         # Increase the number of trees
  'max_depth': [None, 50, 100],             # Increase the tree depth
  'min_samples_split': [2, 5],              # Reduce the minimum samples required to split a node
  'min_samples_leaf': [1, 2],               # Reduce the minimum samples required at a leaf node
  # 'max_features': [None, 'sqrt', 'log2'],   # Use all features or most features
  # 'bootstrap': [False]                      # Disable bootstrap sampling
}

grid_search = GridSearchCV(
  estimator=RandomForestClassifier(random_state=42),
  param_grid=param_grid,
  scoring='accuracy',
  cv=3,  # Cross-validation fold
  verbose=1,
  n_jobs=-1
)

grid_search.fit(X_train_scaled, y_is_flare_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

print(f"Best Parameters: {best_params}")
print(f"Best Cross-Validation Score: {best_score}")

Fitting 3 folds for each of 36 candidates, totalling 108 fits




Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
Best Cross-Validation Score: 0.8273636162349054


In [6]:
# Parameter grid to intentionally cause overfitting
param_grid = {
  'n_estimators': [200, 500],         # Increase the number of trees
  'max_depth': [50, 100],             # Increase the tree depth
  'min_samples_split': [2, 5],              # Reduce the minimum samples required to split a node
  'min_samples_leaf': [1, 2],               # Reduce the minimum samples required at a leaf node
  # 'max_features': [None, 'sqrt', 'log2'],   # Use all features or most features
  # 'bootstrap': [False]                      # Disable bootstrap sampling
}

grid_search = GridSearchCV(
  estimator=GradientBoostingClassifier(random_state=42),
  param_grid=param_grid,
  scoring='accuracy',
  cv=3,  # Cross-validation fold
  verbose=1,
  n_jobs=-1
)

grid_search.fit(X_train_scaled, y_is_flare_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

print(f"Best Parameters: {best_params}")
print(f"Best Cross-Validation Score: {best_score}")

Fitting 3 folds for each of 16 candidates, totalling 48 fits




Best Parameters: {'max_depth': 50, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 500}
Best Cross-Validation Score: 0.8214875751537138


In [10]:
# Parameter grid to intentionally cause overfitting
param_grid = {
  'penalty': ['l1', 'l2', 'elasticnet', 'none'],
  'C': [0.01, 0.1, 1, 10, 100],
  'solver': ['lbfgs', 'saga', 'liblinear'],
  'max_iter': [100, 200, 300]
}

grid_search = GridSearchCV(
  estimator=LogisticRegression(),
  param_grid=param_grid,
  scoring='accuracy',
  cv=3,  # Cross-validation fold
  verbose=1,
  n_jobs=-1
)

grid_search.fit(X_train_scaled, y_is_flare_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

print(f"Best Parameters: {best_params}")
print(f"Best Cross-Validation Score: {best_score}")

Fitting 3 folds for each of 180 candidates, totalling 540 fits
Best Parameters: {'C': 1, 'max_iter': 200, 'penalty': 'l2', 'solver': 'saga'}
Best Cross-Validation Score: 0.813363309658849


315 fits failed out of a total of 540.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
45 fits failed with the following error:
Traceback (most recent call last):
  File "/home/debashis/works/dissertation/env/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/debashis/works/dissertation/env/lib/python3.10/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/home/debashis/works/dissertation/env/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py", line 1194, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/home/debashis/works/di

In [12]:
# Parameter grid to intentionally cause overfitting
param_grid = {
  'criterion': ['gini', 'entropy'],
  'splitter': ['best', 'random'],
  'max_depth': [None, 10, 20, 30],
  'min_samples_split': [2, 5, 10],
  'min_samples_leaf': [1, 2, 4],
  'max_features': [None, 'sqrt', 'log2']
}

grid_search = GridSearchCV(
  estimator=DecisionTreeClassifier(),
  param_grid=param_grid,
  scoring='accuracy',
  cv=5,  # Cross-validation fold
  verbose=1,
  n_jobs=-1
)

grid_search.fit(X_train_scaled, y_is_flare_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

print(f"Best Parameters: {best_params}")
print(f"Best Cross-Validation Score: {best_score}")

Fitting 5 folds for each of 432 candidates, totalling 2160 fits
Best Parameters: {'criterion': 'entropy', 'max_depth': 10, 'max_features': None, 'min_samples_leaf': 2, 'min_samples_split': 10, 'splitter': 'best'}
Best Cross-Validation Score: 0.8161055181215747


In [13]:
# Parameter grid to intentionally cause underfitting
param_grid = {
  'n_estimators': [10, 20, 40, 100],               # Very few trees
  'max_depth': [2, 3],                    # Shallow trees
  'min_samples_split': [10, 20],          # High number of samples required to split
  'min_samples_leaf': [10, 20],           # High number of samples required at a leaf node
  'max_features': ['sqrt', 'log2']        # Limiting the number of features
}

grid_search = GridSearchCV(
  estimator=RandomForestClassifier(random_state=42),
  param_grid=param_grid,
  scoring='accuracy',
  cv=3,  # Cross-validation fold
  verbose=1,
  n_jobs=-1
)

grid_search.fit(X_train_scaled, y_is_flare_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

print(f"Best Parameters: {best_params}")
print(f"Best Cross-Validation Score: {best_score}")


Fitting 3 folds for each of 64 candidates, totalling 192 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters: {'max_depth': 3, 'max_features': 'sqrt', 'min_samples_leaf': 10, 'min_samples_split': 10, 'n_estimators': 100}
Best Cross-Validation Score: 0.8136017577027234


In [14]:
# Parameter grid to intentionally cause underfitting
param_grid = {
  'n_estimators': [10, 20, 40, 100],               # Very few trees
  'max_depth': [2, 3],                    # Shallow trees
  'learning_rate': [0.001, 0.01],         # Extremely low learning rates
  'min_samples_split': [10, 20],          # High number of samples required to split
  'min_samples_leaf': [10, 20]            # High number of samples required at a leaf node
}

grid_search = GridSearchCV(
  estimator=GradientBoostingClassifier(random_state=42),
  param_grid=param_grid,
  scoring='accuracy',
  cv=3,  # Cross-validation fold
  verbose=1,
  n_jobs=-1
)

grid_search.fit(X_train_scaled, y_is_flare_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

print(f"Best Parameters: {best_params}")
print(f"Best Cross-Validation Score: {best_score}")

Fitting 3 folds for each of 64 candidates, totalling 192 fits
Best Parameters: {'learning_rate': 0.01, 'max_depth': 3, 'min_samples_leaf': 10, 'min_samples_split': 10, 'n_estimators': 100}
Best Cross-Validation Score: 0.8010832353993154


In [16]:
# Parameter grid to intentionally cause underfitting
param_grid = {
  'penalty': [None],                    # No regularization (in this context, it's underfitting)
  'C': [1e-6, 1e-5],                      # Extremely high regularization strength
  'solver': ['lbfgs'],                    # Standard solver with no additional complexity
  'max_iter': [10, 20]                    # Very few iterations
}

grid_search = GridSearchCV(
  estimator=LogisticRegression(),
  param_grid=param_grid,
  scoring='accuracy',
  cv=3,  # Cross-validation fold
  verbose=1,
  n_jobs=-1
)

grid_search.fit(X_train_scaled, y_is_flare_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

print(f"Best Parameters: {best_params}")
print(f"Best Cross-Validation Score: {best_score}")

Fitting 3 folds for each of 4 candidates, totalling 12 fits




Best Parameters: {'C': 1e-06, 'max_iter': 10, 'penalty': None, 'solver': 'lbfgs'}
Best Cross-Validation Score: 0.813363309658849


In [17]:
# Parameter grid to intentionally cause underfitting
param_grid = {
    'criterion': ['gini'],                  # Default criterion
    'splitter': ['best'],                   # Default splitter
    'max_depth': [1, 2],                    # Extremely shallow trees
    'min_samples_split': [10, 20],          # High number of samples required to split
    'min_samples_leaf': [10, 20],           # High number of samples required at a leaf node
    'max_features': ['sqrt', 'log2']        # Limiting the number of features
}

grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(),
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,  # Cross-validation fold
    verbose=1,
    n_jobs=-1
)

grid_search.fit(X_train_scaled, y_is_flare_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

print(f"Best Parameters: {best_params}")
print(f"Best Cross-Validation Score: {best_score}")

Fitting 3 folds for each of 16 candidates, totalling 48 fits
Best Parameters: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'sqrt', 'min_samples_leaf': 20, 'min_samples_split': 10, 'splitter': 'best'}
Best Cross-Validation Score: 0.8052731081702519


In [5]:
def build_and_train_cnn_overfit(_X_train, _y_train, _X_test, _y_test,
                                _epochs=100, _batch_size=16):
  # Reshape input to be 3D [samples, timesteps, features] for CNN
  X_train_cnn = np.expand_dims(_X_train, axis=2)
  X_test_cnn = np.expand_dims(_X_test, axis=2)

  model = Sequential()
  model.add(Conv1D(filters=128, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], X_train_cnn.shape[2])))
  model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
  model.add(MaxPooling1D(pool_size=2))
  model.add(Flatten())
  model.add(Dense(256, activation='relu'))
  model.add(Dense(256, activation='relu'))
  model.add(Dense(1, activation='sigmoid'))

  model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy',])

  # Increase the number of epochs and reduce dropout to overfit
  model.fit(X_train_cnn, _y_train, validation_data=(X_test_cnn, _y_test), epochs=_epochs, batch_size=_batch_size)

  return model

def build_and_train_lstm_overfit(_X_train, _y_train, _X_test, _y_test,
                                 _epochs=100, _batch_size=16):
  # Reshape input to be 3D [samples, timesteps, features] for LSTM
  X_train_lstm = np.expand_dims(_X_train, axis=1)
  X_test_lstm = np.expand_dims(_X_test, axis=1)

  model = Sequential()
  model.add(LSTM(100, input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2]), return_sequences=True))
  model.add(LSTM(100, return_sequences=True))
  model.add(LSTM(100, return_sequences=False))
  model.add(Dense(256, activation='relu'))
  model.add(Dense(1, activation='sigmoid'))

  model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy',])

  # Increase the number of epochs and reduce dropout to overfit
  model.fit(X_train_lstm, _y_train, validation_data=(X_test_lstm, _y_test), epochs=_epochs, batch_size=_batch_size)

  return model

def build_and_train_cnn_underfit(_X_train, _y_train, _X_test, _y_test,
                                 _epochs=5, _batch_size=64):
  # Reshape input to be 3D [samples, timesteps, features] for CNN
  X_train_cnn = np.expand_dims(_X_train, axis=2)
  X_test_cnn = np.expand_dims(_X_test, axis=2)

  model = Sequential()
  model.add(Conv1D(filters=16, kernel_size=5, activation='relu', input_shape=(X_train_cnn.shape[1], X_train_cnn.shape[2])))
  model.add(MaxPooling1D(pool_size=2))
  model.add(Dropout(0.5))
  model.add(Flatten())
  model.add(Dense(10, activation='relu'))
  model.add(Dense(1, activation='sigmoid'))

  model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy',])

  # Reduce the number of epochs and increase dropout to underfit
  model.fit(X_train_cnn, _y_train, validation_data=(X_test_cnn, _y_test), epochs=_epochs, batch_size=_batch_size)

  return model

def build_and_train_lstm_underfit(_X_train, _y_train, _X_test, _y_test,
                                  _epochs=5, _batch_size=64):
  # Reshape input to be 3D [samples, timesteps, features] for LSTM
  X_train_lstm = np.expand_dims(_X_train, axis=1)
  X_test_lstm = np.expand_dims(_X_test, axis=1)

  model = Sequential()
  model.add(LSTM(10, input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2]), return_sequences=False))
  model.add(Dropout(0.5))
  model.add(Dense(1, activation='sigmoid'))

  model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy',])

  # Reduce the number of epochs and increase dropout to underfit
  model.fit(X_train_lstm, _y_train, validation_data=(X_test_lstm, _y_test), epochs=_epochs, batch_size=_batch_size)

  return model

In [19]:
cnn_overfit_model = build_and_train_cnn_overfit(X_train_scaled, y_is_flare_train,
                                                X_test_scaled, y_is_flare_test)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1723506489.858502  291935 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-08-13 00:48:10.025974: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1/100
[1m3670/3670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 4ms/step - accuracy: 0.8122 - loss: 0.4278 - val_accuracy: 0.8154 - val_loss: 0.4124
Epoch 2/100
[1m3670/3670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 4ms/step - accuracy: 0.8201 - loss: 0.4109 - val_accuracy: 0.8158 - val_loss: 0.4226
Epoch 3/100
[1m3670/3670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 4ms/step - accuracy: 0.8202 - loss: 0.4080 - val_accuracy: 0.8157 - val_loss: 0.4113
Epoch 4/100
[1m3670/3670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 4ms/step - accuracy: 0.8188 - loss: 0.4070 - val_accuracy: 0.8199 - val_loss: 0.4095
Epoch 5/100
[1m3670/3670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 4ms/step - accuracy: 0.8183 - loss: 0.4089 - val_accuracy: 0.8192 - val_loss: 0.4076
Epoch 6/100
[1m3670/3670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 4ms/step - accuracy: 0.8179 - loss: 0.4109 - val_accuracy: 0.8182 - val_loss: 0.4087
Epoc

In [6]:
lstm_overfit_model = build_and_train_lstm_overfit(X_train_scaled, y_is_flare_train,
                                                  X_test_scaled, y_is_flare_test)

Epoch 1/100


I0000 00:00:1723509831.643803  322538 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-08-13 01:43:51.671047: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
  super().__init__(**kwargs)


[1m3670/3670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.8106 - loss: 0.4299 - val_accuracy: 0.8167 - val_loss: 0.4130
Epoch 2/100
[1m3670/3670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.8180 - loss: 0.4106 - val_accuracy: 0.8186 - val_loss: 0.4146
Epoch 3/100
[1m3670/3670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.8222 - loss: 0.4074 - val_accuracy: 0.8180 - val_loss: 0.4092
Epoch 4/100
[1m3670/3670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.8201 - loss: 0.4099 - val_accuracy: 0.8151 - val_loss: 0.4134
Epoch 5/100
[1m3670/3670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.8201 - loss: 0.4090 - val_accuracy: 0.8180 - val_loss: 0.4080
Epoch 6/100
[1m3670/3670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.8199 - loss: 0.4074 - val_accuracy: 0.8166 - val_loss: 0.4090
Epoch 7/100
[1m3670/

In [7]:
cnn_underfit_model = build_and_train_cnn_underfit(X_train_scaled, y_is_flare_train,
                                                X_test_scaled, y_is_flare_test)

Epoch 1/5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m918/918[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7704 - loss: 0.4956 - val_accuracy: 0.8121 - val_loss: 0.4234
Epoch 2/5
[1m918/918[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8031 - loss: 0.4428 - val_accuracy: 0.8124 - val_loss: 0.4220
Epoch 3/5
[1m918/918[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8064 - loss: 0.4331 - val_accuracy: 0.8108 - val_loss: 0.4203
Epoch 4/5
[1m918/918[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8092 - loss: 0.4281 - val_accuracy: 0.8122 - val_loss: 0.4190
Epoch 5/5
[1m918/918[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8119 - loss: 0.4260 - val_accuracy: 0.8127 - val_loss: 0.4185


In [8]:
lstm_underfit_model = build_and_train_lstm_underfit(X_train_scaled, y_is_flare_train,
                                                  X_test_scaled, y_is_flare_test)

Epoch 1/5
[1m918/918[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.7423 - loss: 0.5500 - val_accuracy: 0.8157 - val_loss: 0.4190
Epoch 2/5
[1m918/918[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 956us/step - accuracy: 0.8134 - loss: 0.4372 - val_accuracy: 0.8178 - val_loss: 0.4140
Epoch 3/5
[1m918/918[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 960us/step - accuracy: 0.8183 - loss: 0.4283 - val_accuracy: 0.8182 - val_loss: 0.4121
Epoch 4/5
[1m918/918[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 938us/step - accuracy: 0.8172 - loss: 0.4203 - val_accuracy: 0.8189 - val_loss: 0.4108
Epoch 5/5
[1m918/918[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8118 - loss: 0.4260 - val_accuracy: 0.8190 - val_loss: 0.4100
