In [49]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from joblib import dump, load
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import L1L2
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from scikeras.wrappers import KerasClassifier
from keras.layers import LSTM, Dense, Dropout, Input


def read_csv_files(train_file, test_file, sep=None, header='infer'):
    """Load the train and test tables and report their shapes.

    Args:
        train_file: Path of the training file, handed to ``pd.read_csv``.
        test_file: Path of the test file, handed to ``pd.read_csv``.
        sep: Field separator forwarded to ``pd.read_csv``.
        header: ``header`` argument forwarded to ``pd.read_csv``.

    Returns:
        Tuple ``(df_train, df_test)`` of the two loaded DataFrames.
    """
    read_options = {"sep": sep, "header": header}
    # Train is read first, then test, both with the same parser options.
    df_train, df_test = (pd.read_csv(path, **read_options) for path in (train_file, test_file))
    print(df_train.shape, df_test.shape)
    return df_train, df_test

def create_sequences(X, y, seq_len):
    """Slice ``X`` into overlapping windows of ``seq_len`` consecutive rows.

    Each window keeps every feature column and is labelled with the ``y``
    value of its last row.

    Args:
        X: 2-D array indexed as ``X[row, column]``.
        y: Sequence of labels indexed by row.
        seq_len: Number of consecutive rows per window.

    Returns:
        Tuple ``(X_seq, y_seq)`` of NumPy arrays built from the windows and
        their labels.
    """
    window_starts = range(X.shape[0] - seq_len + 1)
    windows = [X[start:start + seq_len, :] for start in window_starts]
    # The label belongs to the final row of the window.
    labels = [y[start + seq_len - 1] for start in window_starts]
    return np.array(windows), np.array(labels)

def prepare_data(df_train, df_test, seq_len, target_col=0, scaler=None, use_label_encoder=True, output_type=None):
    """Turn raw train/test tables into windowed features and labels.

    The label column is removed from the feature matrix, so the model can
    never see the value it is asked to predict. The input DataFrames are
    not modified.

    Args:
        df_train: Training DataFrame; column position ``target_col`` holds the label.
        df_test: Test DataFrame with the same column layout.
        seq_len: Number of consecutive rows per window (see ``create_sequences``).
        target_col: Positional index of the label column.
        scaler: Object exposing ``fit_transform`` / ``transform``. It is fitted on
            the training features only and then applied to the test features.
            ``None`` (default) creates a fresh ``StandardScaler`` per call.
        use_label_encoder: If true, labels are mapped with a ``LabelEncoder``
            fitted on the training labels; otherwise they are used as-is.
        output_type: ``"torch"`` returns torch tensors (float32 features, long
            labels); any other value returns NumPy arrays.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` of windowed data.
    """
    # A fresh scaler per call avoids sharing fitted state through a default argument.
    if scaler is None:
        scaler = StandardScaler()

    raw_y_train = df_train.iloc[:, target_col]
    raw_y_test = df_test.iloc[:, target_col]
    if use_label_encoder:
        label_encoder = LabelEncoder()
        y_train = label_encoder.fit_transform(raw_y_train)
        y_test = label_encoder.transform(raw_y_test)
    else:
        y_train = raw_y_train.values
        y_test = raw_y_test.values

    # Keep every column except the label column (selected by position), so the
    # label does not leak into the features.
    feature_positions = np.delete(np.arange(df_train.shape[1]), target_col)
    train_features = df_train.iloc[:, feature_positions].values
    test_features = df_test.iloc[:, feature_positions].values

    # The scaler is fitted once on the whole training matrix; for column-wise
    # scalers such as StandardScaler/MinMaxScaler this matches fitting each
    # column separately.
    X_train = scaler.fit_transform(train_features)
    X_test = scaler.transform(test_features)

    X_train_seq, y_train_seq = create_sequences(X_train, y_train, seq_len)
    X_test_seq, y_test_seq = create_sequences(X_test, y_test, seq_len)

    if output_type == "torch":
        # Imported here because the notebook does not import torch at the top
        # and only this branch needs it.
        import torch

        return (
            torch.tensor(X_train_seq, dtype=torch.float32),
            torch.tensor(y_train_seq, dtype=torch.long),
            torch.tensor(X_test_seq, dtype=torch.float32),
            torch.tensor(y_test_seq, dtype=torch.long),
        )

    return X_train_seq, y_train_seq, X_test_seq, y_test_seq

def create_LSTM_classification(input_shape, unique_classes, activation_function='relu', optimizer='adam', num_hidden_layers=2,
                               num_neurons_per_layer=64, dropout_rate=0.2, early_stop_patience=5,
                               kernel_regularizer=None, verbose=None, target_col=None):
    """Build and compile a stacked-LSTM classifier.

    Layout: input -> dropout -> ``num_hidden_layers`` x (LSTM returning
    sequences -> dropout) -> LSTM returning the last state -> dropout ->
    dense output layer.

    Args:
        input_shape: Shape passed to the Keras ``Input`` layer.
        unique_classes: Number of classes. Exactly 2 gives one sigmoid unit with
            binary cross-entropy; any other value gives a softmax layer of that
            width with categorical cross-entropy.
        activation_function: Activation of the sequence-returning LSTM layers.
            The final LSTM layer keeps the Keras default activation.
        optimizer: Optimizer passed to ``compile``.
        num_hidden_layers: Number of sequence-returning LSTM layers.
        num_neurons_per_layer: Units in every LSTM layer.
        dropout_rate: Rate used by every ``Dropout`` layer.
        early_stop_patience: Accepted but not used inside this function.
        kernel_regularizer: Kernel regularizer applied to every LSTM layer.
        verbose: Accepted but not used inside this function.
        target_col: Accepted but not used inside this function.

    Returns:
        The compiled ``Sequential`` model, tracking accuracy as its metric.
    """
    # Output head configuration depends only on whether the task is binary.
    binary_task = unique_classes == 2
    loss = 'binary_crossentropy' if binary_task else 'categorical_crossentropy'
    output_activation = 'sigmoid' if binary_task else 'softmax'
    n_output_neurons = 1 if binary_task else unique_classes

    network = Sequential()

    # Input followed by dropout on the raw features
    network.add(Input(shape=input_shape))
    network.add(Dropout(dropout_rate))

    # Intermediate LSTM layers emit full sequences for the next LSTM layer
    for _ in range(num_hidden_layers):
        sequence_layer = LSTM(num_neurons_per_layer, activation=activation_function,
                              kernel_regularizer=kernel_regularizer, return_sequences=True)
        network.add(sequence_layer)
        network.add(Dropout(dropout_rate))

    # Last LSTM layer collapses the sequence to a single vector
    summary_layer = LSTM(num_neurons_per_layer, kernel_regularizer=kernel_regularizer,
                         return_sequences=False)
    network.add(summary_layer)
    network.add(Dropout(dropout_rate))

    # Classification head
    network.add(Dense(n_output_neurons, activation=output_activation))

    network.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    return network

def LSTM_classification_random_search(X_train, y_train, X_test, y_test, target_col, seq_len, n_iter=1, n_times=1, cv=3,
                                  verbose=1,dropout_rates=[0.1, 0.2, 0.3, 0.4],
                                  activation_functions=["relu", "tanh"],
                                  optimizers=["adam", "rmsprop", "adagrad"],
                                  batch_sizes=[32, 64, 128], epochs=[50, 100, 200],
                                  early_stop_patience=[5, 10, 15],
                                  num_hidden_layers_range=range(1, 4),
                                  neurons_per_layer_range=[16, 32, 48, 64],
                                  learning_rate_range=[0.001, 0.01, 0.1],
                                  model_save=True, save_directory=None,
                                  plot_loss=True, test_size=0.3, random_state=42):
    """Randomly search LSTM hyperparameters, retrain the winner and evaluate it.

    The procedure is repeated ``n_times``. In each repetition a
    ``RandomizedSearchCV`` picks the best hyperparameters, a fresh model is
    trained with them, and that model is scored on the test data.

    A validation split is carved out of the training data (``test_size``,
    ``random_state``) and used for early stopping and checkpointing, so the
    test data is only used for the final metrics.

    Args:
        X_train: Windowed training features; ``X_train.shape[2]`` is the feature count.
        y_train: Integer class labels for ``X_train``.
        X_test: Windowed test features.
        y_test: Integer class labels for ``X_test``.
        target_col: Accepted for interface compatibility; not used here.
        seq_len: Window length; first dimension of the model input shape.
        n_iter: Number of parameter settings sampled per search.
        n_times: Number of search/retrain/evaluate repetitions.
        cv: Cross-validation setting passed to ``RandomizedSearchCV``.
        verbose: Verbosity for the scikeras wrapper and the callbacks.
        dropout_rates: Candidate dropout rates.
        activation_functions: Candidate LSTM activations.
        optimizers: Candidate optimizers.
        batch_sizes: Candidate batch sizes.
        epochs: Candidate epoch counts.
        early_stop_patience: Candidate patience values for the final training
            run. The search itself uses the smallest value, because one
            callback instance is shared by all candidates.
        num_hidden_layers_range: Candidate numbers of stacked LSTM layers.
        neurons_per_layer_range: Candidate LSTM widths.
        learning_rate_range: Despite the name, these values are used as L2
            kernel-regularization strengths, not learning rates.
        model_save: If true, checkpoint the final model (``final_model{i}.h5``)
            and dump it with joblib (``best_model{i}.joblib``).
        save_directory: Directory for the joblib dump; current directory if ``None``.
        plot_loss: If true, plot training/validation loss of the final model.
        test_size: Fraction of the training data held out for validation.
        random_state: Seed for the validation split and the parameter sampling
            (offset by the repetition index so repetitions differ).

    Returns:
        DataFrame with one row per repetition: model label, train/test
        accuracy, confusion matrix, summary report and chosen hyperparameters.
    """
    input_shape = (seq_len, X_train.shape[2])
    unique_classes = len(np.unique(y_train))
    is_multiclass = unique_classes > 2

    # Hold out validation data from the training set so that early stopping
    # and model selection never look at the test set.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train, y_train, test_size=test_size, random_state=random_state)
    y_fit = np.asarray(y_fit)
    y_test = np.asarray(y_test)

    # Multi-class models are trained on one-hot targets; metrics below use the
    # integer labels directly.
    if is_multiclass:
        y_fit_model = to_categorical(y_fit, num_classes=unique_classes)
        y_val_model = to_categorical(y_val, num_classes=unique_classes)
    else:
        y_fit_model, y_val_model = y_fit, y_val

    print(X_fit.shape, y_fit_model.shape, X_val.shape, y_val_model.shape, X_test.shape, y_test.shape)

    # Model
    Keras_classifier = KerasClassifier(model=create_LSTM_classification, input_shape=input_shape,
                                       unique_classes=unique_classes, verbose=verbose)

    # Define the random search parameters
    param_dist = {
        "model__activation_function": activation_functions,
        "model__dropout_rate": dropout_rates,
        "model__optimizer": optimizers,
        "model__num_hidden_layers": num_hidden_layers_range,
        "model__num_neurons_per_layer": neurons_per_layer_range,
        "model__kernel_regularizer": [L1L2(l1=0, l2=regularization_strength) for regularization_strength in learning_rate_range],
        "batch_size": batch_sizes,
        "epochs": epochs,
        "model__early_stop_patience": early_stop_patience
    }

    # EarlyStopping needs a single integer; passing the whole candidate list
    # fails when Keras compares its wait counter against it.
    search_patience = int(np.min(np.atleast_1d(early_stop_patience)))

    all_results = []

    for i in range(n_times):
        print(f"Iteration {i}")

        # refit=False: the winning configuration is retrained explicitly below,
        # so refitting inside the search would be a wasted training run.
        search_seed = None if random_state is None else random_state + i
        random_search = RandomizedSearchCV(Keras_classifier, param_distributions=param_dist, n_iter=n_iter,
                                           cv=cv, error_score=np.nan, verbose=2,
                                           refit=False, random_state=search_seed)

        search_es = EarlyStopping(monitor='val_loss', mode='min', verbose=verbose, patience=search_patience)
        random_search.fit(X_fit, y_fit_model, validation_data=(X_val, y_val_model), callbacks=[search_es])

        best_params = random_search.best_params_

        # Create and fit the final model with the best parameters
        final_model = create_LSTM_classification(input_shape=input_shape, unique_classes=unique_classes,
                                                 dropout_rate=best_params['model__dropout_rate'],
                                                 activation_function=best_params['model__activation_function'],
                                                 optimizer=best_params['model__optimizer'],
                                                 num_hidden_layers=best_params['model__num_hidden_layers'],
                                                 num_neurons_per_layer=best_params['model__num_neurons_per_layer'],
                                                 kernel_regularizer=best_params['model__kernel_regularizer'],
                                                 early_stop_patience=best_params['model__early_stop_patience'])
        final_callbacks = [EarlyStopping(monitor='val_loss', mode='min', verbose=verbose,
                                         patience=best_params['model__early_stop_patience'])]
        if model_save:
            final_callbacks.append(ModelCheckpoint(f'final_model{i}.h5', monitor='val_loss', mode='min',
                                                   verbose=verbose, save_best_only=True))
        history = final_model.fit(X_fit, y_fit_model, validation_data=(X_val, y_val_model),
                                  epochs=best_params['epochs'], batch_size=best_params['batch_size'],
                                  callbacks=final_callbacks)

        # Plotting the training and validation loss
        if plot_loss:
            plt.figure(figsize=(10, 6))
            plt.plot(history.history['loss'], label='Training Loss')
            plt.plot(history.history['val_loss'], label='Validation Loss')
            plt.xlabel('Epochs')
            plt.ylabel('Loss')
            plt.legend()
            plt.title(f'Loss Plot of best model_{i}')
            plt.show()

        # Save the retrained model
        if model_save:
            model_name = f'best_model{i}.joblib'
            if save_directory:
                os.makedirs(save_directory, exist_ok=True)
                model_path = os.path.join(save_directory, model_name)
            else:
                model_path = model_name
            dump(final_model, model_path)

        # Predict once per split and convert the raw outputs to class labels
        train_scores = final_model.predict(X_fit)
        test_scores = final_model.predict(X_test)
        if is_multiclass:
            y_pred_train = np.argmax(train_scores, axis=1)
            y_pred_test = np.argmax(test_scores, axis=1)
        else:
            # The sigmoid output has shape (n, 1); flatten it to match the labels.
            y_pred_train = (train_scores > 0.5).astype('int32').ravel()
            y_pred_test = (test_scores > 0.5).astype('int32').ravel()

        # Calculate metrics (train accuracy is measured on the rows used for fitting)
        train_accuracy = accuracy_score(y_fit, y_pred_train)
        test_accuracy = accuracy_score(y_test, y_pred_test)
        conf_mat = confusion_matrix(y_test, y_pred_test)
        class_report = classification_report(y_test, y_pred_test, output_dict=True, zero_division=1)

        # Plot the predicted and actual test labels
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.plot(y_pred_test, label='Predicted')
        ax.plot(y_test, label='Actual')
        ax.set_xlabel('Time')
        ax.set_ylabel('Value')
        ax.set_title(f'Comparison of predicted and actual values')
        ax.legend()
        plt.show()

        # Create a simple report
        simple_report = {
            'Accuracy': round(class_report['accuracy'], 3),
            'Precision': round(class_report['macro avg']['precision'], 3),
            'Recall': round(class_report['macro avg']['recall'], 3),
            'F1-score': round(class_report['macro avg']['f1-score'], 3),
            'Support': round(class_report['macro avg']['support'], 3)
        }

        # One single-row DataFrame per repetition
        results = {
            "Model": [f"LSTM Classification with Randomized Search(best model_{i})"],
            "Train Accuracy": [train_accuracy],
            "Test Accuracy": [test_accuracy],
            "Confusion Matrix": [conf_mat.tolist()],
            "Classification Report": [simple_report],
            "Hyperparameters": [best_params],
        }
        all_results.append(pd.DataFrame(results))

    # With no repetitions there is nothing to concatenate; return an empty table.
    if not all_results:
        return pd.DataFrame(columns=["Model", "Train Accuracy", "Test Accuracy", "Confusion Matrix",
                                     "Classification Report", "Hyperparameters"])

    # Concatenate all the results into a single DataFrame, once
    return pd.concat(all_results, axis=0)



In [None]:
# Modification: the LSTM layers now use activation='tanh' with the settings the cuDNN kernel requires (sigmoid recurrent activation, no recurrent dropout, no unrolling)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from joblib import dump, load
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import L1L2
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from scikeras.wrappers import KerasClassifier
from keras.layers import LSTM, Dense, Dropout, Input


def read_csv_files(train_file, test_file, sep=None, header='infer'):
    """Read the training and test files into DataFrames and print both shapes.

    Args:
        train_file: Path of the training file.
        test_file: Path of the test file.
        sep: Separator forwarded to ``pd.read_csv``.
        header: ``header`` argument forwarded to ``pd.read_csv``.

    Returns:
        ``(df_train, df_test)``, in that order.
    """
    frames = []
    for path in (train_file, test_file):
        frames.append(pd.read_csv(path, sep=sep, header=header))
    df_train, df_test = frames
    print(df_train.shape, df_test.shape)
    return df_train, df_test

def create_sequences(X, y, seq_len):
    """Build sliding windows over the rows of ``X`` with one label per window.

    A window covers ``seq_len`` consecutive rows and all feature columns; its
    label is the ``y`` entry of the window's last row.

    Args:
        X: 2-D array indexed as ``X[row, column]``.
        y: Labels indexed by row.
        seq_len: Rows per window.

    Returns:
        ``(X_seq, y_seq)`` as NumPy arrays.
    """
    # Iterate over the exclusive end row of every window.
    window_ends = range(seq_len, X.shape[0] + 1)
    X_seq = [X[end - seq_len:end, :] for end in window_ends]
    y_seq = [y[end - 1] for end in window_ends]
    return np.array(X_seq), np.array(y_seq)

def prepare_data(df_train, df_test, seq_len, target_col=0, scaler=None, use_label_encoder=True, output_type=None):
    """Turn raw train/test tables into windowed features and labels.

    The label column is removed from the feature matrix, so the model can
    never see the value it is asked to predict. The input DataFrames are
    not modified.

    Args:
        df_train: Training DataFrame; column position ``target_col`` holds the label.
        df_test: Test DataFrame with the same column layout.
        seq_len: Number of consecutive rows per window (see ``create_sequences``).
        target_col: Positional index of the label column.
        scaler: Object exposing ``fit_transform`` / ``transform``. It is fitted on
            the training features only and then applied to the test features.
            ``None`` (default) creates a fresh ``StandardScaler`` per call.
        use_label_encoder: If true, labels are mapped with a ``LabelEncoder``
            fitted on the training labels; otherwise they are used as-is.
        output_type: ``"torch"`` returns torch tensors (float32 features, long
            labels); any other value returns NumPy arrays.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` of windowed data.
    """
    # A fresh scaler per call avoids sharing fitted state through a default argument.
    if scaler is None:
        scaler = StandardScaler()

    raw_y_train = df_train.iloc[:, target_col]
    raw_y_test = df_test.iloc[:, target_col]
    if use_label_encoder:
        label_encoder = LabelEncoder()
        y_train = label_encoder.fit_transform(raw_y_train)
        y_test = label_encoder.transform(raw_y_test)
    else:
        y_train = raw_y_train.values
        y_test = raw_y_test.values

    # Keep every column except the label column (selected by position), so the
    # label does not leak into the features.
    feature_positions = np.delete(np.arange(df_train.shape[1]), target_col)
    train_features = df_train.iloc[:, feature_positions].values
    test_features = df_test.iloc[:, feature_positions].values

    # The scaler is fitted once on the whole training matrix; for column-wise
    # scalers such as StandardScaler/MinMaxScaler this matches fitting each
    # column separately.
    X_train = scaler.fit_transform(train_features)
    X_test = scaler.transform(test_features)

    X_train_seq, y_train_seq = create_sequences(X_train, y_train, seq_len)
    X_test_seq, y_test_seq = create_sequences(X_test, y_test, seq_len)

    if output_type == "torch":
        # Imported here because the notebook does not import torch at the top
        # and only this branch needs it.
        import torch

        return (
            torch.tensor(X_train_seq, dtype=torch.float32),
            torch.tensor(y_train_seq, dtype=torch.long),
            torch.tensor(X_test_seq, dtype=torch.float32),
            torch.tensor(y_test_seq, dtype=torch.long),
        )

    return X_train_seq, y_train_seq, X_test_seq, y_test_seq

def create_LSTM_classification(input_shape, unique_classes, activation_function='tanh', optimizer='adam', num_hidden_layers=2,
                               num_neurons_per_layer=64, dropout_rate=0.2, early_stop_patience=5,
                               kernel_regularizer=None, verbose=None, target_col=None):
    """Build and compile a stacked-LSTM classifier.

    Layout: input -> dropout -> ``num_hidden_layers`` x (LSTM returning
    sequences -> dropout) -> LSTM returning the last state -> dropout ->
    dense output layer. The model therefore holds ``num_hidden_layers + 1``
    LSTM layers in total.

    Every LSTM layer is created with ``recurrent_activation='sigmoid'``,
    ``recurrent_dropout=0`` and ``unroll=False``; together with the default
    ``activation_function='tanh'`` these are the settings the notebook relies
    on for the cuDNN implementation.

    Args:
        input_shape: Shape passed to the Keras ``Input`` layer.
        unique_classes: Number of classes. Exactly 2 gives one sigmoid unit with
            binary cross-entropy; any other value gives a softmax layer of that
            width with categorical cross-entropy.
        activation_function: Activation of every LSTM layer.
        optimizer: Optimizer passed to ``compile``.
        num_hidden_layers: Number of sequence-returning LSTM layers.
        num_neurons_per_layer: Units in every LSTM layer.
        dropout_rate: Rate used by every ``Dropout`` layer.
        early_stop_patience: Accepted but not used inside this function.
        kernel_regularizer: Kernel regularizer applied to every LSTM layer.
        verbose: Accepted but not used inside this function.
        target_col: Accepted but not used inside this function.

    Returns:
        The compiled ``Sequential`` model, tracking accuracy as its metric.
    """
    # Determine loss and activation for the output layer based on unique_classes
    if unique_classes == 2:
        loss = 'binary_crossentropy'
        output_activation = 'sigmoid'
        n_output_neurons = 1
    else:
        loss = 'categorical_crossentropy'
        output_activation = 'softmax'
        n_output_neurons = unique_classes

    # Settings shared by every LSTM layer, defined once so the layers cannot drift apart.
    lstm_kwargs = dict(activation=activation_function, recurrent_activation='sigmoid',
                       recurrent_dropout=0, unroll=False, kernel_regularizer=kernel_regularizer)

    # Initialize RNN
    model = Sequential()

    # Input layer
    model.add(Input(shape=input_shape))
    model.add(Dropout(dropout_rate))

    # Stacked LSTM layers return full sequences so the next LSTM layer can consume them
    for _ in range(num_hidden_layers):
        model.add(LSTM(num_neurons_per_layer, return_sequences=True, **lstm_kwargs))
        model.add(Dropout(dropout_rate))

    # Final LSTM layer returns only its last output
    model.add(LSTM(num_neurons_per_layer, return_sequences=False, **lstm_kwargs))
    model.add(Dropout(dropout_rate))

    # Output layer (adjusted for both binary and multi-class classification)
    model.add(Dense(n_output_neurons, activation=output_activation))

    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

    return model

def LSTM_classification_random_search(X_train, y_train, X_test, y_test, target_col, seq_len, n_iter=1, n_times=1, cv=3,
                                  verbose=1,dropout_rates=[0.1, 0.2, 0.3, 0.4],
                                  activation_functions=["tanh"],
                                  optimizers=["adam", "rmsprop", "adagrad"],
                                  batch_sizes=[32, 64, 128], epochs=[50, 100, 200],
                                  early_stop_patience=[5, 10, 15],
                                  num_hidden_layers_range=range(1, 4),
                                  neurons_per_layer_range=[16, 32, 48, 64],
                                  learning_rate_range=[0.001, 0.01, 0.1],
                                  model_save=True, save_directory=None,
                                  plot_loss=True, test_size=0.3, random_state=42):
    """Randomly search LSTM hyperparameters, retrain the winner and evaluate it.

    The procedure is repeated ``n_times``. In each repetition a
    ``RandomizedSearchCV`` picks the best hyperparameters, a fresh model is
    trained with them, and that model is scored on the test data.

    A validation split is carved out of the training data (``test_size``,
    ``random_state``) and used for early stopping and checkpointing, so the
    test data is only used for the final metrics.

    Args:
        X_train: Windowed training features; ``X_train.shape[2]`` is the feature count.
        y_train: Integer class labels for ``X_train``.
        X_test: Windowed test features.
        y_test: Integer class labels for ``X_test``.
        target_col: Accepted for interface compatibility; not used here.
        seq_len: Window length; first dimension of the model input shape.
        n_iter: Number of parameter settings sampled per search.
        n_times: Number of search/retrain/evaluate repetitions.
        cv: Cross-validation setting passed to ``RandomizedSearchCV``.
        verbose: Verbosity for the scikeras wrapper and the callbacks.
        dropout_rates: Candidate dropout rates.
        activation_functions: Candidate LSTM activations.
        optimizers: Candidate optimizers.
        batch_sizes: Candidate batch sizes.
        epochs: Candidate epoch counts.
        early_stop_patience: Candidate patience values for the final training
            run. The search itself uses the smallest value, because one
            callback instance is shared by all candidates.
        num_hidden_layers_range: Candidate numbers of stacked LSTM layers.
        neurons_per_layer_range: Candidate LSTM widths.
        learning_rate_range: Despite the name, these values are used as L2
            kernel-regularization strengths, not learning rates.
        model_save: If true, checkpoint the final model (``final_model{i}.h5``)
            and dump it with joblib (``best_model{i}.joblib``).
        save_directory: Directory for the joblib dump; current directory if ``None``.
        plot_loss: If true, plot training/validation loss of the final model.
        test_size: Fraction of the training data held out for validation.
        random_state: Seed for the validation split and the parameter sampling
            (offset by the repetition index so repetitions differ).

    Returns:
        DataFrame with one row per repetition: model label, train/test
        accuracy, confusion matrix, summary report and chosen hyperparameters.
    """
    input_shape = (seq_len, X_train.shape[2])
    unique_classes = len(np.unique(y_train))
    is_multiclass = unique_classes > 2

    # Hold out validation data from the training set so that early stopping
    # and model selection never look at the test set.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train, y_train, test_size=test_size, random_state=random_state)
    y_fit = np.asarray(y_fit)
    y_test = np.asarray(y_test)

    # Multi-class models are trained on one-hot targets; metrics below use the
    # integer labels directly.
    if is_multiclass:
        y_fit_model = to_categorical(y_fit, num_classes=unique_classes)
        y_val_model = to_categorical(y_val, num_classes=unique_classes)
    else:
        y_fit_model, y_val_model = y_fit, y_val

    print(X_fit.shape, y_fit_model.shape, X_val.shape, y_val_model.shape, X_test.shape, y_test.shape)

    # Model
    Keras_classifier = KerasClassifier(model=create_LSTM_classification, input_shape=input_shape,
                                       unique_classes=unique_classes, verbose=verbose)

    # Define the random search parameters
    param_dist = {
        "model__activation_function": activation_functions,
        "model__dropout_rate": dropout_rates,
        "model__optimizer": optimizers,
        "model__num_hidden_layers": num_hidden_layers_range,
        "model__num_neurons_per_layer": neurons_per_layer_range,
        "model__kernel_regularizer": [L1L2(l1=0, l2=regularization_strength) for regularization_strength in learning_rate_range],
        "batch_size": batch_sizes,
        "epochs": epochs,
        "model__early_stop_patience": early_stop_patience
    }

    # EarlyStopping needs a single integer; passing the whole candidate list
    # fails when Keras compares its wait counter against it.
    search_patience = int(np.min(np.atleast_1d(early_stop_patience)))

    all_results = []

    for i in range(n_times):
        print(f"Iteration {i}")

        # refit=False: the winning configuration is retrained explicitly below,
        # so refitting inside the search would be a wasted training run.
        search_seed = None if random_state is None else random_state + i
        random_search = RandomizedSearchCV(Keras_classifier, param_distributions=param_dist, n_iter=n_iter,
                                           cv=cv, error_score=np.nan, verbose=2,
                                           refit=False, random_state=search_seed)

        search_es = EarlyStopping(monitor='val_loss', mode='min', verbose=verbose, patience=search_patience)
        random_search.fit(X_fit, y_fit_model, validation_data=(X_val, y_val_model), callbacks=[search_es])

        best_params = random_search.best_params_

        # Create and fit the final model with the best parameters
        final_model = create_LSTM_classification(input_shape=input_shape, unique_classes=unique_classes,
                                                 dropout_rate=best_params['model__dropout_rate'],
                                                 activation_function=best_params['model__activation_function'],
                                                 optimizer=best_params['model__optimizer'],
                                                 num_hidden_layers=best_params['model__num_hidden_layers'],
                                                 num_neurons_per_layer=best_params['model__num_neurons_per_layer'],
                                                 kernel_regularizer=best_params['model__kernel_regularizer'],
                                                 early_stop_patience=best_params['model__early_stop_patience'])
        final_callbacks = [EarlyStopping(monitor='val_loss', mode='min', verbose=verbose,
                                         patience=best_params['model__early_stop_patience'])]
        if model_save:
            final_callbacks.append(ModelCheckpoint(f'final_model{i}.h5', monitor='val_loss', mode='min',
                                                   verbose=verbose, save_best_only=True))
        history = final_model.fit(X_fit, y_fit_model, validation_data=(X_val, y_val_model),
                                  epochs=best_params['epochs'], batch_size=best_params['batch_size'],
                                  callbacks=final_callbacks)

        # Plotting the training and validation loss
        if plot_loss:
            plt.figure(figsize=(10, 6))
            plt.plot(history.history['loss'], label='Training Loss')
            plt.plot(history.history['val_loss'], label='Validation Loss')
            plt.xlabel('Epochs')
            plt.ylabel('Loss')
            plt.legend()
            plt.title(f'Loss Plot of best model_{i}')
            plt.show()

        # Save the retrained model
        if model_save:
            model_name = f'best_model{i}.joblib'
            if save_directory:
                os.makedirs(save_directory, exist_ok=True)
                model_path = os.path.join(save_directory, model_name)
            else:
                model_path = model_name
            dump(final_model, model_path)

        # Predict once per split and convert the raw outputs to class labels
        train_scores = final_model.predict(X_fit)
        test_scores = final_model.predict(X_test)
        if is_multiclass:
            y_pred_train = np.argmax(train_scores, axis=1)
            y_pred_test = np.argmax(test_scores, axis=1)
        else:
            # The sigmoid output has shape (n, 1); flatten it to match the labels.
            y_pred_train = (train_scores > 0.5).astype('int32').ravel()
            y_pred_test = (test_scores > 0.5).astype('int32').ravel()

        # Calculate metrics (train accuracy is measured on the rows used for fitting)
        train_accuracy = accuracy_score(y_fit, y_pred_train)
        test_accuracy = accuracy_score(y_test, y_pred_test)
        conf_mat = confusion_matrix(y_test, y_pred_test)
        class_report = classification_report(y_test, y_pred_test, output_dict=True, zero_division=1)

        # Plot the predicted and actual test labels
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.plot(y_pred_test, label='Predicted')
        ax.plot(y_test, label='Actual')
        ax.set_xlabel('Time')
        ax.set_ylabel('Value')
        ax.set_title(f'Comparison of predicted and actual values')
        ax.legend()
        plt.show()

        # Create a simple report
        simple_report = {
            'Accuracy': round(class_report['accuracy'], 3),
            'Precision': round(class_report['macro avg']['precision'], 3),
            'Recall': round(class_report['macro avg']['recall'], 3),
            'F1-score': round(class_report['macro avg']['f1-score'], 3),
            'Support': round(class_report['macro avg']['support'], 3)
        }

        # One single-row DataFrame per repetition
        results = {
            "Model": [f"LSTM Classification with Randomized Search(best model_{i})"],
            "Train Accuracy": [train_accuracy],
            "Test Accuracy": [test_accuracy],
            "Confusion Matrix": [conf_mat.tolist()],
            "Classification Report": [simple_report],
            "Hyperparameters": [best_params],
        }
        all_results.append(pd.DataFrame(results))

    # With no repetitions there is nothing to concatenate; return an empty table.
    if not all_results:
        return pd.DataFrame(columns=["Model", "Train Accuracy", "Test Accuracy", "Confusion Matrix",
                                     "Classification Report", "Hyperparameters"])

    # Concatenate all the results into a single DataFrame, once
    return pd.concat(all_results, axis=0)



In [46]:
# Locate the ElectricDevices train/test splits and load them (tab-separated, no header row)
data_dir = "/home/young78703/Data_Science_Project/data/UCRArchive_2018/ElectricDevices"
train_file = f"{data_dir}/ElectricDevices_TRAIN.tsv"
test_file = f"{data_dir}/ElectricDevices_TEST.tsv"
df_train, df_test = read_csv_files(train_file=train_file, test_file=test_file, header=None, sep='\t')

(8926, 97) (7711, 97)


In [47]:
# Encode the labels, scale the features and window the rows for the LSTM
seq_len, use_label_encoder, output_type = 3, True, None
X_train, y_train, X_test, y_test = prepare_data(
    df_train,
    df_test,
    seq_len=seq_len,
    target_col=0,
    use_label_encoder=use_label_encoder,
    output_type=output_type,
)

In [None]:
# Reuse the `seq_len` that produced the windows so the model input shape always
# matches the data; the returned results table is displayed as the cell output.
LSTM_classification_random_search(X_train, y_train, X_test, y_test, target_col=0, seq_len=seq_len, n_iter=1, n_times=2)

In [8]:
# Show up to 100 columns when displaying wide DataFrames
pd.options.display.max_columns = 100