In [2]:
import pandas as pd
import warnings
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Concatenate, Attention
from tensorflow.keras.callbacks import EarlyStopping
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler
from tcn import TCN

warnings.filterwarnings('ignore')


2023-12-16 13:55:57.797055: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-16 13:55:57.840934: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-16 13:55:57.840972: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-16 13:55:57.840995: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-16 13:55:57.848918: I tensorflow/core/platform/cpu_feature_g

# Hybrid

In [18]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()


In [19]:
file_path = "myData2.parquet"
df = pd.read_parquet(file_path)
# 'timestamp_seconds', # FixMe: lowers the accuracy 
selected_features = ['timestamp_seconds',
                     'node_memory_Percpu_bytes', 
                     'node_context_switches_total', 
                     'surfsara_power_usage', 
                     'node_netstat_Tcp_InSegs', 
                     'node_netstat_Tcp_OutSegs', 
                     'node_network_transmit_packets_total-sum', 
                     'node_filesystem_size_bytes-sum', 
                     'node_filesystem_files-sum', 
                     'node_memory_MemFree_bytes', 
                     'node_netstat_Tcp_InErrs']

# Extract relevant columns
df_selected = df[['timestamp', 'state'] + selected_features].copy()

# Encode the target variable 'state' to binary (0 for "COMPLETED", 1 otherwise)
df_selected['target'] = (df_selected['state'] != 'COMPLETED').astype(int)

# Drop the original 'state' column
df_selected.drop('state', axis=1, inplace=True)

# Define time intervals
time_intervals = {'minute': '1T', 'hour': '1H', 'day': '1D'}

# Normalize selected features
scaler = MinMaxScaler()
df_selected[selected_features] = scaler.fit_transform(df_selected[selected_features])

# Set sequence length
sequence_length = 30

# Define early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Function to prepare data
def prepare_data(data, time_interval):
    data.set_index('timestamp', inplace=True) # FixMe
    data_resampled = data.resample(time_interval).sum()
    data_resampled['target'] = data_resampled['target'].clip(upper=1)  # Clip values to 1
    return data_resampled

# Function to create sequences
def create_sequences(data, sequence_length):
    sequences, targets = [], []
    for i in range(len(data) - sequence_length):
        seq = data.iloc[i:i+sequence_length].values
        target = data.iloc[i+sequence_length]['target']
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)

# Prepare data with hourly intervals
data_day = prepare_data(df_selected, time_intervals['day'])

# Create sequences and targets
sequences_day, targets_day = create_sequences(data_day, sequence_length)

# Split the data into training and testing sets
X_train_day, X_test_day, y_train_day, y_test_day = train_test_split(sequences_day, targets_day, test_size=0.3, random_state=1)

# Build the LSTM model
lstm_model_day = Sequential()
lstm_model_day.add(LSTM(50, input_shape=(X_train_day.shape[1], X_train_day.shape[2])))
lstm_model_day.add(Dense(1, activation='sigmoid'))
lstm_model_day.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])

# Train the model
# lstm_model_day.fit(X_train_day, y_train_day, epochs=20, batch_size=16, validation_split=0.1, callbacks=[early_stopping])
lstm_model_day.fit(X_train_day, y_train_day, epochs=10, batch_size=16, validation_split=0.15)

# Build the TCN model
tcn_model_day = Sequential([
    TCN(input_shape=(sequence_length, X_train_day.shape[2])),
    Dense(1, activation='sigmoid')
])

# Compile the model
tcn_model_day.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])# FixMe

# Train the model
tcn_model_day.fit(X_train_day, y_train_day, epochs=20, batch_size=32, validation_split=0.15)

# Function to create a hybrid model with attention mechanism
def create_attention_hybrid_model(lstm_model, tcn_model):
    lstm_input = lstm_model.input
    tcn_input = tcn_model.input

    # Get the output layers of both models
    lstm_output = lstm_model.layers[-1].output
    tcn_output = tcn_model.layers[-1].output

    # Use Attention mechanism to combine outputs
    attention = Attention()([lstm_output, tcn_output])
    merged = Concatenate()([lstm_output, tcn_output, attention])

    # Add a dense layer for the final prediction
    merged = Dense(1, activation='sigmoid')(merged)

    # Create the ensemble model
    ensemble_model = Model(inputs=[lstm_input, tcn_input], outputs=merged)

    # Compile the model
    ensemble_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])

    return ensemble_model

# Function to make predictions on new data for the hybrid model
def predict_future_failures_hybrid(model, input_data_lstm, input_data_tcn, sequence_length, prediction_steps):
    predictions = []

    for _ in range(prediction_steps):
        # Make predictions for the next time step using both LSTM and TCN models
        prediction = model.predict([input_data_lstm.reshape(1, sequence_length, input_data_lstm.shape[1]),
                                    input_data_tcn.reshape(1, sequence_length, input_data_tcn.shape[1])])
        predictions.append(prediction[0, 0])

        # Shift the input data by one time step and append the new prediction
        input_data_lstm = np.roll(input_data_lstm, shift=-1, axis=0)
        input_data_lstm[-1, -1] = prediction[0, 0]

        input_data_tcn = np.roll(input_data_tcn, shift=-1, axis=0)
        input_data_tcn[-1, -1] = prediction[0, 0]

    return predictions

# Select a starting point for predictions
input_data_lstm_hybrid = X_test_day[3]
input_data_tcn_hybrid = X_test_day[3]

# Number of time steps to predict into the future
prediction_steps_hybrid = 7

# Create the hybrid model
hybrid_model_day_attention = create_attention_hybrid_model(lstm_model_day, tcn_model_day)

# Train the hybrid model with both LSTM and TCN data
hybrid_model_day_attention.fit([X_train_day, X_train_day], y_train_day, epochs=20, batch_size=4, validation_split=0.15)

# Evaluate the model using Mean Absolute Error
mae_day_attention = hybrid_model_day_attention.evaluate([X_test_day, X_test_day], y_test_day, verbose=0)[1]
print(f'Model Mean Absolute Error: {mae_day_attention:.4f}')

# Make predictions with the hybrid model
predicted_failures_hybrid_attention = predict_future_failures_hybrid(hybrid_model_day_attention, input_data_lstm_hybrid, input_data_tcn_hybrid, sequence_length, prediction_steps_hybrid)

# Print the predicted failures
print("Predicted Failures for the Next 7 Time Steps (Hybrid):")
print(predicted_failures_hybrid_attention)

# Evaluate the predictions using Mean Absolute Error
mae_predictions_hybrid_attention_day = np.mean(np.abs(predicted_failures_hybrid_attention - y_test_day[3:3+prediction_steps_hybrid]))
print(f'Mean Absolute Error for Predictions: {mae_predictions_hybrid_attention_day:.4f}')

# Evaluate the predictions using Mean Squared Error
mse_predictions = np.mean((predicted_failures_hybrid_attention - y_test_day[3:3+prediction_steps_hybrid])**2)
print(f'Mean Squared Error for Predictions: {mse_predictions:.4f}')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model Mean Absolute Error: 0.1553
Predicted Failures for the Next 7 Time Steps (Hybrid):
[0.73098207, 0.73291093, 0.73374546, 0.7343187, 0.75734246, 0.75273067, 0.7392869]
Mean Absolute Error for Predictions: 0.2598
Mean Squared Error for Predictions: 0.0676


# TCN

# LSTM

In [16]:
file_path = "myData2.parquet"
df = pd.read_parquet(file_path)

selected_features = ['timestamp_seconds', 
                     'node_memory_Percpu_bytes', 
                     'node_context_switches_total', 
                     'surfsara_power_usage', 
                     'node_netstat_Tcp_InSegs', 
                     'node_netstat_Tcp_OutSegs', 
                     'node_network_transmit_packets_total-sum', 
                     'node_filesystem_size_bytes-sum', 
                     'node_filesystem_files-sum', 
                     'node_memory_MemFree_bytes', 
                     'node_netstat_Tcp_InErrs']

# Extract relevant columns
df_selected = df[['timestamp', 'state'] + selected_features].copy()

# Encode the target variable 'state' to binary (0 for "COMPLETED", 1 otherwise)
df_selected['target'] = (df_selected['state'] != 'COMPLETED').astype(int)

# Drop the original 'state' column
df_selected.drop('state', axis=1, inplace=True)

# Define time intervals
time_intervals = {'minute': '1T', 'hour': '1H', 'day': '1D'}

# Normalize selected features
scaler = MinMaxScaler()
df_selected[selected_features] = scaler.fit_transform(df_selected[selected_features])

# Function to prepare data for LSTM
def prepare_lstm_data(data, time_interval):
    data.set_index('timestamp', inplace=True) # FixMe
    data_resampled = data.resample(time_interval).sum()
    data_resampled['target'] = data_resampled['target'].clip(upper=1)  # Clip values to 1
    return data_resampled

# Function to create sequences for LSTM
def create_lstm_sequences(data, sequence_length):
    sequences, targets = [], []
    for i in range(len(data) - sequence_length):
        seq = data.iloc[i:i+sequence_length].values
        target = data.iloc[i+sequence_length]['target']
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)

# Set sequence length
sequence_length = 30

# Prepare data for LSTM with daily intervals
lstm_data_day = prepare_lstm_data(df_selected, time_intervals['day'])

# Create sequences and targets
sequences_day, targets_day = create_lstm_sequences(lstm_data_day, sequence_length)

# Split the data into training and testing sets
X_train_day, X_test_day, y_train_day, y_test_day = train_test_split(sequences_day, targets_day, test_size=0.3, random_state=1)

# Build the LSTM model
lstm_model_day = Sequential()
lstm_model_day.add(LSTM(50, input_shape=(X_train_day.shape[1], X_train_day.shape[2])))
lstm_model_day.add(Dense(1, activation='sigmoid'))
lstm_model_day.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error']) # FixMe

# Train the model 
# , callbacks=[early_stopping]
lstm_model_day.fit(X_train_day, y_train_day, epochs=20, batch_size=32, validation_split=0.1)

# Evaluate the model using Mean Absolute Error
mae_day = lstm_model_day.evaluate(X_test_day, y_test_day, verbose=0)[1]
print(f'Model Mean Absolute Error: {mae_day:.4f}')

input_data = X_test_day[3]  # Can be any valid starting point

# Number of time steps to predict into the future
prediction_steps = 7

# Function to make predictions on new data
def predict_future_failures(model, input_data, sequence_length, prediction_steps):
    predictions = []

    for _ in range(prediction_steps):
        # Make a prediction for the next time step
        prediction = model.predict(input_data.reshape(1, sequence_length, input_data.shape[1]))
        predictions.append(prediction[0, 0])

        # Shift the input data by one time step and append the new prediction
        input_data = np.roll(input_data, shift=-1, axis=0)
        input_data[-1, -1] = prediction[0, 0]

    return predictions

# Make predictions
predicted_failures = predict_future_failures(lstm_model_day, input_data, sequence_length, prediction_steps)

# Denormalize the predicted failures 
predicted_failures_denormalized = predicted_failures * (lstm_data_day['target'].max() - lstm_data_day['target'].min()) + lstm_data_day['target'].min()

# Print the predicted failures
print("Predicted failures for the next 7 days:")
print(predicted_failures_denormalized)

# Evaluate the predictions using Mean Absolute Error
mae_predictions = np.mean(np.abs(predicted_failures - y_test_day[3:3+prediction_steps]))
print(f'Mean Absolute Error for Predictions: {mae_predictions:.4f}')

# Evaluate the predictions using Mean Squared Error
mse_predictions = np.mean((predicted_failures - y_test_day[3:3+prediction_steps])**2)
print(f'Mean Squared Error for Predictions: {mse_predictions:.4f}')


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model Mean Absolute Error: 0.1455
Predicted failures for the next 7 days:
[0.699964   0.74753016 0.80661297 0.8396693  0.6388343  0.64582556
 0.64710116]
Mean Absolute Error for Predictions: 0.2821
Mean Squared Error for Predictions: 0.0853
