In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

2023-12-16 16:55:48.585149: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-16 16:55:48.630724: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-16 16:55:48.630762: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-16 16:55:48.630786: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-16 16:55:48.639166: I tensorflow/core/platform/cpu_feature_g

In [26]:
file_path = "myData2.parquet"
df = pd.read_parquet(file_path)

In [5]:
# df.head(20)
# df['state'].value_counts()
# df.info(max_cols=100)
# data.head(20)

In [3]:
selected_features = ['timestamp_seconds', 
                     'node_memory_Percpu_bytes', 
                     'node_context_switches_total', 
                     'surfsara_power_usage', 
                     'node_netstat_Tcp_InSegs', 
                     'node_netstat_Tcp_OutSegs', 
                     'node_network_transmit_packets_total-sum', 
                     'node_filesystem_size_bytes-sum', 
                     'node_filesystem_files-sum', 
                     'node_memory_MemFree_bytes', 
                     'node_netstat_Tcp_InErrs']


In [27]:
# Extract relevant columns
df_selected = df[['timestamp', 'state'] + selected_features].copy()

# Encode the target variable 'state' to binary (0 for "COMPLETED", 1 otherwise)
df_selected['target'] = (df_selected['state'] != 'COMPLETED').astype(int)

# Drop the original 'state' column
df_selected.drop('state', axis=1, inplace=True)

In [5]:
# Define time intervals
time_intervals = {'minute': '1T', 'hour': '1H', 'day': '1D'}

# Normalize selected features
scaler = MinMaxScaler()
df_selected[selected_features] = scaler.fit_transform(df_selected[selected_features])

# Set sequence length
sequence_length = 30

# Number of time steps to predict into the future
prediction_steps = 7

# Define early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)


In [6]:
# Function to prepare data for LSTM
def prepare_lstm_data(data, time_interval):
    data.set_index('timestamp', inplace=True)
    data_resampled = data.resample(time_interval).sum()
    data_resampled['target'] = data_resampled['target'].clip(upper=1)  # Clip values to 1
    return data_resampled

# Function to create sequences for LSTM
def create_lstm_sequences(data, sequence_length):
    sequences, targets = [], []
    for i in range(len(data) - sequence_length):
        seq = data.iloc[i:i+sequence_length].values
        target = data.iloc[i+sequence_length]['target']
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)


In [7]:
# Function to make predictions on new data
def predict_future_failures(model, input_data, sequence_length, prediction_steps):
    predictions = []

    for _ in range(prediction_steps):
        # Make a prediction for the next time step
        prediction = model.predict(input_data.reshape(1, sequence_length, input_data.shape[1]))
        predictions.append(prediction[0, 0])

        # Shift the input data by one time step and append the new prediction
        input_data = np.roll(input_data, shift=-1, axis=0)
        input_data[-1, -1] = prediction[0, 0]

    return predictions


# 30 days -> 7 days

In [19]:
# Prepare data for LSTM with daily intervals
lstm_data_day = prepare_lstm_data(df_selected, time_intervals['day'])

# Create sequences and targets
sequences_day, targets_day = create_lstm_sequences(lstm_data_day, sequence_length)

# Split the data into training and testing sets
X_train_day, X_test_day, y_train_day, y_test_day = train_test_split(sequences_day, targets_day, test_size=0.3, random_state=1)

# Build the LSTM model
lstm_model_day = Sequential()
lstm_model_day.add(LSTM(50, input_shape=(X_train_day.shape[1], X_train_day.shape[2])))
lstm_model_day.add(Dense(1, activation='sigmoid'))
lstm_model_day.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])

# Train the model
lstm_model_day.fit(X_train_day, y_train_day, epochs=20, batch_size=32, validation_split=0.1)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7fd847deded0>

In [20]:
# Evaluate the model using Mean Squared Error
mse_day = lstm_model_day.evaluate(X_test_day, y_test_day, verbose=0)[1]
print(f'Model Mean Squared Error: {mse_day:.4f}\n')

input_data = X_test_day[3]  # Can be any valid starting point

# Make predictions
predicted_failures = predict_future_failures(lstm_model_day, input_data, sequence_length, prediction_steps)

# Denormalize the predicted failures 
predicted_failures_denormalized = predicted_failures * (lstm_data_day['target'].max() - lstm_data_day['target'].min()) + lstm_data_day['target'].min()

# Print the predicted failures
print("Predicted failures for the next 7 days:")
print(predicted_failures_denormalized)

# Evaluate the predictions using Mean Squared Error
mse_predictions = np.mean((predicted_failures - y_test_day[3:3+prediction_steps])**2)
print(f'\nMean Squared Error for Predictions: {mse_predictions:.4f}\n')

# Evaluate the predictions using Mean Absolute Error
mae_predictions = np.mean(np.abs(predicted_failures - y_test_day[3:3+prediction_steps]))
print(f'Mean Absolute Error for Predictions: {mae_predictions:.4f}')


Model Mean Squared Error: 0.1502

Predicted failures for the next 7 days:
[0.736503  0.6537022 0.705062  0.6537022 0.6537022 0.6537022 0.6537022]
Mean Absolute Error for Predictions: 0.3271
Mean Squared Error for Predictions: 0.1080


# 30 hours -> 7 hours

In [24]:
# Prepare data for LSTM with hourly intervals
lstm_data_hour = prepare_lstm_data(df_selected, time_intervals['hour'])

# Create sequences and targets
sequences_hour, targets_hour = create_lstm_sequences(lstm_data_hour, sequence_length)

# Split the data into training and testing sets
X_train_hour, X_test_hour, y_train_hour, y_test_hour = train_test_split(sequences_hour, targets_hour, test_size=0.3, random_state=1)

# Build the LSTM model
lstm_model_hour = Sequential()
lstm_model_hour.add(LSTM(20, input_shape=(X_train_hour.shape[1], X_train_hour.shape[2])))
lstm_model_hour.add(Dense(1, activation='sigmoid'))
lstm_model_hour.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])

# Train the model
lstm_model_hour.fit(X_train_hour, y_train_hour, epochs=20, batch_size=32, validation_split=0.1, callbacks=[early_stopping])


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20


<keras.src.callbacks.History at 0x7fd97c7ba990>

In [25]:
# Evaluate the model using Mean Squared Error
mse_hour = lstm_model_hour.evaluate(X_test_hour, y_test_hour, verbose=0)[1]
print(f'Model Mean Squared Error: {mse_hour:.4f}\n')

input_data = X_test_hour[3]  # Can be any valid starting point

# Make predictions
predicted_failures = predict_future_failures(lstm_model_hour, input_data, sequence_length, prediction_steps)

# Denormalize the predicted failures 
predicted_failures_denormalized = predicted_failures * (lstm_data_hour['target'].max() - lstm_data_hour['target'].min()) + lstm_data_hour['target'].min()

# Print the predicted failures
print("Predicted Failures for the Next 7 Hours:")
print(predicted_failures_denormalized)

# Evaluate the predictions using Mean Squared Error
mse_predictions = np.mean((predicted_failures - y_test_hour[3:3+prediction_steps])**2)
print(f'\nMean Squared Error for Predictions: {mse_predictions:.4f}\n')

# Evaluate the predictions using Mean Absolute Error
mae_predictions = np.mean(np.abs(predicted_failures - y_test_hour[3:3+prediction_steps]))
print(f'Mean Absolute Error for Predictions: {mae_predictions:.4f}')



Model Mean Squared Error: 0.1920

Predicted Failures for the Next 7 Hours:
[0.44338006 0.44338036 0.44338274 0.44339988 0.44352636 0.44445175
 0.45080683]
Mean Squared Error for Predictions: 0.2300
Mean Absolute Error for Predictions: 0.4769


# 30 minutes -> 7 minutes - Overfitting

In [28]:
# Prepare data for LSTM with minute intervals
lstm_data_minute = prepare_lstm_data(df_selected, time_intervals['minute'])

# Create sequences and targets
sequences_minute, targets_minute = create_lstm_sequences(lstm_data_minute, sequence_length)

# Split the data into training and testing sets
X_train_minute, X_test_minute, y_train_minute, y_test_minute = train_test_split(sequences_minute, targets_minute, test_size=0.3, random_state=1)

# Build the LSTM model
lstm_model_minute = Sequential()

# 50 -> 10
lstm_model_minute.add(LSTM(10, input_shape=(X_train_minute.shape[1], X_train_minute.shape[2])))
lstm_model_minute.add(Dense(1, activation='sigmoid'))
lstm_model_minute.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])

# Train the model
lstm_model_minute.fit(X_train_minute, y_train_minute, epochs=5, batch_size=32, validation_split=0.1, callbacks=[early_stopping])


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7fd9681bdb10>

In [29]:
# Evaluate the model using Mean Squared Error
mse_minute = lstm_model_minute.evaluate(X_test_minute, y_test_minute, verbose=0)[1]
print(f'Model Mean Squared Error: {mse_minute:.4f}\n')

input_data = X_test_minute[3]  # Can be any valid starting point

# Make predictions
predicted_failures = predict_future_failures(lstm_model_minute, input_data, sequence_length, prediction_steps)

# Denormalize the predicted failures
predicted_failures_denormalized = predicted_failures * (lstm_data_minute['target'].max() - lstm_data_minute['target'].min()) + lstm_data_minute['target'].min()

# Print the predicted failures
print("Predicted failures for the next 7 minutes:")
print(predicted_failures_denormalized)

# Evaluate the predictions using Mean Squared Error
mse_predictions = np.mean((predicted_failures - y_test_minute[3:3+prediction_steps])**2)
print(f'\nMean Squared Error for Predictions: {mse_predictions:.4f}\n')

# Evaluate the predictions using Mean Absolute Error
mae_predictions = np.mean(np.abs(predicted_failures - y_test_minute[3:3+prediction_steps]))
print(f'Mean Absolute Error for Predictions: {mae_predictions:.4f}')


Model Mean Squared Error: 0.1972

Predicted failures for the next 7 minutes:
[0.52470654 0.52470654 0.52470654 0.52470654 0.52470654 0.52470654
 0.52470654]
Mean Squared Error for Predictions: 0.2541

Mean Absolute Error for Predictions: 0.5035
