In [14]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
file_path = "myData2.parquet"
df = pd.read_parquet(file_path)


In [39]:
df['ML_Node'].value_counts()

ML_Node
0    461500
Name: count, dtype: int64

In [25]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler
from tcn import TCN

In [16]:
selected_features = ['timestamp_seconds', # lowers the accuracy 
                     'node_memory_Percpu_bytes', 
                     'node_context_switches_total', 
                     'surfsara_power_usage', 
                     'node_netstat_Tcp_InSegs', 
                     'node_netstat_Tcp_OutSegs', 
                     'node_network_transmit_packets_total-sum', 
                     'node_filesystem_size_bytes-sum', 
                     'node_filesystem_files-sum', 
                     'node_memory_MemFree_bytes', 
                     'node_netstat_Tcp_InErrs']


In [17]:
# Add a new column 'failed_jobs' representing the target variable
df['failed_jobs'] = (df['state'] == 'FAILED').astype(int)

# Extract relevant columns
df_selected = df[['timestamp', 'state'] + selected_features].copy()

# Encode the target variable 'state' to binary (0 for "COMPLETED", 1 otherwise)
df_selected['target'] = (df_selected['state'] != 'COMPLETED').astype(int)

# Drop the original 'state' column
df_selected.drop('state', axis=1, inplace=True)


In [18]:
# Define time intervals
time_intervals = {'minute': '1T', 'hour': '1H', 'day': '1D'}


In [19]:
# Normalize selected features
scaler = MinMaxScaler()
df_selected[selected_features] = scaler.fit_transform(df_selected[selected_features])


In [20]:
# Set sequence length
sequence_length = 30

In [21]:
# Function to prepare data for TCN
def prepare_tcn_data(data, time_interval):
    data.set_index('timestamp', inplace=True) # FixMe
    data_resampled = data.resample(time_interval).sum()
    data_resampled['target'] = data_resampled['target'].clip(upper=1)  # Clip values to 1
    return data_resampled


In [22]:
# Function to create sequences for TCN
def create_tcn_sequences(data, sequence_length):
    sequences, targets = [], []
    for i in range(len(data) - sequence_length):
        seq = data.iloc[i:i+sequence_length].values
        target = data.iloc[i+sequence_length]['target']
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)

# Hour

In [23]:
# Prepare data for TCN with minute intervals
tcn_data_hour = prepare_tcn_data(df_selected, time_intervals['hour'])

# Create sequences and targets
sequences_hour, targets_hour = create_tcn_sequences(tcn_data_hour, sequence_length)

# Split the data into training and testing sets
X_train_hour, X_test_hour, y_train_hour, y_test_hour = train_test_split(sequences_hour, targets_hour, test_size=0.3, random_state=42)


In [29]:
# Build the TCN model
tcn_model_hour = Sequential([
    TCN(input_shape=(sequence_length, X_train_hour.shape[2])),
    Dense(1, activation='sigmoid')
])

In [37]:
# Compile the model
tcn_model_hour.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
tcn_model_hour.fit(X_train_hour, y_train_hour, epochs=20, batch_size=32, validation_split=0.1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7f93b05efdd0>

In [38]:
# Function to make predictions on new data for TCN model
def predict_future_failures_tcn(model, input_data, sequence_length, prediction_steps):
    predictions = []

    for _ in range(prediction_steps):
        # Make a prediction for the next time step
        prediction = model.predict(input_data.reshape(1, sequence_length, input_data.shape[1]))
        predictions.append(prediction[0, 0])

        # Shift the input data by one time step and append the new prediction
        input_data = np.roll(input_data, shift=-1, axis=0)
        input_data[-1, -1] = prediction[0, 0]

    return predictions

In [35]:
# Evaluate the TCN model
loss, accuracy = tcn_model_hour.evaluate(X_test_hour, y_test_hour)
print(f'Model Accuracy: {accuracy * 100:.2f}%')

# Select a starting point for predictions
input_data_tcn = X_test_hour[10]

# Number of time steps to predict into the future
prediction_steps_tcn = 7

# Make predictions with the TCN model
predicted_failures_tcn = predict_future_failures_tcn(tcn_model_hour, input_data_tcn, sequence_length, prediction_steps_tcn)

# Print the predicted failures for TCN
print("Predicted Failures for the Next 7 Time Steps (TCN):")
print(predicted_failures_tcn)


Model Accuracy: 83.52%
Predicted Failures for the Next 7 Time Steps (TCN):
[1.1323531e-08, 0.00015400539, 0.037686497, 0.99476063, 0.7563474, 0.1785389, 1.2227679e-06]


In [44]:
import numpy as np

def safe_smape(y_true, y_pred):
    denominator = np.abs(y_true) + np.abs(y_pred)
    diff = np.abs(y_true - y_pred) / denominator
    diff[denominator == 0] = 0  # Avoid division by zero
    return 200 * np.mean(diff)



In [45]:
# Make predictions on the test data
y_pred_hour = tcn_model_hour.predict(X_test_hour)

smape_score = safe_smape(y_test_hour, y_pred_hour)



In [46]:
smape_score

156.21057500974092