In [None]:
#importing libraries

import os
import random
from random import randint as ri , shuffle

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential


In [None]:
# Location of the input CSV. The default is the original author's local path;
# set the OUTPUT_CSV_PATH environment variable to run the notebook on another
# machine without editing this cell.
OUTPUT_CSV_PATH = os.environ.get(
    'OUTPUT_CSV_PATH',
    'C:/Users/nvarshney014/OneDrive - PwC/Desktop/Project/Data/output.csv',
)
output_data = pd.read_csv(OUTPUT_CSV_PATH)

In [None]:
# Sensor columns used as model inputs; create_sequences and the SMOTE reshape
# read this list as well.
features = ['volt','rotate','pressure','vibration']

# Standardise the sensor columns in place.
# NOTE(review): the scaler is fitted on the whole frame, i.e. before the
# train/test split, so held-out rows influence the scaling statistics.
# Consider fitting on training rows only.
scaler = StandardScaler()
scaler.fit(output_data[features])
output_data[features] = scaler.transform(output_data[features])

# Replace the 'failure' column with the encoder's integer class codes.
label_encoder = LabelEncoder()
output_data['failure'] = label_encoder.fit_transform(output_data['failure'])


In [None]:
def create_sequences(output_data, seq_length, feature_cols=None):
    """Build fixed-length sliding windows of feature rows, one machine at a time.

    For each distinct 'machineID', every run of ``seq_length`` consecutive rows
    (in the order they appear in ``output_data``) becomes one sequence, labelled
    with the 'failure' value of the row immediately after the window. Windows
    never span two machines.

    NOTE(review): rows are assumed to already be in chronological order within
    each machine; nothing here sorts them. Confirm against the data source.

    Parameters
    ----------
    output_data : pandas.DataFrame
        Must contain 'machineID', 'failure' and every column in ``feature_cols``.
    seq_length : int
        Number of consecutive rows per window.
    feature_cols : sequence of str, optional
        Columns to place in each window. Defaults to the module-level
        ``features`` list, which is what the original implementation used.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``sequences`` with shape (n_windows, seq_length, n_features) and
        ``labels`` with shape (n_windows,). Machines with ``seq_length`` rows or
        fewer contribute no windows. Labels keep the dtype of the 'failure'
        column.
    """
    if feature_cols is None:
        feature_cols = features
    feature_cols = list(feature_cols)

    sequences = []
    labels = []
    for mid in output_data['machineID'].unique():
        machine_data = output_data[output_data['machineID'] == mid]
        # Pull the arrays out once per machine. Indexing a whole row with
        # .iloc[i]['failure'] builds a mixed-dtype Series per window, which is
        # slow and silently upcasts integer labels to float when every column
        # is numeric.
        feature_values = machine_data[feature_cols].to_numpy()
        label_values = machine_data['failure'].to_numpy()
        for start in range(len(machine_data) - seq_length):
            stop = start + seq_length
            sequences.append(feature_values[start:stop])
            labels.append(label_values[stop])

    if not sequences:
        # Keep the documented rank even when no window could be built.
        empty_x = np.empty((0, max(seq_length, 0), len(feature_cols)))
        return empty_x, np.array(labels)
    return np.array(sequences), np.array(labels)

# Window length: number of consecutive rows per machine in each sample.
seq_length = 2

# X holds the feature windows; y holds the 'failure' value of the row that
# follows each window.
X, y = create_sequences(output_data, seq_length=seq_length)


In [None]:
# Shuffle windows and labels with one shared permutation so every label stays
# attached to its window. The generator is seeded so the order, and every split
# derived from it, is reproducible on a fresh kernel run.
# NOTE(review): consecutive windows from the same machine overlap, so a random
# split can place near-identical windows on both sides of the train/test
# boundary. Consider splitting by machine or by time instead.
shuffled_indices = np.random.default_rng(42).permutation(len(X))
X = X[shuffled_indices]
y = y[shuffled_indices]

In [None]:
# Hold out 20% of the windows for testing; stratifying on y keeps the class
# proportions the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
from imblearn.over_sampling import SMOTE

# Carve out the validation set BEFORE oversampling. Splitting after SMOTE puts
# synthetic samples (interpolated from training neighbours) into the validation
# data, which inflates validation metrics and misleads early stopping.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train,
)

# SMOTE expects 2-D input, so flatten each (seq_length, n_features) window into
# a single row before resampling.
X_train_reshaped = X_fit.reshape(X_fit.shape[0], -1)
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_reshaped, y_fit)

# Restore the (samples, seq_length, n_features) layout the LSTM consumes.
X_train_resampled = X_train_resampled.reshape(X_train_resampled.shape[0], seq_length, len(features))

print(X_train_resampled.shape)
print(y_train_resampled.shape)

In [None]:
# Stacked LSTM classifier: three LSTM layers (dropout after the second),
# followed by two dense layers.
model = Sequential()

# First LSTM layer; the input shape is one window, taken from X_train's
# trailing two dimensions.
model.add(LSTM(units=20, input_shape=X_train.shape[1:], return_sequences=True))
# Second LSTM layer still returns the full sequence so a third can follow.
model.add(LSTM(units=20, activation='relu', return_sequences=True))
# Dropout rate set to 0.5.
model.add(Dropout(0.5))
model.add(LSTM(units=20, activation='relu'))
model.add(Dense(units=20, activation='relu'))
# NOTE(review): 5 output units, presumably one per encoded 'failure' class.
# Confirm against label_encoder.classes_.
model.add(Dense(units=5, activation='softmax'))

# Compile the model
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print model summary
model.summary()

In [None]:
# Early stopping on validation accuracy, restoring the best weights seen.
# NOTE(review): patience=10 is larger than the epochs=5 passed to model.fit,
# so with the current settings the stop condition cannot trigger. Confirm
# which of the two values is intended.
callback = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.00001,
    patience=10,
    verbose=1,
    restore_best_weights=True,
)

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# 'balanced' weights are computed from the class frequencies in the training labels.
train_classes = np.unique(y_train_resampled)
class_weights = compute_class_weight(class_weight='balanced', classes=train_classes, y=y_train_resampled)
# Key each weight by its actual class label rather than by its position in the
# array: the two only coincide when the labels are exactly 0..n-1 with none missing.
class_weights_dict = {
    int(label): float(weight)
    for label, weight in zip(train_classes, class_weights)
}

In [None]:
# Train on the resampled training windows and validate on the held-out split.
# `callbacks` is documented as a list of callback instances, so the single
# EarlyStopping callback is wrapped in one.
history = model.fit(
    X_train_resampled,
    y_train_resampled,
    epochs=5,
    batch_size=64,
    validation_data=(X_val, y_val),
    class_weight=class_weights_dict,
    callbacks=[callback],
)

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Run the trained model on the held-out test windows; the final layer is a
# softmax, so each row holds one score per output unit.
predictions_proba = model.predict(X_test)

In [None]:
# Predicted class for each test window = index of the highest score in its row.
predictions_labelled = predictions_proba.argmax(axis=1)

In [None]:
# Evaluate on the test split; the model was compiled with a single metric
# ('accuracy'), so evaluate returns the loss followed by that accuracy.
loss, accuracy = model.evaluate(X_test, y_test)
# Report accuracy as a percentage with two decimals.
print(f'Accuracy on test data: {accuracy * 100:.2f}%')

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Per-class precision/recall/F1 summary and confusion matrix on the test split.
rep = classification_report(y_true=y_test, y_pred=predictions_labelled)
conf = confusion_matrix(y_true=y_test, y_pred=predictions_labelled)

for metric_output in (rep, conf):
    print(metric_output)

In [None]:
import matplotlib.pyplot as plt

# Plot training history: accuracy per epoch for the training and validation data.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='Training Accuracy')
ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

In [None]:
# Loss per epoch for the training and validation data.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Number of samples per class label in each split, in training, validation,
# test order.
split_label_sets = (
    ("training set class distribution:", y_train_resampled),
    ("validation set class distribution:", y_val),
    ("test set class distribution:", y_test),
)
for caption, split_labels in split_label_sets:
    print(caption, np.bincount(split_labels))

In [None]:
# Persist the trained model to disk under an .h5 file name.
# NOTE(review): the notebook also saves the same model as "historyexp.keras";
# confirm whether both copies are needed.
model.save("historyexp.h5")

In [None]:
from tensorflow.keras.models import save_model

# Save the same trained model again, this time under a .keras file name.
# NOTE(review): the save_model import in this cell is never used; the method
# on the model object is called instead.
model.save("historyexp.keras")