In [None]:
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from xgboost import XGBClassifier

warnings.filterwarnings("ignore")

In [None]:
pip install tensorflow




# Load and Explore the Data

In [None]:

# Step 2: Load the dataset
# Look for the CSV at the Colab upload location first and then in the current working
# directory, so the notebook also runs outside Colab. `file_path` stays a plain string
# and is identical to the original value whenever the Colab file exists.
DATA_FILENAME = "engine_failure_dataset.csv"
candidate_paths = [Path("/content") / DATA_FILENAME, Path(DATA_FILENAME)]
file_path = next((str(p) for p in candidate_paths if p.is_file()), None)
if file_path is None:
    searched = ", ".join(str(p.resolve()) for p in candidate_paths)
    raise FileNotFoundError(
        f"{DATA_FILENAME} not found (looked in: {searched}). "
        "Upload the file or update the candidate paths."
    )
data = pd.read_csv(file_path)


FileNotFoundError: [Errno 2] No such file or directory: '/content/engine_failure_dataset.csv'

In [None]:
# Step 3: EDA - Look at the data
print(data.head())      # first rows of the frame
# DataFrame.info() writes its report to stdout itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
data.info()
print(data.describe())  # summary statistics of the numeric columns

In [None]:
# Correlation heatmap over the numeric columns of the dataset
corr_matrix = data.corr(numeric_only=True)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()

In [None]:
# Step 4: Encode categorical variables
# Fit the encoder on the Operational_Mode column, then overwrite that column with the
# integer codes the fitted encoder assigns.
le = LabelEncoder().fit(data["Operational_Mode"])
data["Operational_Mode"] = le.transform(data["Operational_Mode"])

In [None]:
# Bar chart with the number of rows per Fault_Condition value
ax = sns.countplot(data=data, x="Fault_Condition")
ax.set_title("Fault Condition Distribution")
plt.show()

In [None]:
# Step 4: Drop columns we don't need
# errors="ignore" keeps this cell re-runnable: once Time_Stamp has been removed, running
# the cell again is a no-op instead of raising KeyError.
data = data.drop(columns=["Time_Stamp"], errors="ignore")

In [None]:
# Step 5: Encode categorical variables
# This step repeats an earlier encoding cell. Re-fitting a fresh LabelEncoder on a column
# that already holds the codes 0..n-1 leaves the data unchanged but replaces `le`, whose
# classes_ would then be the integer codes rather than the original mode labels. So the
# encoder is only (re)fitted when the column is not already encoded.
mode_values = data["Operational_Mode"]
already_encoded = (
    mode_values.dtype.kind in "iu"
    and sorted(mode_values.unique()) == list(range(mode_values.nunique()))
)
if not already_encoded:
    le = LabelEncoder()
    data["Operational_Mode"] = le.fit_transform(mode_values)
elif "le" not in globals():
    # The column arrived already encoded and no encoder exists yet; fit one so `le`
    # is still defined for any later cell that uses it.
    le = LabelEncoder().fit(mode_values)

In [None]:
# Step 6: Split features and target
# Fault_Condition is the label; every remaining column is used as a feature.
TARGET_COLUMN = "Fault_Condition"
y = data[TARGET_COLUMN]
X = data.drop(columns=[TARGET_COLUMN])


In [None]:
# Steps 7-8: Split into train, validation, and test sets, then normalize the features
# The raw features are split first (70% train, then the remaining 30% halved into
# validation and test). With the same random_state and row count, the rows land in the
# same partitions as when the split was done on pre-scaled data.
X_train_raw, X_temp_raw, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X_val_raw, X_test_raw, y_val, y_test = train_test_split(
    X_temp_raw, y_temp, test_size=0.5, random_state=42
)

# Fit the scaler on the training rows only. Fitting it on the full dataset would let
# the mean/variance of the validation and test rows leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the train-fitted scaler, kept so that any cell still
# referring to X_scaled continues to work.
X_scaled = scaler.transform(X)


In [None]:

# Step 9: Build a simple deep neural network
# Seed the Python, NumPy and TensorFlow random generators so that weight initialisation
# and batch shuffling are repeatable from one run of the notebook to the next.
tf.keras.utils.set_random_seed(42)

n_features = X_train.shape[1]   # one input unit per feature column
n_classes = len(y.unique())     # one softmax output per distinct target value

model = Sequential([
    tf.keras.Input(shape=(n_features,)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(n_classes, activation='softmax'),
])

# Step 10: Compile the model
# sparse_categorical_crossentropy takes integer class labels directly (no one-hot needed).
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Step 11: Train the model
# 50 passes over the training set in mini-batches of 32; the validation set is scored
# after every epoch and the per-epoch metrics are kept in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_val, y_val),
)

In [None]:
# Step 12: Evaluate the model
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
test_metrics = model.evaluate(x=X_test, y=y_test)
loss, accuracy = test_metrics
print("Test Accuracy:", accuracy)

In [None]:
# Step 13: Confusion matrix and classification report
y_pred = model.predict(X_test)              # per-class probabilities, one row per sample
y_pred_classes = np.argmax(y_pred, axis=1)  # most probable class index for each sample

print("Classification Report:")
print(classification_report(y_test, y_pred_classes))

# confusion_matrix is imported with the other sklearn.metrics names in the import cell.
# Pinning `labels` to every class index the model can output keeps the matrix square and
# its tick positions equal to the class ids even if a class is missing from the test split.
class_ids = np.arange(y_pred.shape[1])
conf_matrix = confusion_matrix(y_test, y_pred_classes, labels=class_ids)
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

In [None]:
# Step 14: ROC Curve for each class
y_pred_proba = y_pred
n_classes = y_pred_proba.shape[1]  # one probability column per model output class

# Convert labels to one-hot encoding. num_classes fixes the number of columns to the
# model's output width; without it the width is inferred from the largest label in
# y_test, which is too small (IndexError below) if the highest class is not in the test split.
y_test_ohe = tf.keras.utils.to_categorical(y_test, num_classes=n_classes)

for i in range(n_classes):
    positives = int(y_test_ohe[:, i].sum())
    if positives == 0 or positives == len(y_test_ohe):
        # A one-vs-rest ROC curve needs both positive and negative samples;
        # roc_auc_score raises ValueError otherwise.
        print(f"Class {i}: skipped (test split has only one outcome for this class)")
        continue
    fpr, tpr, _ = roc_curve(y_test_ohe[:, i], y_pred_proba[:, i])
    auc = roc_auc_score(y_test_ohe[:, i], y_pred_proba[:, i])
    plt.plot(fpr, tpr, label='Class {} (AUC = {:.2f})'.format(i, auc))

plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal for reference
plt.title('AUC-ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.grid()
plt.show()
