In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from joblib import dump, load

# Step 1: Load the dataset
# Replace 'your_dataset.csv' with your actual dataset file path
df = pd.read_csv('/content/drive/MyDrive/Dataset/predictive_maintenance.csv')

# Step 2: Data Preprocessing
# Separate the target columns and features (Drop UID and Product ID)
X = df.drop(columns=['UDI', 'Product ID', 'Target', 'Failure Type'])  # Dropping UID, Product ID, and Target
y = df['Failure Type']  # Using 'Failure Type' as the label for prediction

# Define numerical columns (you can adjust the columns as needed)
numerical_cols = ['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']
# No categorical columns to encode in this dataset
categorical_cols = []

# Preprocessing for numerical data: StandardScaler for scaling
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols)
    ])

# Step 3: Define Models
# Try a few different models in the pipeline
models = [
    ('RandomForest', RandomForestClassifier()),
    ('GradientBoosting', GradientBoostingClassifier()),
    ('LogisticRegression', LogisticRegression())
]

# Set up the pipeline with the preprocessor and placeholder for the classifier
pipe = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', models[0][1])])

# Define parameter grid for hyperparameter tuning
param_grid = [
    {
        'classifier': [RandomForestClassifier()],
        'classifier__n_estimators': [50, 100, 200],
        'classifier__max_depth': [None, 10, 20]
    },
    {
        'classifier': [GradientBoostingClassifier()],
        'classifier__learning_rate': [0.01, 0.1, 0.2],
        'classifier__n_estimators': [50, 100, 200],
        'classifier__max_depth': [3, 5, 10]
    },
    {
        'classifier': [LogisticRegression(max_iter=200)],
        'classifier__C': [0.1, 1.0, 10.0]
    }
]

# Step 4: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Hyperparameter Tuning with GridSearchCV
grid_search = GridSearchCV(pipe, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Step 6: Best model and evaluation
best_model = grid_search.best_estimator_
print("Best Model:", grid_search.best_params_)
print("Best Cross-Validation Score:", grid_search.best_score_)

# Evaluate on test set
y_pred = best_model.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Step 7: Save the model
model_path = 'best_predictive_maintenance_model.joblib'
dump(best_model, model_path)
print(f"Model saved to {model_path}")


Best Model: {'classifier': GradientBoostingClassifier(), 'classifier__learning_rate': 0.01, 'classifier__max_depth': 5, 'classifier__n_estimators': 200}
Best Cross-Validation Score: 0.9821250000000001
Test Accuracy: 0.983
Classification Report:
                           precision    recall  f1-score   support

Heat Dissipation Failure       0.93      0.93      0.93        15
              No Failure       0.99      1.00      0.99      1935
      Overstrain Failure       0.70      0.54      0.61        13
           Power Failure       0.79      0.75      0.77        20
         Random Failures       0.00      0.00      0.00         6
       Tool Wear Failure       0.00      0.00      0.00        11

                accuracy                           0.98      2000
               macro avg       0.57      0.54      0.55      2000
            weighted avg       0.97      0.98      0.98      2000

Confusion Matrix:
 [[  14    1    0    0    0    0]
 [   0 1930    1    4    0    0]
 [   0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from joblib import load
import pandas as pd

# Step 8: Load the saved model
model_path = 'best_predictive_maintenance_model.joblib'
loaded_model = load(model_path)
print(f"Model loaded from {model_path}")

# Sample Data for Testing
sample_data = pd.DataFrame({
    'Type': [0],                    # Type 'L'
    'Air temperature [K]': [298.9],  # Example temperature
    'Process temperature [K]': [309.1],  # Process temperature
    'Rotational speed [rpm]': [2861],  # Speed in rpm
    'Torque [Nm]': [4.6],           # Torque value
    'Tool wear [min]': [143]         # Tool wear in minutes
})

print("Sample Data Values:\n", sample_data)

# Step 10: Make a prediction using the loaded model
sample_pred = loaded_model.predict(sample_data)
print("Predicted Failure Type:", sample_pred[0])


Model loaded from best_predictive_maintenance_model.joblib
Sample Data Values:
    Type  Air temperature [K]  Process temperature [K]  Rotational speed [rpm]  \
0     0                298.9                    309.1                    2861   

   Torque [Nm]  Tool wear [min]  
0          4.6              143  
Predicted Failure Type: Power Failure
