In [2]:
import pandas as pd 
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import PowerTransformer
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.tree import DecisionTreeClassifier

In [3]:
df = pd.read_csv("../Data/processed_data.csv")
df.head()

Unnamed: 0,Assembly_Line_No,Hydraulic_Pressure(bar),Coolant_Pressure(bar),Air_System_Pressure(bar),Coolant_Temperature,Hydraulic_Oil_Temperature(?C),Spindle_Bearing_Temperature(?C),Spindle_Vibration(?m),Tool_Vibration(?m),Spindle_Speed(RPM),Voltage(volts),Torque(Nm),Cutting(kN),Downtime
0,Shopfloor-L1,71.04,6.933725,6.284965,25.6,46.0,33.4,1.291,26.492,25892.0,335.0,24.055326,3.58,0
1,Shopfloor-L1,125.33,4.936892,6.196733,35.3,47.4,34.6,1.382,25.274,19856.0,368.0,14.20289,2.68,0
2,Shopfloor-L3,71.12,6.839413,6.655448,13.1,40.7,33.0,1.319,30.608,19851.0,325.0,24.049267,3.55,0
3,Shopfloor-L2,139.34,4.574382,6.560394,24.4,44.2,40.6,0.618,30.791,18461.0,360.0,25.860029,3.55,0
4,Shopfloor-L1,60.51,6.893182,6.141238,4.1,47.3,31.4,0.983,25.516,26526.0,354.0,25.515874,3.55,0


In [4]:
df.head()

Unnamed: 0,Assembly_Line_No,Hydraulic_Pressure(bar),Coolant_Pressure(bar),Air_System_Pressure(bar),Coolant_Temperature,Hydraulic_Oil_Temperature(?C),Spindle_Bearing_Temperature(?C),Spindle_Vibration(?m),Tool_Vibration(?m),Spindle_Speed(RPM),Voltage(volts),Torque(Nm),Cutting(kN),Downtime
0,Shopfloor-L1,71.04,6.933725,6.284965,25.6,46.0,33.4,1.291,26.492,25892.0,335.0,24.055326,3.58,0
1,Shopfloor-L1,125.33,4.936892,6.196733,35.3,47.4,34.6,1.382,25.274,19856.0,368.0,14.20289,2.68,0
2,Shopfloor-L3,71.12,6.839413,6.655448,13.1,40.7,33.0,1.319,30.608,19851.0,325.0,24.049267,3.55,0
3,Shopfloor-L2,139.34,4.574382,6.560394,24.4,44.2,40.6,0.618,30.791,18461.0,360.0,25.860029,3.55,0
4,Shopfloor-L1,60.51,6.893182,6.141238,4.1,47.3,31.4,0.983,25.516,26526.0,354.0,25.515874,3.55,0


In [5]:
features=["Torque(Nm)","Hydraulic_Pressure(bar)","Cutting(kN)",
          "Coolant_Pressure(bar)","Spindle_Speed(RPM)","Coolant_Temperature"]

In [6]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(df[features],df["Downtime"],test_size=0.3,random_state =0)

In [7]:
X_train.head()

Unnamed: 0,Torque(Nm),Hydraulic_Pressure(bar),Cutting(kN),Coolant_Pressure(bar),Spindle_Speed(RPM),Coolant_Temperature
1988,32.720445,89.458747,2.11,5.177201,20809.0,12.2
1969,16.449554,70.83,2.87,4.801531,19352.0,26.1
1368,14.20289,62.7,2.57,4.406872,17919.0,28.7
840,19.0041,82.968808,3.0,6.823142,25121.0,33.6
2214,27.185354,71.96,3.67,6.863944,17561.0,26.4


In [8]:
y_train.head()

1988    1
1969    0
1368    0
840     1
2214    0
Name: Downtime, dtype: int64

In [9]:
# Simple imputer
trf1 =  ColumnTransformer(transformers=[('mean_imputer',SimpleImputer(strategy='mean'),[0,1,2,3,4,5])],remainder='passthrough')

In [10]:
# Scaling
trf2 = ColumnTransformer(transformers=[('scale',RobustScaler(),[0,1,2,3,4,5])])


In [11]:
#pipeline
pipe = Pipeline([('trf1',trf1),('trf2',trf2)])

In [12]:
X_train_processed = pipe.fit_transform(X_train)
X_test_processed = pipe.transform(X_test)

In [13]:
X_train_processed.shape

(1750, 6)

In [14]:
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

In [15]:
# Initialize and train the Logistic Regression model
log_reg = LogisticRegression(max_iter=200)
log_reg.fit(X_train_processed, y_train)

# Make predictions
y_pred = log_reg.predict(X_test_processed)

# Evaluate the model
print("Logistic Regression")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Logistic Regression
Accuracy: 0.8693333333333333
Confusion Matrix:
 [[323  60]
 [ 38 329]]


In [16]:
# Initialize and train the Decision Tree model
decision_tree = DecisionTreeClassifier(max_depth=100,random_state=42)
decision_tree.fit(X_train_processed, y_train)

# Make predictions
y_pred = decision_tree.predict(X_test_processed)

# Evaluate the model
print("Decision Tree")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Decision Tree
Accuracy: 0.984
Confusion Matrix:
 [[375   8]
 [  4 363]]


In [17]:
# Initialize and train the Random Forest model
random_forest = RandomForestClassifier(n_estimators=200, random_state=42)
random_forest.fit(X_train_processed, y_train)

# Make predictions
y_pred = random_forest.predict(X_test_processed)

# Evaluate the model
print("Random Forest")
print("Random Forest Accuracy:", random_forest.score(X_test_processed, y_test))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Random Forest
Random Forest Accuracy: 0.988
Accuracy: 0.988
Confusion Matrix:
 [[374   9]
 [  0 367]]


In [18]:
# Initialize and train the SVM model
svm_model = SVC(kernel='rbf', C=1, gamma=0.5)
svm_model.fit(X_train_processed, y_train)

# Make predictions
y_pred = svm_model.predict(X_test_processed)

# Evaluate the model
print("Support Vector Machine")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Support Vector Machine
Accuracy: 0.9533333333333334
Confusion Matrix:
 [[359  24]
 [ 11 356]]


In [19]:
# Initialize and train the KNN model
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_processed, y_train)

# Make predictions
y_pred = knn.predict(X_test_processed)

# Evaluate the model
print("K-Nearest Neighbors")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

K-Nearest Neighbors
Accuracy: 0.9613333333333334
Confusion Matrix:
 [[363  20]
 [  9 358]]


In [20]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import VotingClassifier
import numpy as np

In [21]:
# Ensemble Learning
estimators = [('Logistic Regression',log_reg),('Random Forest',random_forest),('KNN',knn),('SVC',svm_model),('Decision Tree',decision_tree)]
for estimator in estimators:
    x = cross_val_score(estimator[1],X_test_processed,y_test,cv=10,scoring='accuracy')
    print(estimator[0],":",np.round(np.mean(x),2))


Logistic Regression : 0.86
Random Forest : 0.98
KNN : 0.96
SVC : 0.96
Decision Tree : 0.95


In [26]:
vc = VotingClassifier(estimators=estimators,voting='hard')
#vc.fit(X_train_processed,y_train)
x = cross_val_score(vc,X_test_processed,y_test,cv=10,scoring='accuracy')
print("Validation accuracy for soft voting classifier",np.round(np.mean(x),2))

Validation accuracy for soft voting classifier 0.97


In [27]:
import pickle 
pickle.dump(pipe,open('preprocessing_pipeline.pkl','wb'))
pickle.dump(vc,open('voting_classifier_model.pkl','wb'))