In [2]:
import pandas as pd
import numpy as np

In [167]:
from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import average_precision_score as ap, roc_auc_score as auc_roc

In [181]:
train = pd.read_csv("train_data_iitm.csv")
test = pd.read_csv("test_data_iitm.csv")

train.head()

Unnamed: 0,UID,part_serial_no,type,ambient_temperature,process_temperature,rotational_speed,torque,tool_wear,machine_failure,Type_1_failure,Type_2_failure,Type_3_failure,Type_4_failure,Type_5_failure
0,12695,M17554,M,300.1,309.6,1625,38.0,46,0,0,0,0,0,0
1,15141,M20000,M,304.3,313.6,1585,32.1,202,0,0,0,0,0,0
2,12569,L49748,L,299.6,309.2,1450,43.1,160,0,0,0,0,0,0
3,13672,M18531,M,302.3,311.7,1486,39.6,147,0,0,0,0,0,0
4,17428,L54607,L,300.0,311.4,1634,31.9,0,0,0,0,0,0,0


In [182]:
test.head()

Unnamed: 0,UID,part_serial_no,type,ambient_temperature,process_temperature,rotational_speed,torque,tool_wear
0,19954,L57133,L,298.1,307.8,1565,35.3,119
1,13851,M18710,M,302.3,311.0,1421,41.3,182
2,14963,M19822,M,304.0,312.7,1456,51.2,160
3,13887,L51066,L,302.4,311.3,1392,50.5,51
4,15438,M20297,M,302.9,312.6,1394,46.4,82


### Data Preprocessing

In [123]:
#Adding extra wear time based on Time.
train.loc[train["type"] == "H", "tool_wear"] += 5
train.loc[train["type"] == "M", "tool_wear"] += 3
train.loc[train["type"] == "L", "tool_wear"] += 2

#Converting Type to int
train.loc[train["type"] == "H", "type_int"] = 2
train.loc[train["type"] == "M", "type_int"] = 1
train.loc[train["type"] == "L", "type_int"] = 0

train.describe()

Unnamed: 0,UID,ambient_temperature,process_temperature,rotational_speed,torque,tool_wear,machine_failure,Type_1_failure,Type_2_failure,Type_3_failure,Type_4_failure,Type_5_failure,type_int
count,8000.0,8000.0,8000.0,8000.0,8000.0,8000.0,8000.0,8000.0,8000.0,8000.0,8000.0,8000.0,8000.0
mean,15018.4435,300.008637,310.009537,1539.066,39.962725,110.484625,0.0345,0.004875,0.012,0.009125,0.01,0.001625,0.496875
std,2885.300752,2.002711,1.482766,178.707689,9.96738,63.878857,0.182521,0.069655,0.108892,0.095094,0.099505,0.040281,0.66693
min,10001.0,295.3,305.7,1181.0,3.8,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,12532.75,298.3,308.8,1423.0,33.1,55.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,15026.5,300.1,310.1,1504.0,40.0,111.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,17518.5,301.5,311.1,1614.0,46.8,166.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
max,20000.0,304.5,313.8,2886.0,76.6,256.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0


In [124]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8000 entries, 0 to 7999
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   UID                   8000 non-null   int64  
 1   part_serial_no        8000 non-null   object 
 2   type                  8000 non-null   object 
 3    ambient_temperature  8000 non-null   float64
 4   process_temperature   8000 non-null   float64
 5   rotational_speed      8000 non-null   int64  
 6   torque                8000 non-null   float64
 7   tool_wear             8000 non-null   int64  
 8   machine_failure       8000 non-null   int64  
 9   Type_1_failure        8000 non-null   int64  
 10  Type_2_failure        8000 non-null   int64  
 11  Type_3_failure        8000 non-null   int64  
 12  Type_4_failure        8000 non-null   int64  
 13  Type_5_failure        8000 non-null   int64  
 14  type_int              8000 non-null   float64
dtypes: float64(4), int64(

In [125]:
#Dropping UID
train = train.drop(columns = ["UID"])

In [85]:
#Finding mismatch between the Type in Serial No and Type
train[train["part_serial_no"][0] == train["type"]]

Unnamed: 0,part_serial_no,type,ambient_temperature,process_temperature,rotational_speed,torque,tool_wear,machine_failure,Type_1_failure,Type_2_failure,Type_3_failure,Type_4_failure,Type_5_failure


In [126]:
train = train.drop(columns = ["part_serial_no", "type"])

In [127]:
train.head()

Unnamed: 0,ambient_temperature,process_temperature,rotational_speed,torque,tool_wear,machine_failure,Type_1_failure,Type_2_failure,Type_3_failure,Type_4_failure,Type_5_failure,type_int
0,300.1,309.6,1625,38.0,49,0,0,0,0,0,0,1.0
1,304.3,313.6,1585,32.1,205,0,0,0,0,0,0,1.0
2,299.6,309.2,1450,43.1,162,0,0,0,0,0,0,0.0
3,302.3,311.7,1486,39.6,150,0,0,0,0,0,0,1.0
4,300.0,311.4,1634,31.9,2,0,0,0,0,0,0,0.0


In [128]:
#Creating Output
train["Type_2_failure"] = 2*train["Type_2_failure"]
train["Type_3_failure"] = 3*train["Type_3_failure"]
train["Type_4_failure"] = 4*train["Type_4_failure"]
train["Type_5_failure"] = 5*train["Type_5_failure"]

train["Y"] = train["Type_1_failure"] + train["Type_2_failure"] + train["Type_3_failure"] + train["Type_4_failure"] + train["Type_5_failure"]

In [129]:
train = train.drop(columns = ["machine_failure", "Type_1_failure", "Type_2_failure", "Type_3_failure", "Type_4_failure", "Type_5_failure"])

### Feature Extraction

In [130]:
train["difference_temperature"] = train["process_temperature"] - train[" ambient_temperature"]

# As we know Torque is proportional to [Torque / Speed (rpm)]
train["power"] = train["torque"] / train["rotational_speed"]

train.head()

Unnamed: 0,ambient_temperature,process_temperature,rotational_speed,torque,tool_wear,type_int,Y,difference_temperature,power
0,300.1,309.6,1625,38.0,49,1.0,0,9.5,0.023385
1,304.3,313.6,1585,32.1,205,1.0,0,9.3,0.020252
2,299.6,309.2,1450,43.1,162,0.0,0,9.6,0.029724
3,302.3,311.7,1486,39.6,150,1.0,0,9.4,0.026649
4,300.0,311.4,1634,31.9,2,0.0,0,11.4,0.019523


### Fitting

In [132]:
train1 = np.array(train)
x = pd.DataFrame(train, columns = ["type_int", " ambient_temperature", "process_temperature", "rotational_speed", "torque", "tool_wear", "difference_temperature", "power"])
y = pd.DataFrame(train, columns = ["Y"])

mapping = {
    "H" : 2,
    "M" : 1,
    "L" : 0
}

x1 = np.array(x)
y1 = np.array(y)

In [137]:
x_train, x_test, y_train, y_test = train_test_split(x1, y1, test_size=0.10, random_state=42)

In [159]:
params = {
 'max_depth': [8],
 'learning_rate': [.05],
 'n_estimators' : [120]
}

model = XGBClassifier(objective= 'multi:softmax')
skf = StratifiedKFold(shuffle = True)
grid = GridSearchCV(model, params, cv=skf.split(x_train, y_train))

In [160]:
grid.fit(x_train, y_train)
grid.best_params_

  return f(*args, **kwargs)




  return f(*args, **kwargs)




  return f(*args, **kwargs)




  return f(*args, **kwargs)




  return f(*args, **kwargs)




  return f(*args, **kwargs)


0.9825


In [161]:
print(grid.score(x_test, y_test))

0.9825


In [197]:
print(grid.score(x_train, y_train))

0.9984722222222222


### Testing on Test data

In [183]:
test.head()

Unnamed: 0,UID,part_serial_no,type,ambient_temperature,process_temperature,rotational_speed,torque,tool_wear
0,19954,L57133,L,298.1,307.8,1565,35.3,119
1,13851,M18710,M,302.3,311.0,1421,41.3,182
2,14963,M19822,M,304.0,312.7,1456,51.2,160
3,13887,L51066,L,302.4,311.3,1392,50.5,51
4,15438,M20297,M,302.9,312.6,1394,46.4,82


In [184]:
#Adding extra wear time based on Time.
test.loc[test["type"] == "H", "tool_wear"] += 5
test.loc[test["type"] == "M", "tool_wear"] += 3
test.loc[test["type"] == "L", "tool_wear"] += 2

#Converting Type to int
test.loc[test["type"] == "H", "type_int"] = 2
test.loc[test["type"] == "M", "type_int"] = 1
test.loc[test["type"] == "L", "type_int"] = 0

test.describe()

Unnamed: 0,UID,ambient_temperature,process_temperature,rotational_speed,torque,tool_wear,type_int
count,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0
mean,14928.726,299.9901,309.98965,1537.6165,40.08365,110.8195,0.514
std,2892.876076,1.99085,1.487866,181.611921,9.977054,62.76694,0.688506
min,10007.0,295.5,305.9,1168.0,5.8,2.0,0.0
25%,12373.75,298.3,308.8,1423.0,33.5,57.0,0.0
50%,14912.5,300.1,310.1,1499.0,40.4,110.0,0.0
75%,17434.25,301.5,311.1,1606.0,46.7,164.0,1.0
max,19999.0,304.4,313.8,2825.0,73.6,246.0,2.0


In [185]:
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   UID                   2000 non-null   int64  
 1   part_serial_no        2000 non-null   object 
 2   type                  2000 non-null   object 
 3    ambient_temperature  2000 non-null   float64
 4   process_temperature   2000 non-null   float64
 5   rotational_speed      2000 non-null   int64  
 6   torque                2000 non-null   float64
 7   tool_wear             2000 non-null   int64  
 8   type_int              2000 non-null   float64
dtypes: float64(4), int64(3), object(2)
memory usage: 140.8+ KB


In [186]:
#Dropping UID
test = test.drop(columns = ["UID"])

In [187]:
#Finding mismatch between the Type in Serial No and Type
test[test["part_serial_no"][0] == test["type"]]

Unnamed: 0,UID,part_serial_no,type,ambient_temperature,process_temperature,rotational_speed,torque,tool_wear,machine_failure,Type_1_failure,Type_2_failure,Type_3_failure,Type_4_failure,Type_5_failure


In [188]:
test = test.drop(columns = ["part_serial_no", "type"])

In [177]:
test.head()

Unnamed: 0,UID,part_serial_no,type,ambient_temperature,process_temperature,rotational_speed,torque,tool_wear,type_int
0,19954,L57133,L,298.1,307.8,1565,35.3,121,0.0
1,13851,M18710,M,302.3,311.0,1421,41.3,185,1.0
2,14963,M19822,M,304.0,312.7,1456,51.2,163,1.0
3,13887,L51066,L,302.4,311.3,1392,50.5,53,0.0
4,15438,M20297,M,302.9,312.6,1394,46.4,85,1.0


### Feature Extraction

In [189]:
test["difference_temperature"] = test["process_temperature"] - test[" ambient_temperature"]

# As we know Torque is proportional to [Torque / Speed (rpm)]
test["power"] = train["torque"] / test["rotational_speed"]

test.head()

Unnamed: 0,ambient_temperature,process_temperature,rotational_speed,torque,tool_wear,type_int,difference_temperature,power
0,298.1,307.8,1565,35.3,121,0.0,9.7,0.024281
1,302.3,311.0,1421,41.3,185,1.0,8.7,0.02259
2,304.0,312.7,1456,51.2,163,1.0,8.7,0.029602
3,302.4,311.3,1392,50.5,53,0.0,8.9,0.028448
4,302.9,312.6,1394,46.4,85,1.0,9.7,0.022884


In [194]:
y_predict = grid.predict(x_test)

In [195]:
print(y_predict)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 2 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 3 0 0
 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3
 0 0 0 0 3 0 0 0 0 0 2 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 2 0 0 

In [196]:
np.savetxt("CE18B047_Predictions", y)