In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle as pk


# Read the experimental measurements from the CSV file into a DataFrame
# and preview the first rows as the cell output.
data = pd.read_csv("Experimental values.csv")
data.head()

In [None]:
# Display the loaded DataFrame (rendered because it is the cell's last expression).
data

In [None]:
# Remove the first column, "S. No.", so it is not picked up by the positional
# (iloc) feature/target selection further down.
# Reassigning instead of using inplace=True, together with errors="ignore",
# keeps this cell idempotent: re-running it after the column is already gone
# no longer raises KeyError.
data = data.drop(columns="S. No.", errors="ignore")

In [None]:
# Show the type of the object that holds the dataset.
type(data)

In [None]:
# Print the frame summary: columns, non-null counts and dtypes.
data.info()

In [None]:
import seaborn as sns

# Correlation matrix restricted to the numeric columns, drawn as an
# annotated heatmap.
cor = data.corr(numeric_only=True)
sns.heatmap(data=cor, cmap='coolwarm', annot=True)

In [None]:
# Draw one wear curve per distinct cooling condition on a shared axis.
for con in data["Cooling Conditions"].unique():
    # Single .loc lookup: rows for this condition, wear column only.
    wear = data.loc[data["Cooling Conditions"] == con, "Wear(micrometer)"]
    plt.plot(wear, label=con)
plt.legend()
plt.ylabel("Wear in micrometer")
plt.show()

In [None]:
# Number of rows recorded for each cooling condition.
data["Cooling Conditions"].value_counts()

In [None]:
from sklearn.model_selection import train_test_split
# Features: the first three columns (positional selection).
x = data.iloc[:, :3]
# Targets: columns 3, 4 and 5. The later cells use y1 for the coefficient of
# friction, y2 for the frictional force and y3 for wear.
y1, y2, y3 = (data.iloc[:, position] for position in (3, 4, 5))

In [None]:
# One-hot encode the categorical "Cooling Conditions" feature, dropping the
# first level. The input is taken from `data` (same first three columns as `x`)
# rather than from `x` itself so the cell can be re-run safely: feeding the
# already-encoded `x` back into get_dummies raises KeyError because the
# "Cooling Conditions" column no longer exists.
x = pd.get_dummies(data.iloc[:, 0:3], columns=["Cooling Conditions"], drop_first=True)


In [None]:
# predicting the coefficient of friction
# Stratifying on the cooling condition ensures that all the cooling conditions
# exist in the test data; random_state fixes the split.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y1,
    test_size=0.2,
    random_state=0,
    stratify=data["Cooling Conditions"],
)

In [None]:
# Inspect the feature rows that ended up in the test split.
x_test

In [None]:
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.model_selection import cross_val_score,KFold
from sklearn.metrics import mean_squared_error, r2_score,mean_absolute_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR


def build_model(model,name):
    """Fit ``model`` on the current training split and print its metrics.

    Uses the notebook-level ``x_train``, ``y_train``, ``x_test`` and ``y_test``
    variables, i.e. whatever the most recently executed split cell produced.

    Parameters
    ----------
    model : object
        Estimator exposing ``fit``, ``predict`` and ``score``.
    name : str
        Heading printed before the metrics.

    Returns
    -------
    None
        Everything is reported via ``print``: the heading, ``model.score`` on
        the training and test splits, and the mean squared error of the test
        predictions, followed by blank lines.
    """
    model.fit(x_train, y_train)
    test_predictions = model.predict(x_test)

    print(name)
    train_score = model.score(x_train, y_train)
    print("Training Score: ", train_score)
    test_score = model.score(x_test, y_test)
    print("Testing Score: ", test_score)
    test_mse = mean_squared_error(y_test, test_predictions)
    print("Mean squared error: ", test_mse)
    print("\n")


In [None]:
# Compare candidate regressors on the current train/test split.
build_model(LinearRegression(),"Linear Regression")

# Regularisation strengths tried for both Ridge and Lasso.
alpha = [0.1,0.2,0.3,0.4,0.5,0.6]
for i in alpha:
    build_model(Ridge(alpha=i),"Ridge alpha: "+str(i))
for i in alpha:
    build_model(Lasso(alpha=i),"Lasso alpha: "+str(i))

# The tree-based estimators are randomised. Fixing random_state (same value as
# the train_test_split call) makes the printed scores reproducible, so the
# model choice based on them does not change from run to run.
build_model(DecisionTreeRegressor(random_state=0),"Decision Tree")

build_model(RandomForestRegressor(random_state=0),"Random Forest")

build_model(GradientBoostingRegressor(random_state=0),"Gradient Boosting")

build_model(SVR(),"Support Vector Regression")


Out of all the models, we got the best scores from LinearRegression and Ridge regression with alpha = 0.1.

In [None]:
# Final Ridge model (alpha=0.01) for the coefficient of friction.
#
# Step 1 - honest evaluation: score a model fitted on the training split only.
# Fitting on all of `x` first and then scoring on `x_test` would report a
# "Testing Score" on rows the model has already seen (data leakage).
ridge_eval = Ridge(alpha=0.01).fit(x_train, y_train)
y_pred = ridge_eval.predict(x_test)
print("Ridge")
print("Training Score: ",ridge_eval.score(x_train,y_train))

print("Testing Score: ",ridge_eval.score(x_test,y_test))

# Step 2 - the persisted model is refitted on the complete dataset.
ridge = Ridge(alpha=0.01).fit(x, y1)

with open("./full-data/coeefficent-of-friction-with-ridge.pkl","wb") as file:
    pk.dump(ridge,file)

In [None]:
# Final LinearRegression model for the coefficient of friction.
#
# Evaluate with a model fitted on the training split only, so the
# "Testing Score" is measured on unseen rows (fitting on all of `x` and then
# scoring on `x_test` leaks the test data into training).
lr_eval = LinearRegression().fit(x_train, y_train)
print("Linear Regression")
print("Training Score: ",lr_eval.score(x_train,y_train))
print("Testing Score: ",lr_eval.score(x_test,y_test))

# The persisted model is refitted on the complete dataset.
lr = LinearRegression().fit(x, y1)

with open("./full-data/coeefficent-of-friction-with-linear-regression.pkl","wb") as file:
    pk.dump(lr,file)

In [None]:
# prediction of Frictional Force
# Same 80/20 split settings as before (stratified on the cooling condition,
# fixed random_state), now with y2 as the target.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y2,
    test_size=0.2,
    random_state=0,
    stratify=data["Cooling Conditions"],
)

In [None]:
# Compare candidate regressors on the current train/test split.
build_model(LinearRegression(),"Linear Regression")

# Regularisation strengths tried for both Ridge and Lasso.
alpha = [0.1,0.2,0.3,0.4,0.5,0.6]
for i in alpha:
    build_model(Ridge(alpha=i),"Ridge alpha: "+str(i))
for i in alpha:
    build_model(Lasso(alpha=i),"Lasso alpha: "+str(i))

# The tree-based estimators are randomised. Fixing random_state (same value as
# the train_test_split call) makes the printed scores reproducible, so the
# model choice based on them does not change from run to run.
build_model(DecisionTreeRegressor(random_state=0),"Decision Tree")

build_model(RandomForestRegressor(random_state=0),"Random Forest")

build_model(GradientBoostingRegressor(random_state=0),"Gradient Boosting")

build_model(SVR(),"Support Vector Regression")


In [None]:
# final model for frictional force
#
# Step 1 - honest evaluation: fit on the training split only, so the test
# score and MSE are computed on rows the model has not seen. Fitting on all of
# `x` before scoring on `x_test` leaks the test rows into training.
ridge_eval = Ridge(alpha=0.21).fit(x_train, y_train)

y_pred = ridge_eval.predict(x_test)
print("Ridge")
print("Training Score: ",ridge_eval.score(x_train,y_train))
print("Testing Score: ",ridge_eval.score(x_test,y_test))
print("Mean squared error: ",mean_squared_error(y_test,y_pred))

# Step 2 - `ridge` (the model that gets pickled) is refitted on the complete
# dataset. Written as an assignment so the cell shows no estimator repr.
ridge = Ridge(alpha=0.21).fit(x, y2)




In [None]:

# Persist the frictional-force model held in `ridge` as a pickle file.
with open("./full-data/frictional-force-with-ridge.pkl","wb") as file:
    pk.dump(ridge,file)

In [None]:
# prediction of Wear
# Same 80/20 split settings as before (stratified on the cooling condition,
# fixed random_state), now with y3 as the target.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y3,
    test_size=0.2,
    random_state=0,
    stratify=data["Cooling Conditions"],
)


In [None]:
# Compare candidate regressors on the current train/test split.
build_model(LinearRegression(),"Linear Regression")

# Regularisation strengths tried for both Ridge and Lasso.
alpha = [0.1,0.2,0.3,0.4,0.5,0.6]
for i in alpha:
    build_model(Ridge(alpha=i),"Ridge alpha: "+str(i))
for i in alpha:
    build_model(Lasso(alpha=i),"Lasso alpha: "+str(i))

# The tree-based estimators are randomised. Fixing random_state (same value as
# the train_test_split call) makes the printed scores reproducible, so the
# model choice based on them does not change from run to run.
build_model(DecisionTreeRegressor(random_state=0),"Decision Tree")

build_model(RandomForestRegressor(random_state=0),"Random Forest")

build_model(GradientBoostingRegressor(random_state=0),"Gradient Boosting")

build_model(SVR(),"Support Vector Regression")


# lasso and linear both performed well on test data


In [None]:
# first we try with lasso
# Finer sweep over small alphas: nine evenly spaced values from 0.01 to 0.09.
# linspace states the number of points explicitly (a float-step arange derives
# it from rounded arithmetic), and rounding to two decimals removes
# floating-point noise from the printed labels.
# The previously unused `lasso = Lasso()` instance was dropped.
alpha = np.round(np.linspace(0.01, 0.09, 9), 2)
for i in alpha:
    build_model(Lasso(alpha=i),"Lasso alpha: "+str(i))


In [None]:
# Lasso wear model (alpha=0.01), fitted on the complete dataset and then
# written to disk as a pickle.
lasso = Lasso(alpha=0.01)
lasso.fit(x, y3)
with open("./full-data/wear-with-lasso.pkl", mode="wb") as file:
    pk.dump(lasso, file)

In [None]:
#linear Regression
#
# Step 1 - honest evaluation: fit on the training split only, so every test
# metric below is computed on rows the model has not seen. Fitting on all of
# `x` before scoring on `x_test` leaks the test rows into training.
lr_eval = LinearRegression().fit(x_train, y_train)
y_pred = lr_eval.predict(x_test)
print("Linear Regression")
print("Training Score: ",lr_eval.score(x_train,y_train))
print("Testing Score: ",lr_eval.score(x_test,y_test))
print("Mean Squared Error: ", mean_squared_error(y_test,y_pred))
print("Mean Absolute Error: ", mean_absolute_error(y_test,y_pred))

# Step 2 - `lr` (the model that gets pickled) is refitted on the complete dataset.
lr = LinearRegression().fit(x, y3)

# Kept as the last expression so the R^2 on the test split is shown as cell output.
r2_score(y_test,y_pred)

In [None]:
# Persist the wear model held in `lr` as a pickle file.
with open("./full-data/wear-with-linear-regression.pkl","wb") as file:
    pk.dump(lr,file)

In [None]:
# final models
# for coefficient of friction prediction we go with Ridge with alpha = 0.01 and LinearRegression
# for frictional force prediction we go with  Ridge with alpha = 0.21
# for wear prediction we go with Lasso with alpha = 0.01 and LinearRegression

