UDDER SURFACE TEMPERATURE MODEL

In [5]:
# importing required libraries
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt

DATA PREPROCESSING

In [7]:
# loading and reading the dataset

temp_data = pd.read_csv('uddertemperature.csv')

In [8]:
temp_data.head()

Unnamed: 0,UdderSurfaceTemp,Status
0,28.94,Healthy
1,31.39,Healthy
2,30.41,Healthy
3,31.75,Healthy
4,33.09,Healthy


In [9]:
# Renaming some of the columns 
temp_data = temp_data.rename(columns={'Status':'target'})
temp_data.tail(2000)

Unnamed: 0,UdderSurfaceTemp,target
1124,33.58,subclinical
1125,33.44,subclinical
1126,33.84,subclinical
1127,34.15,subclinical
1128,34.01,subclinical
...,...,...
3119,37.25,clinical
3120,37.18,clinical
3121,37.17,clinical
3122,37.25,clinical


In [10]:
#data cleaning
#removing null values
temp_data = temp_data.dropna()
temp_data.isnull().sum()

UdderSurfaceTemp    0
target              0
dtype: int64

In [11]:
# check for missing values
missing_values = temp_data.isnull().sum().sort_values(ascending = False)
missing_values = missing_values[missing_values > 0]/temp_data.shape[0] # normalize
print(f'{missing_values *100} %')

Series([], dtype: float64) %


In [9]:
temp_data.columns


Index(['UdderSurfaceTemp', 'target'], dtype='object')

In [12]:
#replace the strings in target column with corresponding numbers
temp_data =temp_data.replace({'clinical': 2, 'subclinical': 1, 'Healthy': 0})
temp_data.head()

Unnamed: 0,UdderSurfaceTemp,target
0,28.94,0
1,31.39,0
2,30.41,0
3,31.75,0
4,33.09,0


In [13]:
'''temp_data.drop(['animal.num','age'], axis=1)'''

"temp_data.drop(['animal.num','age'], axis=1)"

MODEL BUILDING AND TRAINING

In [14]:
#spliting target and features
X = temp_data.drop(columns = 'target' )
Y = temp_data.target
'''X = temp_data.iloc[:,:3].values
Y = temp_data.iloc[:,3].values'''
print(Y)

0       0
1       0
2       0
3       0
4       0
       ..
3119    2
3120    2
3121    2
3122    2
3123    2
Name: target, Length: 3124, dtype: int64


In [16]:
# splitting our dataset into training and testing
X_train, X_test, Y_train, Y_test= train_test_split(X, Y, test_size= 0.25, random_state=42)

In [17]:
#feature scaling
scaler= StandardScaler()
X_train_scaler= scaler.fit_transform(X_train)
X_test_scaler= scaler.fit_transform(X_test)

RANDOM FOREST CLASSIFIER

In [18]:
# creating Random forest classifier
modelreg=RandomForestClassifier(n_estimators=20,criterion = 'entropy')
modelreg.fit(X_train_scaler, Y_train)
y_pred= modelreg.predict(X_test_scaler)
p = modelreg.score(X_test_scaler,Y_test)
print(p)

0.9820742637644047


MODEL EVALUATION AND ACCURACY

In [19]:
print('Classification Report\n', classification_report(Y_test, y_pred))
print('Accuracy: {}%\n'.format(round((accuracy_score(Y_test, y_pred)*100),2)))

Classification Report
               precision    recall  f1-score   support

           0       1.00      0.95      0.97       284
           1       0.95      1.00      0.97       241
           2       1.00      1.00      1.00       256

    accuracy                           0.98       781
   macro avg       0.98      0.98      0.98       781
weighted avg       0.98      0.98      0.98       781

Accuracy: 98.21%



In [20]:
# confusion matrix
cm = confusion_matrix(Y_test, y_pred)
print(cm)

[[270  14   0]
 [  0 241   0]
 [  0   0 256]]


LOGISTIC REGRESSION

In [21]:
# Logistic Regression 
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
regressor = LogisticRegression()
regressor.fit(X_train,Y_train)
print('Test Accuracy {:.2f}%'.format(regressor.score(X_test, Y_test)*100))

Test Accuracy 98.85%


K-NEAREST NEIGHBOR

In [22]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train,Y_train)
print('KNN Accuracy {:.2f}%'.format(knn.score(X_test,Y_test)*100))

KNN Accuracy 98.21%


SUPPORT VECTOR MACHINE

In [24]:
# Support Vactor 
from sklearn.svm import SVC
svm = SVC(random_state=1)
svm1 = SVC(kernel='linear',gamma='scale',random_state=0)
svm2 = SVC(kernel='rbf',gamma='scale',random_state=0)
svm3 = SVC(kernel='poly',gamma='scale',random_state=0)
svm4 = SVC(kernel='sigmoid',gamma='scale',random_state=0)



In [25]:
svm.fit(X_train,Y_train)
svm1.fit(X_train,Y_train)
svm2.fit(X_train,Y_train)
svm3.fit(X_train,Y_train)
svm4.fit(X_train,Y_train)



In [26]:
print('SVC Accuracy : {:,.2f}%'.format(svm.score(X_test,Y_test)*100))

print('SVC Liner Accuracy : {:,.2f}%'.format(svm1.score(X_test,Y_test)*100))

print('SVC RBF Accuracy : {:,.2f}%'.format(svm2.score(X_test,Y_test)*100))

print('SVC Ploy Accuracy : {:,.2f}%'.format(svm3.score(X_test,Y_test)*100))

print('SVC Sigmoid Accuracy : {:,.2f}%'.format(svm4.score(X_test,Y_test)*100))

SVC Accuracy : 98.46%
SVC Liner Accuracy : 98.72%
SVC RBF Accuracy : 98.46%
SVC Ploy Accuracy : 98.21%
SVC Sigmoid Accuracy : 36.36%


NAIVE BAYES

In [27]:
# Naive Bayes
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()
nb.fit(X_train,Y_train)
print("Naive Bayes Accuracy : {:,.2f}%".format(nb.score(X_test,Y_test)*100))

Naive Bayes Accuracy : 98.21%


DECISION TREE

In [28]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier(criterion='entropy',max_depth=4, random_state=0)
dt.fit(X_train,Y_train)
print("Decision Tree Accuracy : {:,.2f}%".format(dt.score(X_test,Y_test)*100))

Decision Tree Accuracy : 98.34%


XGBOOST

In [29]:
# XGboost
import xgboost
xg = xgboost.XGBClassifier()
xg.fit(X_train,Y_train)
print("XGboost accuracy : {:.2f}%".format(xg.score(X_test,Y_test)*100))

XGboost accuracy : 98.59%


BUILDING A PREDICTIVE SYSTEM

In [34]:
#input temperature
input_temp = (30.16)

In [35]:
# change the input_temp to a numpy array
input_temp_as_numpy = np.asarray(input_temp)

#reshaping the numpy array
input_temp_reshaped= input_temp_as_numpy.reshape(1,-1)
print(input_temp_reshaped)

[[30.16]]


In [36]:
#using a Logistic model
prediction = regressor.predict(input_temp_reshaped)
print(prediction)

[0]




OUTPUT

In [37]:
if(prediction[0]==2):
    print("clinical Mastitis Detected")
  
elif (prediction[0]==1):
    print ("subclinical mastitis Detected")

else:
    print("Healthy animal and no mastitis Detected")

Healthy animal and no mastitis Detected


SAVING THE TRAINED MODEL

In [None]:
import pickle

In [30]:
filename = 'temperature_model.pkl'


In [31]:
pickle.dump(regressor,open(filename,'wb'))

In [1]:
input_temp = (35.16)

# change the input_temp to a numpy array
input_temp_as_numpy = np.asarray(input_temp)

#reshaping the numpy array
input_temp_reshaped= input_temp_as_numpy.reshape(1,-1)
print(input_temp_reshaped)
pickled_model = pickle.load(open('temperature_model.pkl', 'rb'))
pickled_model.predict(input_temp_reshaped)
if(prediction[0]==2):
    print("clinical Mastitis Detected")
  
elif (prediction[0]==1):
    print ("subclinical mastitis Detected")

else:
    print("Healthy animal and no mastitis Detected")

NameError: name 'np' is not defined