## Importing necessary libraries

In [21]:
import pandas as pd

from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score,r2_score

## Import Data

In [2]:
Glass_data = pd.read_csv("glass.csv")
Glass_data

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,1
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,1
...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,7
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,7
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,7
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,7


## Problem Statement : Prepare a model for glass classification using KNN

## Input : RI - Na - Mg - Al - Si - K - Ca - Ba - Fe
## Output : Type

## Data Understanding

In [5]:
Glass_data.shape

(214, 10)

In [6]:
Glass_data.describe()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
count,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0
mean,1.518365,13.40785,2.684533,1.444907,72.650935,0.497056,8.956963,0.175047,0.057009,2.780374
std,0.003037,0.816604,1.442408,0.49927,0.774546,0.652192,1.423153,0.497219,0.097439,2.103739
min,1.51115,10.73,0.0,0.29,69.81,0.0,5.43,0.0,0.0,1.0
25%,1.516523,12.9075,2.115,1.19,72.28,0.1225,8.24,0.0,0.0,1.0
50%,1.51768,13.3,3.48,1.36,72.79,0.555,8.6,0.0,0.0,2.0
75%,1.519157,13.825,3.6,1.63,73.0875,0.61,9.1725,0.0,0.1,3.0
max,1.53393,17.38,4.49,3.5,75.41,6.21,16.19,3.15,0.51,7.0


In [7]:
Glass_data.dtypes

RI      float64
Na      float64
Mg      float64
Al      float64
Si      float64
K       float64
Ca      float64
Ba      float64
Fe      float64
Type      int64
dtype: object

In [8]:
Glass_data.isnull().sum()

RI      0
Na      0
Mg      0
Al      0
Si      0
K       0
Ca      0
Ba      0
Fe      0
Type    0
dtype: int64

## Data Processing

### Since no null value no need of data cleaning

## Model Building

In [3]:
X = Glass_data.drop(['Type'],axis=1)
y = Glass_data['Type']

In [6]:
X.shape,y.shape

((214, 9), (214,))

In [33]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.20,random_state= 15,stratify=y)
X_train.shape,y_train.shape,X_test.shape,y_test.shape

((171, 9), (171,), (43, 9), (43,))

In [34]:
Glass_knn_model = KNeighborsClassifier()
Glass_knn_model.fit(X_train,y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

## Model Testing 

In [35]:
y_pred = Glass_knn_model.predict(X_test)
y_pred

array([2, 1, 2, 7, 1, 7, 1, 2, 5, 7, 5, 1, 2, 7, 7, 1, 2, 2, 1, 1, 1, 7,
       1, 1, 1, 3, 1, 1, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 1, 5, 1, 7, 2],
      dtype=int64)

## Modal Evualation

In [36]:
print("Accuracy score   :",round(accuracy_score(y_test,y_pred),4))
print("R2 Score         :",round(r2_score(y_test,y_pred),4))

Accuracy score   : 0.6279
R2 Score         : 0.6477


## Lets choose best K value ,by defalut it taken K=5

In [40]:
neighbours = list(range(1,50,2))
cv_scores = []

for i in neighbours:
    Glass_knn_model = KNeighborsClassifier(n_neighbors = i)
    cv_score = cross_val_score(estimator = Glass_knn_model,X=X,y=y,cv=10)
    cv_scores.append(cv_score.mean())

In [23]:
import warnings
warnings.filterwarnings('ignore')

In [41]:
cv_scores

[0.6502164502164502,
 0.6502164502164502,
 0.6454545454545455,
 0.6270562770562771,
 0.5948051948051949,
 0.5898268398268398,
 0.6090909090909091,
 0.6041125541125542,
 0.6183982683982685,
 0.637012987012987,
 0.6277056277056278,
 0.6231601731601732,
 0.6186147186147186,
 0.6279220779220779,
 0.6277056277056278,
 0.6229437229437229,
 0.6136363636363636,
 0.6041125541125542,
 0.6134199134199134,
 0.5993506493506493,
 0.5805194805194804,
 0.5945887445887446,
 0.59025974025974,
 0.5759740259740259,
 0.5714285714285714]

In [42]:
neighbours[cv_scores.index(max(cv_scores))]

1

## As per above line to get better result K should be K=1

In [43]:
Glass_knn_model = KNeighborsClassifier(n_neighbors=1)
Glass_knn_model.fit(X_train,y_train)
y_pred = Glass_knn_model.predict(X_test)
print("Accuracy score   :", round(accuracy_score(y_test,y_pred),4))
print("R2 Score         :",round(r2_score(y_test,y_pred),4))

Accuracy score   : 0.6744
R2 Score         : 0.7907


## If we take K=5 then Accuracy is 0.6279
## If we take K=1 then Accuracy is 0.6744

## Model Deployment

In [37]:
from pickle import dump

In [38]:
dump(Glass_knn_model,open("Glass_knn_model.pkl",'wb'))

In [39]:
from pickle import load

In [44]:
Glass_load = load(open("Glass_knn_model.pkl",'rb'))

In [45]:
Glass_load.predict(X_test)

array([2, 1, 2, 7, 1, 7, 1, 2, 5, 7, 5, 1, 2, 7, 7, 1, 2, 2, 1, 1, 1, 7,
       1, 1, 1, 3, 1, 1, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 1, 5, 1, 7, 2],
      dtype=int64)