In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

In [2]:
glass_df = pd.read_csv("glass.csv")
glass_df.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [3]:
glass_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 214 entries, 0 to 213
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   RI      214 non-null    float64
 1   Na      214 non-null    float64
 2   Mg      214 non-null    float64
 3   Al      214 non-null    float64
 4   Si      214 non-null    float64
 5   K       214 non-null    float64
 6   Ca      214 non-null    float64
 7   Ba      214 non-null    float64
 8   Fe      214 non-null    float64
 9   Type    214 non-null    int64  
dtypes: float64(9), int64(1)
memory usage: 16.8 KB


In [4]:
glass_df.shape

(214, 10)

In [5]:
glass_df['Type'].value_counts()

2    76
1    70
7    29
3    17
5    13
6     9
Name: Type, dtype: int64

In [6]:
glass_df['Type'] = glass_df['Type'].replace({1:'building_windows_float_processed',2:'building_windows_non_float_processed',3:'vehicle_windows_float_processed',5:'containers',6:'tableware',7:'headlamps'})
glass_df.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,building_windows_float_processed
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,building_windows_float_processed
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,building_windows_float_processed
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,building_windows_float_processed
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,building_windows_float_processed


In [7]:
glass_df.dtypes

RI      float64
Na      float64
Mg      float64
Al      float64
Si      float64
K       float64
Ca      float64
Ba      float64
Fe      float64
Type     object
dtype: object

In [8]:
X = glass_df.iloc[:,0:9]
Y = glass_df.iloc[:,9]

In [9]:
from sklearn.model_selection import GridSearchCV
n_neighbors = np.array(range(1,40))
metric = ['euclidean','minkowski']
param_grid = dict(n_neighbors=n_neighbors,metric=metric)

In [10]:
model = KNeighborsClassifier()
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid.fit(X, Y)
print(grid.best_score_)
print(grid.best_params_)

0.640531561461794
{'metric': 'euclidean', 'n_neighbors': 1}


In [11]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size = 0.2,random_state = 23)

In [12]:
model = KNeighborsClassifier(n_neighbors=1,metric = 'euclidean')
modelkn = model.fit(x_train,y_train)

In [13]:
preds = modelkn.predict(x_test)
preds

array(['headlamps', 'building_windows_float_processed',
       'building_windows_float_processed',
       'building_windows_non_float_processed',
       'building_windows_float_processed',
       'building_windows_non_float_processed',
       'building_windows_float_processed', 'headlamps',
       'building_windows_float_processed',
       'building_windows_float_processed',
       'building_windows_non_float_processed',
       'building_windows_float_processed',
       'building_windows_float_processed',
       'building_windows_non_float_processed', 'containers', 'containers',
       'building_windows_non_float_processed',
       'building_windows_non_float_processed', 'headlamps',
       'building_windows_non_float_processed',
       'building_windows_non_float_processed',
       'building_windows_float_processed', 'headlamps',
       'building_windows_float_processed', 'containers',
       'building_windows_float_processed',
       'building_windows_non_float_processed',
       'bu

In [14]:
np.mean(preds==y_test)*100

76.74418604651163

In [15]:
df_t = pd.DataFrame({'Actual':y_test, 'Predicted':preds})
df_t

Unnamed: 0,Actual,Predicted
212,headlamps,headlamps
46,building_windows_float_processed,building_windows_float_processed
52,building_windows_float_processed,building_windows_float_processed
183,tableware,building_windows_non_float_processed
19,building_windows_float_processed,building_windows_float_processed
139,building_windows_non_float_processed,building_windows_non_float_processed
44,building_windows_float_processed,building_windows_float_processed
213,headlamps,headlamps
64,building_windows_float_processed,building_windows_float_processed
14,building_windows_float_processed,building_windows_float_processed


In [16]:
from sklearn.metrics import classification_report,confusion_matrix,f1_score,accuracy_score
confusion_matrix = confusion_matrix(y_test,preds)
print(confusion_matrix)

[[13  0  0  0  0  0]
 [ 2 14  2  0  1  0]
 [ 0  0  1  0  0  0]
 [ 0  0  0  4  1  0]
 [ 0  1  0  0  1  0]
 [ 3  0  0  0  0  0]]


In [17]:
print(classification_report(y_test,preds))

                                      precision    recall  f1-score   support

    building_windows_float_processed       0.72      1.00      0.84        13
building_windows_non_float_processed       0.93      0.74      0.82        19
                          containers       0.33      1.00      0.50         1
                           headlamps       1.00      0.80      0.89         5
                           tableware       0.33      0.50      0.40         2
     vehicle_windows_float_processed       0.00      0.00      0.00         3

                            accuracy                           0.77        43
                           macro avg       0.55      0.67      0.58        43
                        weighted avg       0.77      0.77      0.75        43



In [18]:
accuracy_score(y_test,preds)*100

76.74418604651163