### **Importing Libraries**

In [1]:
import numpy as np
import os
import pandas as pd
from matplotlib import pyplot as plt
import cv2
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score


%matplotlib inline

### **Importing image dataset**

In [2]:
def create_dataset(img_folder):
    """Load every image below ``img_folder`` into a flat feature matrix.

    ``img_folder`` is expected to contain one sub-directory per target class;
    the sub-directory name becomes the label of every image inside it.
    Each image is read as a 3-channel colour image, converted from OpenCV's
    BGR channel order to RGB, resized to 200x200 pixels and flattened into a
    ``float32`` vector.

    Parameters
    ----------
    img_folder : str
        Path of the directory that holds the per-class sub-directories.

    Returns
    -------
    tuple
        ``(images, labels)``: ``images`` is a NumPy array with one flattened
        image per row, ``labels`` is a list holding the class name of each
        row in the same order.

    Notes
    -----
    * Entries of ``img_folder`` that are not directories are ignored.
    * GIF files (any letter case of the extension) are skipped.
    * Files that cannot be read or resized are skipped; their number is
      printed once at the end instead of being dropped silently.
    * Directory listings are sorted so the result order is deterministic.
    """
    img_data_array = []  ## all flattened images
    class_name = []      ## target class of each image, aligned with img_data_array
    skipped = []         ## paths of files that could not be loaded

    for class_dir in sorted(os.listdir(img_folder)):
        class_path = os.path.join(img_folder, class_dir)
        if not os.path.isdir(class_path):
            ## stray files next to the class folders are not classes
            continue
        for img in sorted(os.listdir(class_path)):
            imagepath = os.path.join(class_path, img)
            if os.path.splitext(img)[1].lower() == ".gif":
                ## removing the gif files from the data
                continue
            try:
                ## IMREAD_COLOR always yields 3 channels, so every flattened
                ## vector has the same length
                image = cv2.imread(imagepath, cv2.IMREAD_COLOR)
                if image is None:
                    ## imread signals an unreadable file by returning None
                    skipped.append(imagepath)
                    continue
                ## OpenCV loads BGR; the colour conversion is a separate step
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image = cv2.resize(image, (200, 200), interpolation=cv2.INTER_AREA)
                img_data_array.append(image.flatten().astype("float32"))
                class_name.append(class_dir)
            except Exception:
                ## best effort: one broken file must not abort the whole import
                skipped.append(imagepath)

    if skipped:
        print(f"create_dataset: skipped {len(skipped)} unreadable file(s)")
    return np.array(img_data_array), class_name

**Importing the dataset from the directory (path based on Colab)**

In [13]:
## location of the training images; adjust it to wherever the dataset was downloaded
dataset_dir = "dataset/train/"
## img_data: one flattened image per row, class_name: label of each row
img_data, class_name = create_dataset(dataset_dir)

KeyboardInterrupt: 

### **Dictionary to Convert Target Class to Integer**

In [None]:
## lookup tables between class names and integer codes;
## np.unique returns the distinct names in sorted order
unique_class_names = np.unique(class_name)
## class name -> integer code
targetClasses = {name: code for code, name in enumerate(unique_class_names)}
## integer code -> class name (inverse of targetClasses)
getClasses = dict(enumerate(unique_class_names))

**Encoding the Target Class**

In [None]:
## replace every string label by its integer code, keeping the sample order
targetClss = [targetClasses[name] for name in class_name]

### **Size of the dataset**

In [10]:
## number of loaded images and shape of the feature matrix
print(
    f"The size of the dataset: {len(img_data)}",
    f"The Shape of the Dataset: {img_data.shape}",
    sep="\n",
)

The size of the dataset: 0
The Shape of the Dataset: (0,)


### **Shuffling and Splitting the Data**

In [None]:
## shuffling (reproducibly) and splitting into training and testing
SPLIT_SEED = 42                # fixed seed so re-running the cell gives the same split
TRAIN_SIZE = 2012              # number of training samples used with the full dataset
FALLBACK_TRAIN_FRACTION = 0.8  # used when the dataset has no more than TRAIN_SIZE samples

n_samples = len(img_data)
if n_samples != len(targetClss):
    raise ValueError(
        f"img_data has {n_samples} samples but targetClss has {len(targetClss)} labels"
    )
if n_samples < 2:
    raise ValueError(
        f"need at least 2 samples to build a train/test split, got {n_samples}; "
        "check that the dataset was loaded"
    )

## keep the original split size when possible, otherwise fall back to a
## proportional split so the test set can never end up empty
if n_samples > TRAIN_SIZE:
    n_train = TRAIN_SIZE
else:
    n_train = max(1, int(FALLBACK_TRAIN_FRACTION * n_samples))

## shuffle indices instead of the data itself: img_data and targetClss stay
## untouched, so the cell gives the same result every time it is run
shuffled_idx = np.random.default_rng(SPLIT_SEED).permutation(n_samples)
train_idx, test_idx = shuffled_idx[:n_train], shuffled_idx[n_train:]

x_train = [img_data[i] for i in train_idx]
x_test = [img_data[i] for i in test_idx]
## plain lists, because later cells call .count() on the label splits
y_train = [targetClss[i] for i in train_idx]
y_test = [targetClss[i] for i in test_idx]

### **Size of train and test data**

In [None]:

## how many samples ended up in each split
print(
    f"Total Number of Train Data: {len(x_train)}",
    f"Total Number of Test Data: {len(x_test)}",
    sep="\n",
)


### **Visualization of data**

**Total Number of Samples in Each Class**

In [None]:
## number of loaded samples for every target class (whole dataset)
targetClssCounts = {tar: class_name.count(tar) for tar in targetClasses}

## one bar per class, labelled with the class name
bar_positions = list(range(len(targetClssCounts)))
bar_heights = list(targetClssCounts.values())
bar_labels = list(targetClssCounts.keys())

plt.figure(figsize=(10,5))
plt.bar(bar_positions, bar_heights, tick_label=bar_labels, width=0.8)
plt.title("Total Data for each Target Class")
plt.xlabel("TargetClasses")
plt.ylabel("No. of Data")
plt.show()

**Data Distribution in Test Split**

In [None]:
## number of test-split samples per target class;
## y_test holds integer codes, so each class name is looked up by its code
targettestClssCounts = {
    tar: y_test.count(code) for tar, code in targetClasses.items()
}

test_positions = list(range(len(targettestClssCounts)))
test_heights = list(targettestClssCounts.values())
test_labels = list(targettestClssCounts.keys())

plt.figure(figsize=(10,5))
plt.bar(test_positions, test_heights, tick_label=test_labels, width=0.8)
plt.title("Data Distribution in Test Split")
plt.xlabel("TargetClasses")
plt.ylabel("No. of Data")
plt.show()

**Data Distribution in Train Split**

In [None]:
## number of training-split samples per target class;
## y_train holds integer codes, so each class name is looked up by its code
targettrainClssCounts = {
    tar: y_train.count(code) for tar, code in targetClasses.items()
}

plt.figure(figsize=(10,5))
plt.bar(list(range(len(targettrainClssCounts))),list(targettrainClssCounts.values()),
        tick_label=list(targettrainClssCounts.keys()),width=0.8)
plt.xlabel("TargetClasses")
plt.ylabel("No. of Data")
## this cell plots y_train, so the title names the train split
plt.title("Data Distribution in Train Split")
plt.show()

## **Training Different Models and Evaluation**

### **Model 1: K Neighbor Classifier**

**Initializing The model with 6 neighbors**

In [None]:

## k-nearest-neighbours classifier that votes among the 6 closest training samples
knnClss = KNeighborsClassifier(n_neighbors=6)
## fit on the training split
knnClss.fit(X=x_train, y=y_train)

**Predicting for the test data:**

In [None]:
## predict the whole test split in one call: predict() accepts a batch of
## samples and returns a 1-D array with one label per row, which is both much
## faster than one call per sample and the shape the metric functions expect
predicted = knnClss.predict(x_test)

**Accuracy of the model and F1 score for each Class**

In [None]:
## overall accuracy and per-class F1 score of the KNN predictions;
## scikit-learn metrics take the ground truth first and the predictions second,
## which is also the order used by every other metric call in this notebook
print(f"Accuracy of the model: {accuracy_score(y_test,predicted)}")
print(f"F1 Score of the model:\n{f1_score(y_test,predicted,average=None)}")


**Confusion Matrix**

In [None]:
## confusion matrix of the KNN predictions drawn as an annotated heatmap
## (rows: actual class, columns: predicted class)
cm = confusion_matrix(y_test, predicted)
heatmap_style = dict(annot=True, fmt='.0f', linewidths=.5, square=True, cmap='Blues_r')

plt.figure(figsize=(16,16))
sns.heatmap(cm, **heatmap_style)
plt.xlabel('Predicted label')
plt.ylabel('Actual label')
## the accuracy is shown in the figure title
all_sample_title = 'Accuracy Score: {0}'.format(accuracy_score(y_test,predicted))
plt.title(all_sample_title, size=15)

**Predicting for Random Data**

In [None]:
## show the KNN prediction for up to 10 randomly chosen samples.
## Random indices are drawn instead of reshuffling img_data/targetClss, so the
## dataset variables are left untouched and the cell can be re-run safely.
## NOTE: the samples come from the whole dataset, training samples included,
## so this is a qualitative demo and not a held-out evaluation.
N_RANDOM_SAMPLES = 10

## slicing copes with datasets that hold fewer than N_RANDOM_SAMPLES images
for i in np.random.permutation(len(img_data))[:N_RANDOM_SAMPLES]:
    result = knnClss.predict([img_data[i]])
    print(f"Actual Class : {getClasses[targetClss[i]]} Predicted Class: {getClasses[result[0]]}")
    

### **Model 2: Multinomial Logistic Regression Model**

**Initializing the model**

In [None]:
## multinomial logistic regression solved with L-BFGS, allowing up to 2000 iterations
## NOTE(review): `multi_class` appears to be deprecated in recent scikit-learn
## releases - confirm against the installed version
LogReg = LogisticRegression(
    multi_class="multinomial",
    solver='lbfgs',
    max_iter=2000,
)

**Training the Model**

In [None]:
## fit the logistic regression on the training split
LogReg.fit(X=x_train, y=y_train)

**Predicting for the test cases**

In [None]:
## logistic regression labels for every sample of the test split
y_pred = LogReg.predict(X=x_test)

**Accuracy of the model and F1 score for each Class**

In [None]:
## overall accuracy and per-class F1 score of the logistic regression predictions
print(
    f"Accuracy of the model : {accuracy_score(y_test,y_pred)}",
    f"F1 Score of the model:\n{f1_score(y_test,y_pred,average=None)}",
    sep="\n",
)

**Confusion Matrix**

In [None]:
## confusion matrix of the logistic regression predictions as an annotated heatmap
## (rows: actual class, columns: predicted class)
cm = confusion_matrix(y_test, y_pred)
heatmap_style = dict(annot=True, fmt='.0f', linewidths=.5, square=True, cmap='Blues_r')

plt.figure(figsize=(16,16))
sns.heatmap(cm, **heatmap_style)
plt.xlabel('Predicted label')
plt.ylabel('Actual label')
## the accuracy is shown in the figure title
all_sample_title = 'Accuracy Score: {0}'.format(accuracy_score(y_test,y_pred))
plt.title(all_sample_title, size=15)

**Predicting for Random Data**

In [None]:
## show the logistic regression prediction for up to 10 randomly chosen samples.
## Random indices are drawn instead of reshuffling img_data/targetClss, so the
## dataset variables are left untouched and the cell can be re-run safely.
## NOTE: the samples come from the whole dataset, training samples included,
## so this is a qualitative demo and not a held-out evaluation.
N_RANDOM_SAMPLES = 10

## slicing copes with datasets that hold fewer than N_RANDOM_SAMPLES images
for i in np.random.permutation(len(img_data))[:N_RANDOM_SAMPLES]:
    result = LogReg.predict([img_data[i]])
    print(f"Actual Class : {getClasses[targetClss[i]]} Predicted Class: {getClasses[result[0]]}")
    

### **Model 3: Decision Tree With Max Depth of 12**

**Initializing the Model**

In [None]:
## decision tree split on entropy, limited to depth 12 with at least
## 8 samples per leaf; random_state is fixed so the fitted tree is repeatable
DecTree = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=12,
    min_samples_leaf=8,
    random_state=100,
)

**Training The Model**

In [None]:
## fit the decision tree on the training split
DecTree.fit(X=x_train, y=y_train)

**Predicting for Test Cases**

In [None]:
## decision tree labels for every sample of the test split
ypred = DecTree.predict(X=x_test)

**Accuracy of the model and F1 score for each Class**

In [None]:
## overall accuracy and per-class F1 score of the decision tree predictions
print(
    f"Accuracy of the model : {accuracy_score(y_test,ypred)}",
    f"F1 Score of the model:\n{f1_score(y_test,ypred,average=None)}",
    sep="\n",
)

**Confusion Matrix**

In [None]:
## confusion matrix of the decision tree predictions as an annotated heatmap
## (rows: actual class, columns: predicted class)
cm = confusion_matrix(y_test, ypred)
heatmap_style = dict(annot=True, fmt='.0f', linewidths=.5, square=True, cmap='Blues_r')

plt.figure(figsize=(16,16))
sns.heatmap(cm, **heatmap_style)
plt.xlabel('Predicted label')
plt.ylabel('Actual label')
## the accuracy is shown in the figure title
all_sample_title = 'Accuracy Score: {0}'.format(accuracy_score(y_test,ypred))
plt.title(all_sample_title, size=15)

**Predicting for Random Data**

In [None]:
## show the decision tree prediction for up to 10 randomly chosen samples.
## Random indices are drawn instead of reshuffling img_data/targetClss, so the
## dataset variables are left untouched and the cell can be re-run safely.
## NOTE: the samples come from the whole dataset, training samples included,
## so this is a qualitative demo and not a held-out evaluation.
N_RANDOM_SAMPLES = 10

## slicing copes with datasets that hold fewer than N_RANDOM_SAMPLES images
for i in np.random.permutation(len(img_data))[:N_RANDOM_SAMPLES]:
    result = DecTree.predict([img_data[i]])
    print(f"Actual Class : {getClasses[targetClss[i]]} Predicted Class: {getClasses[result[0]]}")
    

# **Summary of the Models**

In [None]:
## test-split accuracy of the three models side by side
## (predicted: KNN, y_pred: logistic regression, ypred: decision tree)
summary_lines = [
    f"The Accuracy of KNN is {accuracy_score(y_test,predicted)}",
    f"The Accuracy of Logistic Regression is {accuracy_score(y_test,y_pred)}",
    f"The Accuracy of Decision Tree is {accuracy_score(y_test,ypred)}",
]
print("\n".join(summary_lines))
