<a href="https://colab.research.google.com/github/vivek11416/dataScienceProjects/blob/main/dog_breed_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Upload kaggle.json (downloadable from the Kaggle account settings page).
from google.colab import files

# Keep the return value in a variable: files.upload() returns the uploaded
# file contents, and leaving it as the cell's last expression would echo the
# API key into the notebook output.
uploaded = files.upload()

# The credentials cell copies a file named exactly "kaggle.json", so fail early
# if the upload was skipped or the file arrived under a different name.
assert 'kaggle.json' in uploaded, "Expected a file named kaggle.json, got: %s" % list(uploaded)

In [None]:
#install kaggle API client
!pip install -q kaggle

In [None]:
#the kaggle API client expects this file to be in kaggle folder
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

#update permission to avoig kaggle tool warning
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
#change current working directory
!mkdir dog_dataset
%cd dog_dataset

In [None]:
#getting dataset
!kaggle datasets list -s dogbreedidfromcomp

In [None]:
#downloading
!kaggle datasets download catherinehorng/dogbreedidfromcomp

In [None]:
#unzipping and removing unwanted files
!unzip /content/dog_dataset/dogbreedidfromcomp.zip -d dog_dataset
!rm /content/dog_dataset/dogbreedidfromcomp.zip
!rm /content/dog_dataset/sample_submission.csv

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from keras.preprocessing import image
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense,Dropout,Flatten,Conv2D,MaxPool2D
from tensorflow.keras.optimizers import Adam

In [None]:
# Read the labels CSV that was extracted from the dataset archive
labels_all = pd.read_csv(filepath_or_buffer="/content/dog_dataset/dog_dataset/labels.csv")

# Print (rows, columns), then preview the first rows
print(labels_all.shape)
labels_all.head(n=5)

In [None]:
# Number of labelled rows per breed; preview the first entries of the count table
breeds_all = labels_all.loc[:, 'breed']
breed_count = breeds_all.value_counts()
breed_count.head(n=5)

In [None]:
# Restrict the problem to three breeds to keep memory and training time manageable.
# To train on every breed instead (if computation power is not a limitation):
#   CLASS_NAMES = sorted(breeds_all.unique())
CLASS_NAMES = ['scottish_deerhound', 'maltese_dog', 'bernese_mountain_dog']

# Keep only the rows of the selected breeds. reset_index() renumbers the rows
# from 0 and keeps the previous index as an extra 'index' column.
labels = labels_all.loc[labels_all['breed'].isin(CLASS_NAMES)].reset_index()
labels.head(n=5)



In [None]:
 # Pre-allocate the image array: one 224x224 RGB slot per selected row
 X_data = np.zeros((len(labels), 224, 224, 3), dtype='float32')

 # One-hot encode the breed of every row against CLASS_NAMES.
 # NOTE(review): scikit-learn's label_binarize documents a single-column output
 # when exactly two classes are given; that would not match the
 # len(CLASS_NAMES)-unit softmax layer -- confirm before reducing to two breeds.
 Y_data = label_binarize(labels['breed'], classes=CLASS_NAMES)

 # Load every image resized to 224x224, convert it to an array and divide by 255.
 # The absolute path matches the directory the labels CSV is read from, so this
 # cell does not depend on the current working directory.
 for i, image_id in enumerate(tqdm(labels['id'])):
     img = image.load_img('/content/dog_dataset/dog_dataset/train/%s.jpg' % image_id, target_size=(224, 224))
     X_data[i] = image.img_to_array(img) / 255.0

 # Shape and element count of the image array and of the encoded labels
 print('\nTrain Images Shape: ', X_data.shape, ' size: {:,}'.format(X_data.size))
 print('One-hot encoded output shape: ', Y_data.shape, ' size: {:,}'.format(Y_data.size))


In [None]:
# Model definition: four convolution + max-pooling stages, then a flatten,
# two dense layers and a softmax output with one unit per entry of CLASS_NAMES.
# Every layer after the first convolution except the output uses an L2 kernel regularizer.
model = Sequential([
    Conv2D(filters=64, kernel_size=(5, 5), activation='relu', input_shape=(224, 224, 3)),
    MaxPool2D(pool_size=(2, 2)),

    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', kernel_regularizer='l2'),
    MaxPool2D(pool_size=(2, 2)),

    Conv2D(filters=16, kernel_size=(7, 7), activation='relu', kernel_regularizer='l2'),
    MaxPool2D(pool_size=(2, 2)),

    Conv2D(filters=8, kernel_size=(5, 5), activation='relu', kernel_regularizer='l2'),
    MaxPool2D(pool_size=(2, 2)),

    Flatten(),
    Dense(128, activation="relu", kernel_regularizer='l2'),
    Dense(64, activation="relu", kernel_regularizer='l2'),
    Dense(len(CLASS_NAMES), activation="softmax"),
])

# Categorical cross-entropy pairs with the one-hot targets; Adam at 0.0001
model.compile(
    optimizer=Adam(0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Fixed seed so that every run puts the same rows into each split
SPLIT_SEED = 42

# Hold out 10% of the data as the test set. Stratifying on the class index
# (position of the 1 in each one-hot row) keeps the breed proportions the same
# on both sides of the split.
X_train_and_val, X_test, Y_train_and_val, Y_test = train_test_split(
    X_data,
    Y_data,
    test_size=0.1,
    random_state=SPLIT_SEED,
    stratify=Y_data.argmax(axis=1),
)

# Split the remaining 90% into training (80%) and validation (20%) sets
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train_and_val,
    Y_train_and_val,
    test_size=0.2,
    random_state=SPLIT_SEED,
    stratify=Y_train_and_val.argmax(axis=1),
)

In [None]:
# Training hyperparameters
epochs = 100
batch_size = 128

# Fit on the training split, passing the validation split so Keras can evaluate on it;
# the returned object is kept in `history`.
history = model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_val, Y_val),
    epochs=epochs,
    batch_size=batch_size,
)

In [None]:
# Persist the trained model to dog_breed.h5 in the current working directory
model.save(filepath="dog_breed.h5")