In [1]:
import os
import numpy as np
import pandas as pd
import keras
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50

from keras.models import Model
from keras.layers import Dense, Dropout, Flatten
from tqdm import tqdm
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import cv2
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, accuracy_score

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# Root folder of the dog-breed dataset; later cells read labels.csv,
# sample_submission.csv and the train/ image folder from it.
# Set the DOG_BREED_DATA environment variable to use another location
# (e.g. "/home/paperspace/data/dogBreed/") instead of editing this cell;
# the original Windows path remains the default.
data_path = os.environ.get("DOG_BREED_DATA", r"D:\Projects\Datas\DogBreed")
# Side length in pixels: every image is resized to (im_size, im_size).
# NOTE(review): 15 px is probably below the minimum input size that
# keras.applications accepts for ResNet50 / VGG16 -- confirm against the
# installed Keras version before running the model cells.
im_size = 15

In [4]:
# Load the two CSV files that live directly under the dataset folder.
labels_csv = os.path.join(data_path, "labels.csv")
submission_csv = os.path.join(data_path, "sample_submission.csv")
df_train = pd.read_csv(labels_csv)
df_test = pd.read_csv(submission_csv)

In [5]:
# Preview the first ten rows of the training labels table.
df_train.head(10)

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever
5,002211c81b498ef88e1b40b9abf84e1d,bedlington_terrier
6,00290d3e1fdd27226ba27a8ce248ce85,bedlington_terrier
7,002a283a315af96eaea0e28e7163b21b,borzoi
8,003df8b8a8b05244b1d920bb6cf451f9,basenji
9,0042188c895a2f14ef64a918ed9c7b64,scottish_deerhound


In [6]:
# One-hot encode the "breed" column: one indicator column per distinct breed.
breed_column = df_train["breed"]
targets_series = pd.Series(breed_column)
one_hot = pd.get_dummies(targets_series, sparse=True)

In [7]:
# Turn the one-hot frame into a NumPy array so rows can be indexed by position.
one_hot_labels = np.asarray(one_hot)

In [8]:
# Sanity check: (number of labelled images, number of breed columns).
one_hot_labels.shape

(10222, 120)

In [9]:
# Accumulators for the image-loading loop: resized images and their one-hot labels.
x_train, y_train = [], []
# x_test = []

In [10]:
# Load every training image, resize it to (im_size, im_size) and pair it with
# the one-hot label found at the same row position in df_train.
# The accumulators are reset here so that re-running this cell never appends
# duplicates (and still works after a later cell has set x_train to None).
x_train = []
y_train = []
for i, (f, breed) in enumerate(tqdm(df_train.values)):
    img_path = os.path.join(data_path, "train/{}.jpg".format(f))
    img = cv2.imread(img_path)
    # cv2.imread signals a missing or unreadable file by returning None rather
    # than raising; fail here with the offending path instead of letting
    # cv2.resize die with an opaque assertion.
    if img is None:
        raise IOError("Could not read image: {}".format(img_path))
    x_train.append(cv2.resize(img, (im_size, im_size)))
    y_train.append(one_hot_labels[i])

100%|████████████████████████████████████████████████████████████████████████████| 10222/10222 [02:13<00:00, 76.84it/s]


In [11]:
# Stack the Python lists into arrays: labels stored as uint8, pixel values
# stored as float32 and divided by 255.
y_train_raw = np.asarray(y_train, dtype=np.uint8)
x_train_raw = np.asarray(x_train, dtype=np.float32) / 255.0
# Drop the reference to the list of images; the array copy is used from here on.
x_train = None

In [12]:
# Show the shapes of the image array and the label array, one per line.
print(x_train_raw.shape, y_train_raw.shape, sep="\n")
# print(x_test.shape)

(10222, 15, 15, 3)
(10222, 120)


In [13]:
# Number of classes = number of columns in the one-hot label array.
num_class = y_train_raw.shape[1]

In [14]:
# Random train/validation split expressed as two boolean masks over the samples.
# Each sample independently lands in the training set with probability
# train_propotion, so the realised split is only approximately 80/20.
# X_train, X_valid, Y_train, Y_valid = train_test_split(x_train_raw, y_train_raw, test_size=0.3, random_state=1)
train_propotion = 0.8
# Fixed seed (same value as random_state in the train_test_split line above)
# so that Restart & Run All reproduces the exact same split.
split_seed = 1
rnd = np.random.RandomState(split_seed).random_sample(x_train_raw.shape[0])
train_idx = rnd < train_propotion
# Exact complement of the training mask: every sample is in exactly one set.
valid_idx = ~train_idx


In [15]:
# Total number of scalar values held in the image array.
x_train_raw.size

6899850

In [16]:
# Apply the boolean masks to obtain the training and validation subsets.
X_train = x_train_raw[train_idx]
Y_train = y_train_raw[train_idx]
X_valid = x_train_raw[valid_idx]
Y_valid = y_train_raw[valid_idx]
# The full arrays are not needed past this point; drop the references.
x_train_raw = None
y_train_raw = None

In [None]:
# ResNet50 as a frozen feature extractor followed by multinomial logistic regression.
# include_top=False with pooling="avg" makes the network emit one pooled
# feature vector per image instead of class scores.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(im_size, im_size, 3), pooling="avg")
# NOTE(review): the images fed in here were only divided by 255 after cv2.imread;
# the ImageNet weights presumably expect keras.applications.resnet50.preprocess_input
# (and a specific channel order) -- confirm before trusting the scores.
train_model_op = base_model.predict(X_train, batch_size=32, verbose=1)
valid_model_op = base_model.predict(X_valid, batch_size=32, verbose=1)

# scikit-learn classifiers take integer class ids, so decode each one-hot row
# to the column index of its 1.
train_labels = Y_train.argmax(axis=1)
valid_labels = Y_valid.argmax(axis=1)

logreg = LogisticRegression(multi_class='multinomial', solver='lbfgs')
logreg.fit(train_model_op, train_labels)
valid_probs = logreg.predict_proba(valid_model_op)
valid_preds = logreg.predict(valid_model_op)

# These metrics belong to the ResNet50 features (the old label said "VGG").
print('Validation ResNet50 LogLoss {}'.format(log_loss(Y_valid, valid_probs)))
print('Validation ResNet50 Accuracy {}'.format(accuracy_score(valid_labels, valid_preds)))

In [None]:
# Inspect the class ids predicted by the logistic regression on the validation set.
valid_preds

In [None]:
# Inspect the one-hot validation labels for comparison with the predictions.
Y_valid

In [None]:
# Resnet50
base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(im_size, im_size, 3))
x = base_model.output
x = Flatten()(x)
x = Dense(2048, activation="relu")(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
predictions = Dense(num_class, activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
    layer.trainable = False

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
callbacks_list = [keras.callbacks.EarlyStopping(monitor="val_acc", patience=3, 
                                               verbose=1)]
# model.summary()

In [None]:
# Resnet50
model.fit(X_train, Y_train, epochs=25, 
          validation_data=(X_valid, Y_valid), verbose=1)

In [None]:
# VGG
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(im_size, im_size, 3))
x = base_model.output
x = Flatten()(x)
x = Dense(2048, activation="relu")(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
predictions = Dense(num_class, activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
    layer.trainable = False

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
callbacks_list = [keras.callbacks.EarlyStopping(monitor="val_acc", patience=3, 
                                               verbose=1)]
model.summary()

In [None]:
# VGG
model.fit(X_train, Y_train, epochs=25, 
          validation_data=(X_valid, Y_valid), verbose=1)