In [None]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from imutils import paths

from sklearn.metrics import classification_report
from sklearn.metrics import jaccard_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelBinarizer

from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet50_preprocess_input
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.utils import to_categorical


In [None]:
# Unpack the dataset archive into /content/.
# NOTE(review): /content/ looks like a Colab path; the later relative lookup of
# the "eyes up" folder presumably relies on it being the working directory - confirm.
!unzip "dataset_eyes.zip" -d "/content/"

In [None]:
# HyperParameters

INIT_LR = 1e-4  # learning rate handed to the Adam optimizer
EPOCHS = 5      # training epochs per cross-validation fold
BS = 32         # batch size, also used to derive steps_per_epoch

# Seed the Python and NumPy RNGs so that the random.shuffle-based sub-sampling
# and ordering of the dataset is reproducible after Restart & Run All.
# TensorFlow's own RNG is not seeded here.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

In [None]:
# load data and balance dataset.

# there are 29 dogs.
# dataset_eyes includes 10,860 pics:
# 1,941 labeled "yes" (69 videos), 8,919 labeled "no" (189 videos).

# our aim is to take a total of 3,840 pics, ~0.5 of "yes" and ~0.5 of "no",
# which represent most of the dogs.

TARGET_TOTAL = 3840


def _label_of(path):
    """Return the name of the directory that directly contains the image."""
    return path.split(os.path.sep)[-2]


imagePaths = list(paths.list_images("eyes up"))
imagePaths.sort()

# After sorting, every picture of the first class directory precedes the
# pictures of the second one. Locate that boundary from the paths themselves
# instead of trusting a hard-coded index: with the counts quoted above it is
# 8919, but it stays correct if pictures are added to or removed from the dataset.
first_label = _label_of(imagePaths[0]) if imagePaths else None
split_at = next(
    (idx for idx, path in enumerate(imagePaths) if _label_of(path) != first_label),
    len(imagePaths),
)

# the second class ("yes" in the dataset described above) is kept in full
img_yes = imagePaths[split_at:]
img_yes_copy = img_yes.copy()
random.shuffle(img_yes_copy)
img_yes_final = img_yes_copy

# the first class ("no") is randomly sub-sampled to fill up the target total
# (3,840 - 1,941 = 1,899 pics for the counts quoted above)
img_no = imagePaths[:split_at]
img_no_copy = img_no.copy()
random.shuffle(img_no_copy)
img_no_final = img_no_copy[:max(0, TARGET_TOTAL - len(img_yes_final))]

# check that pics of every label (yes and no) represent as many videos as possible.

def imge_chcker(img_list):
    """Return the set of distinct video ids found in the given image paths.

    The id is the second-to-last "-"-separated token of each file name
    (the last path component).
    """
    file_names = (img_path.split(os.path.sep)[-1] for img_path in img_list)
    return {file_name.split("-")[-2] for file_name in file_names}

img_check_yes = imge_chcker(img_yes_final)
print("img_check_yes: ",len(img_check_yes), "videos")

img_check_no = imge_chcker(img_no_final)
print("img_check_no: ",len(img_check_no), "videos")

# and the final data for our model (train + test), 3,840 pics.

equalized_imagePaths = img_yes_final + img_no_final
random.shuffle(equalized_imagePaths)

# Show a short summary instead of dumping every path into the cell output.
print("equalized_imagePaths:", len(equalized_imagePaths), "pics")
print(equalized_imagePaths[:5])

In [None]:
# preparing data and labels for model

IMG_SIZE = (224, 224)  # input resolution expected by the network

data = []
labels = []

for imagePath in equalized_imagePaths:

    # the class label is the name of the directory that contains the picture
    label = imagePath.split(os.path.sep)[-2]

    # load the input image (224x224) and preprocess it.
    # The model trained below is an ImageNet-pretrained ResNet50, so the
    # images must go through ResNet50's own preprocessing rather than
    # MobileNetV2's, which scales the pixels differently.
    image = load_img(imagePath, target_size=IMG_SIZE)
    image = img_to_array(image)
    image = resnet50_preprocess_input(image)

    # update the data and labels lists, respectively
    data.append(image)
    labels.append(label)

# perform one-hot encoding on the labels

lb = LabelBinarizer()
labels = lb.fit_transform(labels)
labels = to_categorical(labels, num_classes = 2)

# convert the data to NumPy arrays
data = np.array(data, dtype="float32")


In [None]:
# preparing data for Kfold

# read the whole table, then keep only the two columns used below
action_units = pd.read_csv('video name and dog name.csv')
action_units_new = action_units.loc[:, ['video', 'DogName']]

def requested_dogs(dogs):
    """Collect the ``video`` entries of every requested dog.

    Each name in ``dogs`` is looked up in the ``DogName`` column of the
    module-level ``action_units_new`` table. Matches are returned grouped in
    the order the names appear in ``dogs``, and in row order within each dog.
    """
    return [
        action_units_new["video"][row]
        for wanted in dogs
        for row, owner in enumerate(action_units_new["DogName"])
        if owner == wanted
    ]


def new_list_dogs (videos):
    """Select the images and labels that belong to the given videos.

    For every path in the module-level ``equalized_imagePaths`` the video id
    is parsed as the integer in the second-to-last "-"-separated token of the
    file name. Entries whose id is in ``videos`` are kept.

    Args:
        videos: iterable of video ids (compared against ``int`` values).

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays built from the matching
        entries of the module-level ``data`` and ``labels``, in dataset order.

    Raises:
        ValueError: if a file name does not carry an integer video id.
    """
    # Build the lookup once: membership tests become O(1) per picture instead
    # of a linear scan, and one-shot iterables (e.g. generators) are no
    # longer exhausted by the first few lookups.
    wanted_videos = set(videos)

    keep = [
        idx
        for idx, imagePath in enumerate(equalized_imagePaths)
        if int(imagePath.split(os.path.sep)[-1].split("-")[-2]) in wanted_videos
    ]

    specific_dogs = [data[idx] for idx in keep]
    specific_dogs_labels = [labels[idx] for idx in keep]
    return np.array(specific_dogs), np.array(specific_dogs_labels)


# the 29 dog names, de-duplicated and in alphabetical order

dog_list = sorted(set(action_units_new["DogName"]))

# hold-out dogs for the final test; none of their pictures is used for training

indices_final_test = [3, 4, 5]
dogs_final_test = [dog_list[index] for index in indices_final_test]
X_final_test, y_final_test = new_list_dogs(requested_dogs(dogs_final_test))

# every remaining dog is available for training (and validation)

dog_list_train = [dog for dog in dog_list if dog not in dogs_final_test]



In [None]:
# preparing pics for KFold

# The split is done over the dogs, not over the pictures, so all pictures of
# one dog land on the same side of a train/validation split.
kf = KFold(n_splits=3)
folds = []

for train_index, test_index in kf.split(dog_list_train):
    indices_train = train_index.tolist()
    indices_test = test_index.tolist()
    print("TRAIN:", indices_train, "VALIDATION:", indices_test)

    train_dogs = [dog_list_train[index] for index in indices_train]
    validation_dogs = [dog_list_train[index] for index in indices_test]

    X_train, y_train = new_list_dogs(requested_dogs(train_dogs))
    X_test, y_test = new_list_dogs(requested_dogs(validation_dogs))

    # one entry per fold: [train images, train labels, val images, val labels]
    folds.append([X_train, y_train, X_test, y_test])



TRAIN: [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] VALIDATION: [0, 1, 2, 3, 4, 5, 6, 7, 8]
TRAIN: [0, 1, 2, 3, 4, 5, 6, 7, 8, 18, 19, 20, 21, 22, 23, 24, 25] VALIDATION: [9, 10, 11, 12, 13, 14, 15, 16, 17]
TRAIN: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] VALIDATION: [18, 19, 20, 21, 22, 23, 24, 25]


In [None]:

# train the model

acc_per_fold = []
loss_per_fold = []

# the plot style is global, so it is set once rather than once per fold
plt.style.use("ggplot")

for fold_no, fold in enumerate(folds, start=1):
    X_train, y_train, X_test, y_test = fold

    # Batch iterator over the training pictures.
    # - featurewise_std_normalization=True was dropped: the generator was
    #   never .fit() on the data, so the option had no statistics to apply
    #   and only produced warnings. Fitting it would normalize the training
    #   batches but not X_test, which is passed to evaluate() directly.
    # - batch_size is tied to BS so it always agrees with steps_per_epoch.
    aug = ImageDataGenerator(fill_mode="nearest")
    it = aug.flow(X_train, y_train, batch_size=BS)

    # construct the model: ImageNet-pretrained ResNet50 without its top
    baseModel = ResNet50(weights="imagenet", include_top=False,
      input_tensor=Input(shape=(224, 224, 3)))

    # construct the head of the model that will be placed on top of
    # the base model
    headModel = baseModel.output
    headModel = AveragePooling2D(pool_size=(7, 7))(headModel)
    headModel = Flatten(name="flatten")(headModel)
    headModel = Dense(128, activation="relu")(headModel)
    headModel = Dropout(0.5)(headModel)
    headModel = Dense(2, activation="softmax")(headModel)

    # place the head FC model on top of the base model (this will become
    # the actual model we will train)
    model = Model(inputs=baseModel.input, outputs=headModel)

    # loop over all layers in the base model and freeze them so they will
    # *not* be updated during the first training process
    for layer in baseModel.layers:
        layer.trainable = False

    opt = Adam(learning_rate=INIT_LR)
    model.compile(loss="categorical_crossentropy", optimizer=opt ,metrics=["accuracy"])

    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # Fit data to model
    history = model.fit(it,
                steps_per_epoch=len(X_train) // BS,
                epochs=EPOCHS,
                verbose=2)

    # plot the training loss and accuracy; the x axis follows the recorded
    # history so it always matches the number of epochs actually run
    epochs_run = np.arange(0, len(history.history["loss"]))
    plt.figure()
    plt.plot(epochs_run, history.history["loss"], label="train_loss")
    plt.plot(epochs_run, history.history["accuracy"], label="train_acc")
    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend(loc="lower left")
    # render now, so the figure appears right below this fold's training log
    plt.show()

    # Generate generalization metrics on the validation part of the fold
    scores = model.evaluate(X_test, y_test, verbose=0)

    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

# == Provide average scores ==
print('------------------------------------------------------------------------')
print('Score per fold')
for i, (fold_loss, fold_acc) in enumerate(zip(loss_per_fold, acc_per_fold), start=1):
    print('------------------------------------------------------------------------')
    print(f'> Fold {i} - Loss: {fold_loss} - Accuracy: {fold_acc}%')
print('------------------------------------------------------------------------')
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss: {np.mean(loss_per_fold)}')
print('------------------------------------------------------------------------')
