In [13]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import cv2
import time
import sklearn.model_selection as skm

In [14]:
from google.colab import drive
from google.colab.patches import cv2_imshow

# Mount Google Drive into the Colab filesystem at /content/gdrive so the
# dataset and helper modules stored there can be reached by path.
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [15]:
import sys

# Add the project folder on Drive to the module search path so that modules
# stored there can be imported by name.
# NOTE(review): the path is relative, so it only resolves when the working
# directory is the parent of the `gdrive` mount point (Drive is mounted at
# /content/gdrive) -- confirm the notebook is always run from there.
sys.path.append("./gdrive/MyDrive/wildlife/Archeys_frogs")

In [16]:
from LandMarkDataGenerator import LandMarkDataGenerator
from LocalizationPointAccuracy import LocalizationPointAccuracy
from CNNBlock import CNNBlock

In [17]:
# Location of the pickled annotations table (image paths plus landmark
# coordinates) that is loaded into `df` with pd.read_pickle.
DB_FILE_PATH = r"./gdrive/MyDrive/wildlife/Archeys_frogs/image_path_anotations_db.pkl"

In [18]:
# Load the full annotations table.
# NOTE(review): unpickling can execute arbitrary code; only load this file
# if it comes from a trusted source.
df = pd.read_pickle(DB_FILE_PATH)

In [19]:
# Preview the first rows to check that the columns loaded as expected.
df.head()

Unnamed: 0,image_path,x_Left_eye,y_Left_eye,x_Left_front_leg,y_Left_front_leg,x_Right_eye,y_Right_eye,x_Right_front_leg,y_Right_front_leg,x_Tip_of_snout,y_Tip_of_snout,x_Vent,y_Vent,original_width_size,original_height_size
0,/content/gdrive/MyDrive/wildlife/Archeys_frogs...,1126.923096,1039.332031,1264.653076,994.205688,1067.585571,816.12207,1248.02002,763.907959,993.96582,941.991577,1594.713135,811.38501,2628,1656
1,/content/gdrive/MyDrive/wildlife/Archeys_frogs...,712.792725,878.468506,816.583923,876.201294,694.75531,736.335205,764.705322,700.226685,624.80542,822.055176,1001.594116,785.980469,2497,1599
2,/content/gdrive/MyDrive/wildlife/Archeys_frogs...,885.637756,1007.863464,970.605286,1031.211304,908.882812,885.314575,989.759521,904.002441,824.620361,926.964722,1166.535889,1006.835266,2634,1750
3,/content/gdrive/MyDrive/wildlife/Archeys_frogs...,613.908813,1019.479797,743.594849,1035.384644,622.473022,813.939697,735.030701,766.224976,489.116608,906.922119,1102.06665,893.464111,2226,1663
4,/content/gdrive/MyDrive/wildlife/Archeys_frogs...,1449.327393,778.694946,1277.386475,764.939697,1443.595947,977.000183,1271.655029,1033.167603,1582.295044,899.05365,951.84491,926.564148,2655,1870


In [20]:
def create_train_val_test_split(df, random_state=None):
    """Split an annotations table into train / validation / test frames.

    Rows are grouped by the parent directory of ``image_path`` and each
    directory is assigned, as a whole, to exactly one subset, so images that
    share a directory never end up in different subsets. The intent is that
    no frog with the same id appears in more than one subset (this presumes
    one directory per frog id -- verify against the dataset layout).

    Directories are first split 70% / 30%; the 30% part is then halved into
    test and validation. The proportions apply to directories, not rows, so
    the row counts depend on how many images each directory holds.

    Args:
        df: DataFrame with an ``image_path`` column of file paths.
        random_state: Optional seed forwarded to both ``train_test_split``
            calls. The default ``None`` keeps the original unseeded
            behaviour; pass an int to obtain the same split on every run.

    Returns:
        Tuple ``(train_df, val_df, test_df)`` of new DataFrames, each with a
        fresh ``0..n-1`` index. ``df`` itself is left unmodified.
    """
    # Compute every row's directory once and reuse it for all three subsets.
    image_dirs = df.image_path.map(os.path.dirname)
    unique_dirs = image_dirs.unique()

    # Split the directories (not the rows) so a directory is never divided.
    train_dirs, holdout_dirs = skm.train_test_split(
        unique_dirs, test_size=0.3, random_state=random_state
    )
    test_dirs, val_dirs = skm.train_test_split(
        holdout_dirs, test_size=0.5, random_state=random_state
    )

    def _rows_in(dirs):
        # Vectorised membership test; reset_index returns a new frame, so the
        # filtered selection is never mutated in place.
        return df.loc[image_dirs.isin(dirs)].reset_index(drop=True)

    return _rows_in(train_dirs), _rows_in(val_dirs), _rows_in(test_dirs)

In [None]:
# Partition the annotations into the three subsets used downstream.
train_df, val_df, test_df = create_train_val_test_split(df)

In [None]:
# Sanity checks on the split, run before anything is written to disk.

# 1) No image may appear in more than one subset.
assert not np.any(train_df.image_path.isin(val_df.image_path)), "train/val share images"
assert not np.any(train_df.image_path.isin(test_df.image_path)), "train/test share images"
assert not np.any(test_df.image_path.isin(val_df.image_path)), "test/val share images"

# 2) The split is made per directory, so directories must not be shared
#    between subsets either (this is the actual leakage guarantee).
split_dirs = {
    split_name: set(split_df.image_path.map(os.path.dirname))
    for split_name, split_df in (("train", train_df), ("val", val_df), ("test", test_df))
}
assert split_dirs["train"].isdisjoint(split_dirs["val"]), "train/val share directories"
assert split_dirs["train"].isdisjoint(split_dirs["test"]), "train/test share directories"
assert split_dirs["test"].isdisjoint(split_dirs["val"]), "test/val share directories"

# 3) Every row of the source table must land in one of the subsets.
assert len(train_df) + len(val_df) + len(test_df) == len(df), "rows lost or duplicated by the split"

In [None]:
# Persist each subset as a pickle in the same Drive folder as the source
# annotations file.
for split_df, pickle_path in (
    (train_df, "./gdrive/MyDrive/wildlife/Archeys_frogs/train_db.pkl"),
    (val_df, "./gdrive/MyDrive/wildlife/Archeys_frogs/val_db.pkl"),
    (test_df, "./gdrive/MyDrive/wildlife/Archeys_frogs/test_db.pkl"),
):
    split_df.to_pickle(pickle_path)

In [None]:
# Report how many rows ended up in each subset.
for message, split_df in (
    ("Train length: {0}", train_df),
    ("Val length: {0}", val_df),
    ("Test length: {0}", test_df),
):
    print(message.format(len(split_df)))