In [5]:
# we can use multiple data sources for this
# Flukebook
from __future__ import division, print_function
from os.path import join
import cPickle as pickle


# The dataset root directory is read from the text file ../dataset_loc
# (trailing whitespace/newline stripped) so it is not hard-coded in the notebook.
with open('../dataset_loc', 'r') as f:
    dataset_loc = f.read().rstrip()

# Flukebook keypoint records.
# Pickle streams are binary data, so the file is opened in 'rb' mode; text mode
# only works by accident on platforms that do not translate line endings.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open(join(dataset_loc, 'Flukes/Flukebook/flukebook_points_orig.pkl'), 'rb') as f:
    fb_points = pickle.load(f)
print(len(fb_points))

1577


In [6]:
# Zooniverse
# Keypoint records, later indexed as zsl_points[image_name]['left'/'right'/'notch'].
# Pickle streams are binary data, so the file is opened in 'rb' mode rather than
# text mode.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open(join(dataset_loc, 'ibs_points_allindv.pkl'), 'rb') as f:
    zsl_points = pickle.load(f)

print(len(zsl_points))

1970


In [41]:
# Our own annotations
from glob import glob
import json
# Directory containing one JSON annotation record per file.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
annotation_path = "/home/andrew/envs/whaleFlukes/whale_edge_annotations/annotation_info"

all_annotations = glob(join(annotation_path, '*'))
annotation_points = []
for annotation_fn in all_annotations:
    with open(annotation_fn, 'r') as f:
        annotation = json.load(f)

    # Each record is indexed as [filename, info].
    info = annotation[1]

    # Keep only finished annotations that are not flagged bad and whose notch
    # is not submerged.
    if not info['done'] or info['bad'] or info['notchSubmerged']:
        continue

    # The last point of the top path is taken as the left endpoint and the
    # first point as the right endpoint.
    this_annot = {
        'fn': annotation[0],
        'left': info['topInfo']['path'][-1],
        'right': info['topInfo']['path'][0],
        'notch': info['notch'],
    }
    annotation_points.append(this_annot)
print(len(annotation_points))

489


In [22]:
# Directory the Flukebook images are read from (joined with each record's 'fn').
fb_imgs_dir = join(dataset_loc, "Flukes/Flukebook/images")
# Single directory used for both the Zooniverse images and our own annotated images.
zsl_and_annot_imgs_dir  = join(dataset_loc, "Flukes/CRC_combined constrained")

In [47]:
import cv2
import numpy as np

# Target size every image is resized to, given in (y, x) order as expected by
# convert_img_points.
fixed_shape = (224, 224) # so I can transfer over Imagenet trained models

def resize_point(new_shape, old_shape, point):
    """Rescale a point from an image of ``old_shape`` to one of ``new_shape``.

    Both shapes are indexed as (rows, cols, ...), i.e. y first; ``point`` is
    indexed as (x, y). Each scaled coordinate is truncated with ``int``.

    Returns a numpy array of shape (1, 2) holding the new (x, y).
    """
    ratio_x = new_shape[1] / old_shape[1]
    ratio_y = new_shape[0] / old_shape[0]

    scaled_x = int(point[0] * ratio_x)
    scaled_y = int(point[1] * ratio_y)
    # Build the single-row 2-D array directly instead of reshaping a flat one.
    return np.array([[scaled_x, scaled_y]])

def convert_img_points(new_size, img, left, right, notch):
    """Resize ``img`` to ``new_size`` and rescale its three keypoints to match.

    ``new_size`` is given as (y, x). ``left``, ``right`` and ``notch`` are
    (x, y) points in the coordinates of the original ``img``.

    Returns ``(resized_img, left, right, notch)`` where each point is the
    array produced by ``resize_point`` for the new image size.
    """
    # cv2.resize wants the target size in the opposite (x, y) order.
    target_xy = new_size[::-1]
    resized_img = cv2.resize(img, target_xy)

    scaled_left, scaled_right, scaled_notch = [
        resize_point(new_size, img.shape, pt) for pt in (left, right, notch)
    ]

    return resized_img, scaled_left, scaled_right, scaled_notch
    
def xyc(pt):
    """Return the (x, y) tuple of a point stored as a dict with 'x' and 'y' keys."""
    return (pt['x'], pt['y'])


# Images and keypoint targets from the Flukebook data.
# These two lists need to stay parallel (same index -> same sample).
fb_kp_imgs = []
fb_kp_pts = []
for annot in fb_points:
    img = cv2.imread(join(fb_imgs_dir, annot['fn']))

    # Flukebook stores points as {'x': ..., 'y': ...}; convert them to (x, y).
    fb_left, fb_right, fb_notch = [
        xyc(annot['points'][side]) for side in ('left', 'right', 'notch')
    ]

    resized_img, res_left, res_right, res_notch = convert_img_points(
        fixed_shape, img, fb_left, fb_right, fb_notch)

    fb_kp_imgs.append(resized_img)
    fb_kp_pts.append(np.hstack([res_left, res_right, res_notch]))
# guide: each entry of a *_kp_pts list holds the x, y coords of each point
# in left, right, notch order

In [42]:
# Images and keypoint targets from the Zooniverse data; the two lists stay
# index-aligned (same index -> same sample).
zsl_kp_imgs = []
zsl_kp_pts = []

# Image names consumed here, so that our own annotations of the same images
# can be skipped later instead of duplicating samples.
seen_imgs = set()

# (A leftover no-op `zsl_points.keys()` expression was removed here.)
for imgn in zsl_points:
    img = cv2.imread(join(zsl_and_annot_imgs_dir, imgn))
    # Points are handed over as stored (no xyc conversion), so they are
    # expected to already be indexable as (x, y).
    pts = zsl_points[imgn]
    resized_img, res_left, res_right, res_notch = convert_img_points(
        fixed_shape, img, pts['left'], pts['right'], pts['notch'])
    seen_imgs.add(imgn)
    zsl_kp_imgs.append(resized_img)
    zsl_kp_pts.append(np.hstack([res_left, res_right, res_notch]))

In [45]:
# Images and keypoint targets from our own annotations; the two lists stay
# index-aligned (same index -> same sample).
annot_kp_imgs = []
annot_kp_pts = []

for annot in annotation_points:
    # Images already covered by the Zooniverse data are not added twice.
    if annot['fn'] in seen_imgs:
        print("Already saw %s in the ZSL data, skipping" % annot['fn'])
        continue

    # Load THIS annotation's image. The previous code read `imgn`, a variable
    # left over from the Zooniverse loop, so every sample here got the same
    # (last Zooniverse) image paired with unrelated keypoints.
    img_path = join(zsl_and_annot_imgs_dir, annot['fn'])
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None;
        # fail with the offending path instead of an opaque resize error.
        raise IOError("Could not read image %s" % img_path)

    resized_img, res_left, res_right, res_notch = convert_img_points(fixed_shape, img,
                                        annot['left'], annot['right'], annot['notch'])
    annot_kp_imgs.append(resized_img)
    annot_kp_pts.append(np.hstack([res_left, res_right, res_notch]))

Already saw 13716-RWB081604_1241.jpg in the ZSL data, skipping
Already saw 20120505-DB-4844.jpg in the ZSL data, skipping
Already saw CINMS_20120713_A7491.jpg in the ZSL data, skipping
Already saw 20100819-Frediani-6417.jpg in the ZSL data, skipping
Already saw 12413-IMG_1268.jpg in the ZSL data, skipping
Already saw JKJ-20120909-8605.jpg in the ZSL data, skipping
Already saw 11833-JAC-20050911-0058.jpg in the ZSL data, skipping
Already saw 10312-JAC01-61_13Edit.jpg in the ZSL data, skipping
Already saw CINMS-20090417-A2446.jpg in the ZSL data, skipping
Already saw 20101014-JAC-0164.jpg in the ZSL data, skipping
Already saw 11902-r043-39f03.jpg in the ZSL data, skipping
Already saw 20130930-DB-LA3A0580.jpg in the ZSL data, skipping
Already saw CRC20120513-CTC-IMG_1725.jpg in the ZSL data, skipping
Already saw BG-20120912-3026.jpg in the ZSL data, skipping
Already saw 20100729-JAC-0026.jpg in the ZSL data, skipping
Already saw 11791-TEC03-3-16.jpg in the ZSL data, skipping
Already saw 1

In [46]:
# Number of our own annotations kept after skipping images already present in the ZSL data.
len(annot_kp_imgs)

262

In [50]:
from itertools import chain

from sklearn.utils import shuffle
try:
    # Current location of train_test_split.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the (since removed) cross_validation module.
    from sklearn.cross_validation import train_test_split

# Fixed seed so the shuffle and both splits are reproducible across kernel restarts.
SPLIT_SEED = 0

# Pool the three sources; images and keypoints are chained in the same source
# order so the two lists remain index-aligned.
all_kp_imgs = list(chain(fb_kp_imgs, zsl_kp_imgs, annot_kp_imgs))
all_kp_pts = list(chain(fb_kp_pts, zsl_kp_pts, annot_kp_pts))
assert len(all_kp_imgs) == len(all_kp_pts), "images and keypoints must stay parallel"

kp_imgs, kp_pts = shuffle(all_kp_imgs, all_kp_pts, random_state=SPLIT_SEED)

# First hold out a test set, then carve a validation set out of the remaining
# training data (both splits use train_test_split's default hold-out fraction).
train_kp_imgs, test_kp_imgs, train_kp_pts, test_kp_pts = train_test_split(
    kp_imgs, kp_pts, random_state=SPLIT_SEED)
train_kp_imgs, val_kp_imgs, train_kp_pts, val_kp_pts = train_test_split(
    train_kp_imgs, train_kp_pts, random_state=SPLIT_SEED)

print(len(train_kp_imgs))
print(len(val_kp_imgs))
print(len(test_kp_imgs))

2142
714
953
