In [1]:
# this notebook is for taking the annotations from the whale_edge_annotation interface
# and using that information to create a dataset of extracted patches (size 16x16) -> label map
# the label map is a 16x16x3 map of classes, the classes being (0 - bg, 1 - whale, 2 - trailing edge)
# the key thing that allows us to do this is the assumption that, in general, above the trailing edge
# is going to be background, and below is going to be whale, and the pixels that make the edge itself
# are accurately the trailing edge. While this may not always be true (e.g. the edge coming up on the side)
# it will be true often enough for the network to be robust to noise
# the goal of this is then to run the resulting network in a fully convolutional manner and get a (rough)
# segmentation of the whale, with its trailing edge clearly marked

In [3]:
from os.path import join
from glob import glob
import json
# The dataset root lives in a one-line pointer file next to the notebook directory.
with open('../dataset_loc', 'r') as f:
    dataset_loc = f.read().rstrip()

# NOTE(review): machine-specific absolute path -- adjust when running elsewhere.
annotation_path = "/home/andrew/whale_edge_annotations/annotation_info"
imgs_dir = join(dataset_loc, 'Flukes/CRC_combined constrained/')
all_annotations = glob(join(annotation_path, '*'))

# Each annotation file is JSON indexed as [key, info]; only entries whose
# info['done'] is truthy are kept, mapping key -> info['path'].
# (presumably the key identifies the annotated image -- confirm against the annotation tool)
annotations_done = {}
for annotation_fn in all_annotations:
    with open(annotation_fn, 'r') as f:
        annotation = json.load(f)
    if not annotation[1]['done']:
        continue
    annotations_done[annotation[0]] = annotation[1]['path']

In [5]:
# it looks like the path data is stored from end to start (no biggie) in x, y format
# so the idea is to set a patch size of 16 x 16 and take a patch at every 16th point of the path
# (w/ a 50 point offset from each end of the path)
# we also want a good idea of what the background / whale looks like, so we should move some amount of pixels
# down with the assumption (potentially wrong, but hopefully right most of the time) that putting the path
# (mostly) at the bottom (and sampling k patches above) gives us background,
# and putting it (mostly) at the top gives us whale (fg) (and going down k times etc)
# the latter is more likely to be incorrect, especially towards the edges (just in general)
# to compensate for this we'll have k increase as we get towards the center of the path
# while there are definitely deformed tails that will violate a lot of this, we can safely assume
# that most of the tails annotated do not have this issue
def extract_patches(path, img, edge_offset=50, patch_size=16, max_k=2):
    """Walk an annotated path and compute the rows at which patches would be sampled.

    NOTE: this function is unfinished. It only computes row offsets for each
    sampled point; it reads no pixels from ``img`` and returns None.

    Parameters
    ----------
    path : sequence of points indexed as ``[x, y]`` (column, row).
    img : currently unused.
    edge_offset : number of path points skipped at each end of the path.
        0 keeps the whole path.
    patch_size : stride (in path points) between sampled points; half of it is
        the vertical step between candidate rows.
    max_k : number of half-patch steps taken upwards from each sampled point;
        also limits how far from the middle sample a downward step is taken.
    """
    # Slice the path once instead of on every iteration. A stop of -0 is 0 and
    # would yield an empty slice, so an offset of 0 uses an open-ended stop.
    stop = -1 * edge_offset if edge_offset else None
    samples = path[edge_offset:stop:patch_size]
    middle_ind = len(samples) // 2
    half_patch = patch_size // 2

    for sample_ind, patch_start in enumerate(samples):
        # extract the main patch with the center y at patch_start[y]
        right_col = patch_start[0]
        center_row = patch_start[1]

        # move up patch_size // 2 max_k times (or until you hit the top of the image)
        # y decreases to go up, so rows that fall above row 0 are dropped
        above_rows = [center_row - half_patch * (k + 1) for k in range(max_k)]
        above_rows = [row for row in above_rows if row >= 0]

        # move down patch_size // 2 min(max_k - dist_from_center , 1) times:
        # one step for samples closer than max_k to the middle sample, none otherwise
        # NOTE(review): rows are not checked against the bottom of the image
        distance_from_center = abs(sample_ind - middle_ind)
        below_rows = [center_row + half_patch * (k + 1)
                      for k in range(min(1, max_k - distance_from_center))]

        # TODO: not implemented yet --
        # for every patch, we know the starting y-value and the path in relation to it
        # thus we'll go by column and for each row if it's below the path it's whale, and if it's above it's
        # background.

[[891, 111],
 [890, 111],
 [889, 110],
 [888, 110],
 [887, 111],
 [886, 111],
 [885, 111],
 [884, 112],
 [883, 113],
 [882, 114],
 [881, 114],
 [880, 115],
 [879, 117],
 [878, 119],
 [877, 121],
 [876, 122],
 [875, 123],
 [874, 125],
 [873, 127],
 [872, 127],
 [871, 127],
 [870, 128],
 [869, 128],
 [868, 130],
 [867, 131],
 [866, 131],
 [865, 131],
 [864, 130],
 [863, 131],
 [862, 131],
 [861, 131],
 [860, 131],
 [859, 131],
 [858, 132],
 [857, 133],
 [856, 133],
 [855, 134],
 [854, 134],
 [853, 134],
 [852, 134],
 [851, 133],
 [850, 131],
 [849, 131],
 [848, 131],
 [847, 131],
 [846, 132],
 [845, 134],
 [844, 136],
 [843, 136],
 [842, 135],
 [841, 134],
 [840, 132],
 [839, 132],
 [838, 132],
 [837, 131],
 [836, 130],
 [835, 130],
 [834, 130],
 [833, 131],
 [832, 133],
 [831, 133],
 [830, 133],
 [829, 133],
 [828, 132],
 [827, 132],
 [826, 132],
 [825, 132],
 [824, 132],
 [823, 132],
 [822, 132],
 [821, 132],
 [820, 132],
 [819, 132],
 [818, 132],
 [817, 132],
 [816, 132],
 [815, 132],