<a href="https://colab.research.google.com/github/YolandaMDavis/wildtrack-iqa/blob/task-amenability/cropping_script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Image Cropping Script

This notebook generates cropped images using either the annotations provided by WildTrack or the Footprint Detection Model, also provided by WildTrack.

## Data and Model Import

In [None]:
# Mount Google Drive and stage the raw image archive in the working folder.
# The archive copy itself is only needed when WildTrack_Raw.zip is not already
# present locally; `shutil` and PARENT_DIR defined here are used by later cells.
import shutil

from google.colab import drive

# Google Colab mount point; adjust accordingly for other environments
drive.mount('/content/drive')
PARENT_DIR = '/content/drive/MyDrive/Wildtrack Group/IQA'

# copy the zip archive from the shared drive (it is extracted in the next cell)
shutil.copy(f'{PARENT_DIR}/data/WildTrack_Raw.zip', 'WildTrack_Raw.zip')

Mounted at /content/drive


'WildTrack_Raw.zip'

In [None]:
from zipfile import ZipFile

# Unpack every member of the raw image archive into the current directory.
with ZipFile('WildTrack_Raw.zip', mode='r') as raw_archive:
    raw_archive.extractall()

In [None]:
# copy and load model files over from share
import torch
import torchvision
from PIL import Image
import torchvision.transforms as T
import matplotlib.pyplot as plt
import cv2
import json
import numpy as np
import albumentations as A

# Copy the annotation table and the detection-model artifacts from the shared
# drive into the working folder. Relies on `shutil` and PARENT_DIR from the
# first cell of the notebook.
shutil.copy(PARENT_DIR + '/data/annotations_bbox.csv', 'annotations_bbox.csv')
shutil.copy(PARENT_DIR + '/task-amenability/object/model.pt', 'object_model.pt')
shutil.copy(PARENT_DIR + '/task-amenability/object/class_mapping.json', 'class_mapping.json')
shutil.copy(PARENT_DIR + '/task-amenability/object/transforms.json', 'transforms.json')

with open('class_mapping.json') as mapping_file:
    mappings = json.load(mapping_file)

# model output index -> class name
class_mapping = {item['model_idx']: item['class_name'] for item in mappings}
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# map_location remaps the saved tensors onto `device` while loading. Without it,
# TorchScript tries to restore the devices the model was saved from and raises
# when that device (e.g. CUDA) is unavailable on this runtime.
model = torch.jit.load('object_model.pt', map_location=device)
transforms = A.load('transforms.json')

In [None]:
# Import bounding-box annotations provided by WildTrack
import ast
import json
import re

import pandas


annotations = pandas.read_csv('annotations_bbox.csv')

# Greedy match from the first '[' to the last ']' inside a label string.
BBOX_PATTERN = re.compile(r"\[.*]")

# image file name -> 4-tuple of bounding-box values, in the order stored in the CSV
image_bboxes = {}

# Iterate the two columns directly so the loop does not depend on the frame
# having a default 0..n-1 index.
for image_name, raw_labels in zip(annotations["image_name"], annotations["labels"]):

    # drop the first and last character of the field before searching it
    img_labels = raw_labels[1:-1]
    bbox_match = BBOX_PATTERN.search(img_labels)
    if bbox_match is None:
        raise ValueError("No bounding box found in labels for image: " + str(image_name))

    # literal_eval only accepts Python literals, so text from the CSV cannot
    # execute arbitrary code the way eval() would.
    img_bbox = ast.literal_eval(bbox_match.group())

    # NOTE(review): these are named min/max here, but crop_image consumes the
    # third and fourth values as width/height - confirm the CSV's convention.
    x_min = img_bbox[0]
    y_min = img_bbox[1]
    x_max = img_bbox[2]
    y_max = img_bbox[3]

    image_bboxes[image_name] = (x_min, y_min, x_max, y_max)

## Define Image Cropping Functions

In [None]:
import os

def crop_image(image_location, image_name):
  """Crop an image to its WildTrack-annotated bounding box.

  Args:
    image_location: path of the image file to open.
    image_name: key used to look the box up in the module-level `image_bboxes`.

  Returns:
    The cropped region as a PIL image, or None when the image has no
    annotation or the cropped array cannot be converted back to an image.
  """
  # Context manager releases the file handle once the pixels are copied out.
  with Image.open(image_location) as source:
    bands = source.getbands()
    image = np.array(source)

  single_band = len(bands) == 1
  if single_band:
    print("B&W image")
    # explicit channel axis, as the (currently disabled) model fallback expects
    image = np.expand_dims(image, axis=2)

  if image_name not in image_bboxes:
    # The object-model fallback is disabled; images without an annotation are
    # skipped. The original implementation is kept below for reference.
    #
    # print("No boundaries exists for image: " + image_name + ". Will use object model")
    # with torch.no_grad():
    #   image = transforms(image=image)['image']
    #   x = torch.from_numpy(image).to(device)
    #   x = x.permute(2, 0, 1).float()
    #   y = model(x)
    #
    #   # remove overlapped boxes
    #   no_overlap = torchvision.ops.nms(y['pred_boxes'], y['scores'], 0.5)
    #   y['scores'] = y['scores'][no_overlap]
    #   y['pred_boxes'] = y['pred_boxes'][no_overlap]
    #   y['pred_classes'] = y['pred_classes'][no_overlap]
    #
    #   # keep only predictions with scores greater than .5
    #   to_keep = (y['scores'] > 0.5).nonzero(as_tuple=True)[0]
    #   y['pred_boxes'] = y['pred_boxes'][to_keep]
    #   y['pred_classes'] = y['pred_classes'][to_keep]
    #
    #   if len(y['pred_boxes'].tolist()) > 0:
    #     pred_box = y['pred_boxes'].tolist()[0]
    #     x = int(pred_box[0])
    #     y = int(pred_box[1])
    #     w = int(pred_box[2])
    #     h = int(pred_box[3])
    #
    #     cropped_image = image[y:y+h, x:x+w]
    #
    #     if len(bands) == 1:
    #       cropped_image = np.squeeze(cropped_image, axis=2)
    #
    #     return Image.fromarray(cropped_image)
    #
    #   else:
    return None

  print("Boundary Box exists for image: " + image_name)
  # NOTE(review): the annotation cell stores these values as
  # (x_min, y_min, x_max, y_max) while they are used here as (x, y, w, h) -
  # confirm which convention the CSV follows.
  x, y, w, h = image_bboxes[image_name]
  cropped_image = image[y:y+h, x:x+w]

  if single_band:
    # Image.fromarray raises TypeError for (H, W, 1) arrays, which previously
    # caused every annotated single-band image to be dropped; remove the
    # channel axis added above before converting.
    cropped_image = np.squeeze(cropped_image, axis=2)

  try:
    return Image.fromarray(cropped_image)
  except TypeError:
    print("Caught Error For Image : " + image_name)
    return None

def generate_data_sets(rootdir='CROPPED', source_dir='RAW'):
    """Crop every image found in the subdirectories of `source_dir`.

    The directory tree under `source_dir` is mirrored beneath `rootdir`, and
    each image for which `crop_image` returns a result is saved there with
    '_.jpg' appended to its base name. Files located directly in `source_dir`
    itself are not processed.

    Args:
        rootdir: directory that receives the mirrored tree of cropped images.
        source_dir: relative path of the directory holding the raw images.

    Returns:
        None. Cropped images are written to disk and their paths printed.
    """
    # Bottom-up walk keeps the original processing order (leaf folders first).
    for image_location, _subdirs, images in os.walk(source_dir, topdown=False):
        if image_location == source_dir:
            # only the subdirectories hold images to crop
            continue

        # exist_ok lets the function be re-run over an existing output tree
        os.makedirs(rootdir + '/' + image_location, exist_ok=True)

        for image in images:
            image_path = image_location + '/' + image
            cropped_img = crop_image(image_path, image)
            if cropped_img is None:
                continue

            cropped_img_path = rootdir + '/' + os.path.splitext(image_path)[0] + '_.jpg'
            print(cropped_img_path)
            cropped_img.save(cropped_img_path)
            

## Generate Cropped Images

In [None]:
# Output folder for the cropped images; reused below when building the archive.
rootdir = 'CROPPED'
generate_data_sets(rootdir=rootdir)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
CROPPED/RAW/Otter_Eurasian_3/f29d7943ce0911ea952e0242ac110002_.jpg
Boundary Box exists for image: 6649b6bcc0fd11ea82a50242ac1c0002.jpg
CROPPED/RAW/Otter_Eurasian_3/6649b6bcc0fd11ea82a50242ac1c0002_.jpg
Boundary Box exists for image: e169b1005fde11eb99da00155d158bb6.jpg
CROPPED/RAW/Otter_Eurasian_3/e169b1005fde11eb99da00155d158bb6_.jpg
Boundary Box exists for image: 596310e0c0fa11ea82a50242ac1c0002.jpg
CROPPED/RAW/Otter_Eurasian_3/596310e0c0fa11ea82a50242ac1c0002_.jpg
Boundary Box exists for image: c357304034e911ebaebf00155d6f534b.jpg
CROPPED/RAW/Otter_Eurasian_3/c357304034e911ebaebf00155d6f534b_.jpg
Boundary Box exists for image: 4cd61536c0fd11ea82a50242ac1c0002.jpg
CROPPED/RAW/Otter_Eurasian_3/4cd61536c0fd11ea82a50242ac1c0002_.jpg
Boundary Box exists for image: c33958280a8b11eb9d950242ac110002.jpg
CROPPED/RAW/Otter_Eurasian_3/c33958280a8b11eb9d950242ac110002_.jpg
Boundary Box exists for image: 97beeecec0fd11ea82a50242ac1

## Compress and move data to share

In [None]:
# Zip the contents of the output folder into Annotated_Cropped_WildTrack.zip.
shutil.make_archive(base_name='Annotated_Cropped_WildTrack', format='zip', root_dir=rootdir)

'/content/Annotated_Cropped_WildTrack.zip'

In [None]:
# Copy the finished archive into the data folder on the shared drive.
shutil.copy('Annotated_Cropped_WildTrack.zip', f'{PARENT_DIR}/data/Annotated_Cropped_WildTrack.zip')

'/content/drive/MyDrive/Wildtrack Group/IQA/data/Annotated_Cropped_WildTrack.zip'