# Create Mini Crops for the Labeled Dataset

Inspired by this PIL-based approach: https://stackoverflow.com/questions/51379338/image-processing-with-single-to-multiple-images

Also based on 2020-03-14-ABCrop.ipynb from sidewalk-cv.

In [32]:
import os
from PIL import Image
from io import BytesIO

import numpy as np
import pandas as pd
#from pano_feats import Pano
#import mini_utils
import s3fs
import boto3

In [9]:
# Show full cell contents when displaying DataFrames (no column truncation).
# ``None`` is the supported "unlimited" value; the old ``-1`` sentinel was
# deprecated in pandas 1.0 and is rejected by later versions, so it is only
# used as a fallback for pre-1.0 pandas, which does not accept ``None``.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

In [28]:
# Default S3 buckets: `bucket_img` holds the source images that are cropped,
# `bucket_crops` receives the generated mini crops.
bucket_img = "streetview-w210"
bucket_crops = "gsv-crops2"

# S3 handles shared by the cells below. The cropping functions in this
# notebook use the low-level client `s3`.
# NOTE(review): `fs` and `s3_r` are not referenced in the visible cells —
# confirm whether they are still needed.
fs = s3fs.S3FileSystem()
s3 = boto3.client('s3')
s3_r = boto3.resource('s3')

def make_mini_crops(img_id, bucket_img = bucket_img, bucket_crops=bucket_crops):
    """Cut one source image into six square mini crops and upload them to S3.

    The source image is downloaded once from ``gsv/<img_id>.jpg`` in
    ``bucket_img``. Each crop is re-encoded as JPEG and written to
    ``mini-crops/<img_id>_<crop letter>.jpg`` in ``bucket_crops``.

    Args:
        img_id: Image identifier string used to build the S3 keys,
            e.g. ``'1908_135'``.
        bucket_img: Name of the bucket holding the source images.
        bucket_crops: Name of the bucket that receives the crops.

    Raises:
        Any error raised by boto3 or PIL is propagated unchanged, e.g. the
        ``NoSuchKey`` client error when the source image does not exist.
    """
    # One entry per crop: (crop letter, sv_x, sv_y, crop_size), where
    # (sv_x, sv_y) is the upper-left corner and crop_size the side length in
    # pixels. The x offsets step by 90 px with 180 px windows, so neighbouring
    # crops overlap by half a window.
    # NOTE(review): the right-most crop spans x = 455..635 and every crop
    # spans y = 320..500, so this presumably expects source images of at least
    # that size — confirm.
    crop_specs = (
        ('A', 5, 320, 180),
        ('B', 95, 320, 180),
        ('C', 185, 320, 180),
        ('D', 275, 320, 180),
        ('E', 365, 320, 180),
        ('F', 455, 320, 180),
    )

    # Get the original image file
    path_to_image = "gsv/" + img_id + ".jpg"
    img_file = s3.get_object(Bucket=bucket_img, Key=path_to_image)['Body'].read()

    # The context manager releases the decoded image as soon as all crops are
    # uploaded, which matters when this runs over thousands of images.
    with Image.open(BytesIO(img_file)) as im:
        # Zero-byte "folder" placeholder object for the crops prefix.
        # NOTE(review): this is re-written on every call; it could be hoisted
        # to the caller if the placeholder is needed at all.
        s3.put_object(Bucket=bucket_crops, Key='mini-crops/')

        # Loop over all image crops here so we don't have to pull the image 6 times
        for crop_id, sv_x, sv_y, crop_size in crop_specs:
            img_filename = 'mini-crops/' + img_id + '_' + crop_id + '.jpg'

            # PIL's crop box is (left, upper, right, lower).
            cropped_square = im.crop((sv_x, sv_y, sv_x + crop_size, sv_y + crop_size))

            # Encode in memory and rewind so the upload reads from the start.
            buffer = BytesIO()
            cropped_square.save(buffer, 'JPEG')
            buffer.seek(0)
            s3.put_object(Bucket=bucket_crops, Key=img_filename, Body=buffer)


def make_sliding_window_crops(list_img_id, bucket_crops = bucket_crops, bucket_img = bucket_img):
    """Create and upload mini crops for every image id in ``list_img_id``.

    Calls ``make_mini_crops`` once per id. A failure for one id is printed
    and counted, and processing continues with the next id. A summary line
    with the success and failure counts is printed at the end.

    Args:
        list_img_id: Iterable of image id strings, e.g. ``['1908_45']``.
        bucket_crops: Name of the bucket that receives the crops.
        bucket_img: Name of the bucket holding the source images.
    """
    num_suc = 0
    num_fail = 0

    for img_id in list_img_id:
        try:
            make_mini_crops(img_id, bucket_img, bucket_crops)
        except Exception as e:
            # Deliberately broad: this is a best-effort batch run, so any
            # per-image error (e.g. a missing source key) is reported and
            # skipped instead of aborting the remaining images.
            print("\t cropping failed for {}".format(img_id))
            print(e)
            num_fail += 1
        else:
            num_suc += 1
    print("Finished. {} panos succeeded, {} failed.".format(num_suc, num_fail))

# Get list of img_id to crop

In [11]:
# Base directory on the SageMaker instance under which the project lives.
SAGEMAKER_PATH = '/home/ec2-user/SageMaker'

# Project sub-folders read below: SPLIT_PATH holds the train/validation/test
# split list, MINI_PATH holds the labelled crop table.
SPLIT_PATH, MINI_PATH = (
    os.path.join(SAGEMAKER_PATH, 'classify-streetview', folder)
    for folder in ('split-train-test', 'mini-crops')
)

In [12]:
# Load the table of labelled crops and preview the first rows.
# NOTE(review): the preview shows an "Unnamed: 0" column, so the CSV was
# presumably saved together with its index — confirm.
df_crops = pd.read_csv(os.path.join(MINI_PATH, 'Crops_with_Labels.csv'))
df_crops.head()

Unnamed: 0,filename,file_size,file_attributes,region_count,region_id,region_shape_attributes,region_attributes,img_id,present_ramp,missing_ramp,...,margin,x_roi_left,x_roi_right,y_roi_top,y_roi_bottom,xpt_minus_xleft,xright_minus_xpt,ypt_minus_ytop,ybottom_minus_ypt,label_in_crop
0,680_45.jpg,49558,{},3,0,"{""name"":""point"",""cx"":108,""cy"":389}","{""Present Curb Ramp"":""1\n""}",680,True,False,...,10,15,175,330,490,103,77,69,111,True
1,680_45.jpg,49558,{},3,1,"{""name"":""point"",""cx"":160,""cy"":389}","{""Present Curb Ramp"":""1""}",680,True,False,...,10,15,175,330,490,155,25,69,111,True
2,680_135.jpg,51194,{},6,0,"{""name"":""point"",""cx"":18,""cy"":475}","{""Present Curb Ramp"":""1""}",680,True,False,...,10,15,175,330,490,13,167,155,25,True
3,680_225.jpg,47450,{},4,0,"{""name"":""point"",""cx"":179,""cy"":411}","{""Surface Problem"":""1""}",680,False,False,...,10,15,175,330,490,174,6,91,89,True
4,1042_45.jpg,45436,{},3,1,"{""name"":""point"",""cx"":90,""cy"":380}","{""Obstacle"":""1""}",1042,False,False,...,10,15,175,330,490,85,95,60,120,True


In [15]:
# Load the train/validation/test assignment of each img_id, keeping only the
# id column and its split label.
df_split = pd.read_csv(os.path.join(SPLIT_PATH, 'train-validation-test-imgid-list.csv'))
df_split = df_split[['img_id', 'train/val/test']]
print(df_split.shape)

df_split.head()

(480, 2)


Unnamed: 0,img_id,train/val/test
0,8475,validation
1,8503,test
2,8540,test
3,8650,validation
4,8780,validation


# Trial Cropping

In [22]:
# One image id in '<img_id>_<heading>' form, used to try out the crop function.
example_imgid_heading = '1908_135'

In [23]:
# Trial run on a single image: uploads its six mini crops to the default crops bucket.
make_mini_crops(example_imgid_heading)

In [26]:
# Small list of image ids for testing the multi-image wrapper
# (make_sliding_window_crops).
example_list = ['1908_45', '1908_225', '1908_315']

In [29]:
# Trial run of the batch wrapper on the small example list before the full run.
make_sliding_window_crops(example_list)

Finished. 3 panos succeeded, 0 failed.


# Actually Crop the Images of Interest (480 img_ids × 4 headings)

In [34]:
# Every img_id listed in the train/validation/test split table.
img_id_list = list(df_split['img_id'])
# Heading suffixes paired with each img_id to form the image file names
# (presumably camera headings in degrees — confirm).
heading_list = [45, 135, 225, 315]

In [51]:
# Build one row per (img_id, heading) combination, ordered by img_id first and
# heading second. A plain nested comprehension replaces the former
# meshgrid/transpose/reshape construction: it yields the same row order but
# keeps each column's own dtype instead of coercing both into one numpy array.
df_imgid_heading = pd.DataFrame(
    [(img_id, heading) for img_id in img_id_list for heading in heading_list],
    columns=['img_id', 'heading'],
)
# Key used in the S3 file names: '<img_id>_<heading>'.
df_imgid_heading['imgid_heading'] = df_imgid_heading['img_id'].astype(str) + '_' + df_imgid_heading['heading'].astype(str)
df_imgid_heading.head()

Unnamed: 0,img_id,heading,imgid_heading
0,8475,45,8475_45
1,8475,135,8475_135
2,8475,225,8475_225
3,8475,315,8475_315
4,8503,45,8503_45


In [52]:
# Full run over every img_id/heading combination. Ids whose source image is
# missing from the image bucket are reported as failures (NoSuchKey) and skipped.
make_sliding_window_crops(list(df_imgid_heading['imgid_heading']))

	 cropping failed for 680_45
An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.
	 cropping failed for 680_135
An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.
	 cropping failed for 680_225
An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.
	 cropping failed for 680_315
An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.
	 cropping failed for 878_45
An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.
	 cropping failed for 878_135
An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.
	 cropping failed for 878_225
An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.
	 cropping failed for 878_315
An error occurred (NoSuchKey) when calling the G