In [1]:
import sys
sys.path.append("../src")
import os
import glob
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image

import transform as T
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Report whether PyTorch can see a CUDA device (shown as the cell output).
torch.cuda.is_available()

True

# Load a model pre-trained on COCO (Faster R-CNN, ResNet-50 FPN backbone)

In [2]:
# Faster R-CNN with a ResNet-50 FPN backbone, loaded with pre-trained weights.
# NOTE(review): newer torchvision releases may deprecate `pretrained=` in favour
# of `weights=` - confirm against the installed version before switching.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# Use the GPU when one is visible, otherwise fall back to the CPU instead of
# crashing on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
# Inference mode; assigning to `_` keeps the model repr out of the cell output.
_ = model.eval()

In [3]:
# functions

def get_prediction(image_path, model, threshold=0.2):
    """Run a detection model on one image and keep the label-1 detections.

    Parameters
    ----------
    image_path : str
        Path of the image file, opened with PIL.
    model : torch.nn.Module
        Detection model called as ``model([image_tensor])``; its first output
        must be a dict with ``'boxes'``, ``'labels'`` and ``'scores'`` tensors.
    threshold : float, default 0.2
        Only detections whose score is strictly greater than this are kept.

    Returns
    -------
    box : list of list
        One ``[b0, b1, b2, b3]`` entry per kept detection, copied from the
        model's ``'boxes'`` output (numpy scalars).
    score : list
        Score of each kept detection, in the same order as ``box``.
    img : tensor
        The tensor produced by ``T.ToTensor()``; it is not moved to the
        model's device.
    """
    # Force three channels (grayscale / RGBA files would otherwise give the
    # model an unexpected channel count) and release the file handle promptly.
    with Image.open(image_path) as pil_img:
        pil_img = pil_img.convert('RGB')
    transform = T.Compose([T.ToTensor()])
    img, _ = transform(pil_img, {})

    # Run on whatever device the model lives on rather than hard-coding CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else img.device

    # Pure inference: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        pred = model([img.to(device)])[0]

    # Label 1 is treated as "person" here (COCO label map - see torchvision docs).
    is_people = (pred['labels'] == 1) & (pred['scores'] > threshold)
    kept_boxes = pred['boxes'][is_people].detach().cpu().numpy()
    kept_scores = pred['scores'][is_people].detach().cpu().numpy()

    box = [[b[0], b[1], b[2], b[3]] for b in kept_boxes]
    score = list(kept_scores)

    return box, score, img


def vis_detections(im, boxes, scores, saveas=None):
    """Plot an image tensor with red detection boxes and their scores.

    Each entry of ``boxes`` is read as ``[b0, b1, b2, b3]``: a rectangle is
    anchored at ``(b0, b1)`` with width ``b2 - b0`` and height ``b3 - b1``,
    and ``scores[i]`` is written just above that corner with three decimals.
    When ``saveas`` is truthy the figure is saved to that path and closed;
    otherwise it is only drawn.
    """
    # Scale by 255, move the first axis last and cast to bytes for PIL.
    # assumes `im` is a CPU (C, H, W) float tensor in [0, 1] - TODO confirm
    pixels = im.mul(255).permute(1, 2, 0).byte().numpy()
    picture = Image.fromarray(pixels)

    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(picture, aspect='equal')

    for idx, bbox in enumerate(boxes):
        left, top = bbox[0], bbox[1]
        outline = plt.Rectangle(
            (left, top),
            bbox[2] - left,
            bbox[3] - top,
            fill=False,
            edgecolor='red',
            linewidth=3.5,
        )
        ax.add_patch(outline)
        label = '{:.3f}'.format(scores[idx])
        ax.text(left, top - 2, label, fontsize=14, color='white')

    ax.axis('off')
    fig.tight_layout()

    if not saveas:
        plt.draw()
        return

    fig.savefig(saveas)
    # Close the figure so batch runs do not accumulate open figures.
    plt.close(fig)

def run_detect(image_path, model, outdir="../data/annotation"):
    """Detect on one image, save an annotated copy, and summarise the scores.

    Parameters
    ----------
    image_path : str
        Path of the image to process.
    model : torch.nn.Module
        Detection model forwarded to ``get_prediction``.
    outdir : str, default "../data/annotation"
        Directory receiving the annotated image ``anno_<file name>``; it is
        created if it does not exist yet.

    Returns
    -------
    dict
        ``{'image': <file name>, 'exp': <sum of the kept detection scores>}``.
        ``'exp'`` is always a Python float (``0.0`` when nothing was detected).
    """
    box, score, img = get_prediction(image_path, model)
    # basename also handles the backslash-separated paths glob yields on Windows.
    image_name = os.path.basename(image_path)
    # Make sure the target folder exists before the figure is written.
    os.makedirs(outdir, exist_ok=True)
    # float() gives one consistent type whether `score` is empty or not.
    result = {'image': image_name, 'exp': float(sum(score))}
    vis_detections(img, box, score, saveas=os.path.join(outdir, 'anno_' + image_name))
    return result

# Read image paths

In [4]:
image_dir = "../data/gsv"
# glob returns matches in arbitrary, OS-dependent order; sorting makes
# images[0] and the row order of the results reproducible across runs.
images = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))

## Test with one image

In [5]:
# Try the pipeline on the first image before processing the whole list.
run_detect(images[0], model)

{'image': 'image_23_80.jpg', 'exp': 4.975166320800781}

# Run on all images

In [6]:
# One result dict per image; run_detect also writes an annotated copy of each.
results = [run_detect(path, model) for path in images]

In [18]:
# Select the columns by name and rename 'exp' explicitly instead of assigning
# df.columns positionally: this does not depend on dict key order and still
# yields the expected (empty) columns when `results` is empty.
df = (
    pd.DataFrame(results, columns=['image', 'exp'])
    .rename(columns={'exp': 'score'})
)
# File names look like 'image_23_80.jpg'; keep the token after the first
# underscore as 'lid' (presumably a location id - TODO confirm).
df['lid'] = df['image'].str.split('_').str[1]

In [19]:
# Create the output folder first so the export cannot fail on a missing
# directory after the long detection run.
csv_path = "../data/dataframe/nyc-cnn-exp.csv"
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
df.to_csv(csv_path, index=False)