In [2]:
# `os` and `sys` are needed here, before the main import block below runs,
# because that block imports `stackhourglass`, which is presumably located
# under ROOT_DIR. Importing them first keeps this cell runnable on a fresh kernel.
import os
import sys

ROOT_DIR = os.path.abspath("../PSMNet-modified/")
# Guard against piling up duplicate entries when the cell is re-executed.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

import argparse
import os
import sys
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
import torch.nn.functional as F
import torchvision.transforms as transforms
from stackhourglass import PSMNet
import numpy as np
import time
import math
import cv2


# Per-channel mean / std (three values each) that scale_crop() unpacks into
# transforms.Normalize. The name indicates these are the ImageNet statistics.
__imagenet_stats = {'mean': [0.485, 0.456, 0.406],
                   'std': [0.229, 0.224, 0.225]}

def scale_crop(input_size, scale_size=None, normalize=__imagenet_stats):
    """Build the preprocessing pipeline: tensor conversion, then normalisation.

    Args:
        input_size: accepted for interface compatibility; not used here.
        scale_size: accepted for interface compatibility; not used here.
        normalize: mapping unpacked as keyword arguments into
            ``transforms.Normalize`` (expects ``mean`` and ``std`` entries).

    Returns:
        A ``transforms.Compose`` applying ``ToTensor`` followed by ``Normalize``.
        Despite the function name, no scaling or cropping step is included.
    """
    to_tensor = transforms.ToTensor()
    standardise = transforms.Normalize(**normalize)
    return transforms.Compose([to_tensor, standardise])

def get_transform(name='imagenet', input_size=None,
                  scale_size=None, normalize=None):
    """Return the image preprocessing transform used before inference.

    Args:
        name: accepted for interface compatibility; not used.
        input_size: forwarded to ``scale_crop``; defaults to 256 when omitted.
        scale_size: forwarded to ``scale_crop`` unchanged.
        normalize: mapping with ``mean`` / ``std`` entries; defaults to
            ``__imagenet_stats`` when omitted.

    Returns:
        The composed transform built by ``scale_crop``.
    """
    # Only fall back to the defaults when the caller did not supply a value;
    # previously both arguments were silently overwritten.
    if normalize is None:
        normalize = __imagenet_stats
    if input_size is None:
        input_size = 256
    return scale_crop(input_size=input_size, scale_size=scale_size, normalize=normalize)

# Loaded networks keyed by (weights_path, max_disp), so repeated calls to
# test() (e.g. once per video frame) do not rebuild the model and re-read the
# checkpoint from disk each time.
_PSMNET_CACHE = {}


def _load_psmnet(weights_path='pretrained_model_KITTI2012.tar', max_disp=192):
    """Return the eval-mode PSMNet on GPU 0, loading the checkpoint only once."""
    key = (weights_path, max_disp)
    if key not in _PSMNET_CACHE:
        model = nn.DataParallel(PSMNet(max_disp), device_ids=[0])
        model.cuda()
        checkpoint = torch.load(weights_path)
        model.load_state_dict(checkpoint['state_dict'])
        model.eval()
        _PSMNET_CACHE[key] = model
    return _PSMNET_CACHE[key]


def _to_padded_batch(img, transform, target_h=384, target_w=1248):
    """Apply `transform` to one H x W x 3 image and zero-pad to (1, 3, target_h, target_w)."""
    batch = transform(img).numpy()[np.newaxis]
    top_pad = target_h - batch.shape[2]
    right_pad = target_w - batch.shape[3]
    if top_pad < 0 or right_pad < 0:
        raise ValueError(
            'image of size %dx%d exceeds the %dx%d network input'
            % (batch.shape[2], batch.shape[3], target_h, target_w))
    # Rows are padded at the top and columns on the right. np.pad is the
    # public NumPy entry point (np.lib.pad was only an alias).
    return np.pad(batch, ((0, 0), (0, 0), (top_pad, 0), (0, right_pad)),
                  mode='constant', constant_values=0)


def test(imgL,imgR):
    """Run PSMNet on one stereo pair and return the predicted disparity map.

    Args:
        imgL: left image as an H x W x 3 array in BGR channel order (it is
            converted with cv2.COLOR_BGR2RGB). Rows 155..538 are used.
        imgR: right image, same layout as ``imgL``.

    Returns:
        numpy array holding the squeezed network output, computed on the
        input zero-padded to 384 x 1248. The caller is responsible for
        cropping the padding off again.

    Raises:
        ValueError: if a cropped image is larger than 384 x 1248.
    """
    model = _load_psmnet()
    processed = get_transform()

    batches = []
    for img in (imgL, imgR):
        img = img[155:155+384,:,:]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        batches.append(torch.FloatTensor(_to_padded_batch(img, processed)).cuda())

    # Tensors no longer need wrapping in Variable; no_grad() disables autograd.
    with torch.no_grad():
        output = model(batches[0], batches[1])
        pred_disp = torch.squeeze(output).cpu().numpy()

    return pred_disp

#load images
# NOTE(review): `image_l`, `image_r`, `outname_prefix` and `count` are not
# assigned anywhere above this point in the visible notebook; they must be
# defined before this cell can run on a fresh kernel.

pred_disp = test(image_l,image_r)
# Drop the zero-padded columns that test() added to reach a width of 1248.
left_pad  = 1248-946
img = pred_disp[0:,:-left_pad]
# Scale to 0..255, skipping the division when the map is all zeros.
peak = np.max(img)
if peak > 0:
    img = img*(255/peak)
# cv2.imshow treats floating-point images as being in [0, 1], so a float
# array scaled to 0..255 would render saturated; show 8-bit data instead.
img = np.clip(img, 0, 255).astype(np.uint8)
outname = outname_prefix + str(count) + '.png'
cv2.imshow(outname,img)
# imshow only draws once the HighGUI event loop is pumped.
cv2.waitKey(1)

In [None]:
"""
VID_DIR = '../videos/'
left = cv2.VideoCapture("carL.mp4")
right = cv2.VideoCapture("carR.mp4")
outname_prefix = 'depth_cnn'
limit = 100
count = 0
while count <= limit:
    # Read in the left and right images
    success_l, image_l = left.read()
    success_r, image_r = right.read()
    if count == 99:
        left_pad  = 1248-946
        img = pred_disp[0:,:-left_pad]
        img = img*(255/np.max(img))
        outname = outname_prefix + str(count) + '.png'
        cv2.imwrite(outname,img)
    count += 1
    if count%20 == 0:
        print([str(count) + '/' + str(limit)])
"""

In [5]:
"""
outname_prefix = 'depth/depth_cnn'
limit = 99
video = cv2.VideoWriter('depth_video.mp4',cv2.VideoWriter_fourcc(*'mp4v'),24,(946,384),False)
for j in range(limit):
    img = cv2.imread(outname_prefix + str(j) + '.png')
    video.write(img)
video.release()
"""

In [None]:
from depth_map_creator import *

In [None]:
DEPTH_DIR = '../input_images/depth_maps/'

print ("Reading in computed depth maps")
# Each record is (frame_index, filename, image). Building the three fields
# together keeps them aligned even when one file fails to parse or load.
depth_records = []
for filename in os.listdir(DEPTH_DIR):
    try:
        # The frame number follows a 9-character prefix
        # (presumably 'depth_cnn' - confirm against the files on disk).
        frame_idx = int(filename[9:].split('.')[0])
        img = skimage.io.imread(os.path.join(DEPTH_DIR, filename))
    except Exception as err:
        # Best effort: skip unreadable or oddly named files, but say why.
        print('Cant import ' + filename + ' (' + repr(err) + ')')
        continue
    if img is not None:
        depth_records.append((frame_idx, filename, img))

# order the images by name to know in order which frame they belong
# (sort on index and filename only, so image arrays are never compared)
depth_records.sort(key=lambda rec: rec[:2])
if depth_records:
    ord_names, depth_names, depth_img = zip(*depth_records)
else:
    # Nothing could be read: keep the three names defined and empty.
    ord_names, depth_names, depth_img = (), (), ()

In [None]:
def depth_list(lbbox, rbbox, depth, thresh=20, verbose=True):
    """
    Pair left and right bounding boxes and estimate a depth for each pair.

    For every left box, the right boxes are scanned in order and the first
    one whose length and width both differ by less than `thresh` is taken
    as its match. Centroids are computed for the diagnostic output only;
    they do not take part in the matching.

    Args:
        lbbox: array of left boxes, one per row, read as (y1, x1, y2, x2).
        rbbox: array of right boxes, same layout as `lbbox`.
        depth: 2-D depth image that is sliced with the averaged box corners.
        thresh: maximum allowed difference in box length and in box width.
        verbose: print the per-comparison diagnostics when True.

    Returns:
        Float array with 3 columns: [mean depth, left index, right index].
        Matched pairs fill the leading rows; the remaining rows stay zero.
        The array has 20 rows, or one row per left box if there are more
        than 20. The mean depth is NaN when the sliced region contains no
        positive values.
    """
    n_left = lbbox.shape[0]
    # Each left box yields at most one match, so this can never overflow.
    out = np.zeros((max(20, n_left), 3))
    k = 0
    for i in range(n_left):
        l_obj = lbbox[i]

        # Left-box geometry does not depend on the right box: compute it once.
        l_x1 = l_obj[1]
        l_x2 = l_obj[3]
        l_y1 = l_obj[0]
        l_y2 = l_obj[2]
        l_length = abs(l_x2 - l_x1)
        l_width = abs(l_y2 - l_y1)
        l_centroid_x = l_x1 + 0.5*l_length
        l_centroid_y = l_y1 + 0.5*l_width

        for j in range(rbbox.shape[0]):
            r_obj = rbbox[j]

            r_x1 = r_obj[1]
            r_x2 = r_obj[3]
            r_y1 = r_obj[0]
            r_y2 = r_obj[2]
            r_length = abs(r_x2 - r_x1)
            r_width = abs(r_y2 - r_y1)
            r_centroid_x = r_x1 + 0.5*r_length
            r_centroid_y = r_y1 + 0.5*r_width

            if verbose:
                print ("Comparing ", l_obj, " to ", r_obj)
                print ("Lengths: ", l_length, r_length)
                print ("Widths: ", l_width, r_width)
                print ("Centroid (x): ", l_centroid_x, r_centroid_x)
                print ("Centroid (y): ", l_centroid_y, r_centroid_y)

            if (abs(l_length - r_length) < thresh and abs(l_width - r_width) < thresh):
                depth_x1 = int((l_x1 + r_x1)/2.0)
                depth_x2 = int((l_x2 + r_x2)/2.0)
                depth_y1 = int((l_y1 + r_y1)/2.0)
                depth_y2 = int((l_y2 + r_y2)/2.0)
                # NOTE(review): the x range indexes the first axis of `depth`
                # and the y range the second. For a (rows, cols) image that
                # is transposed - confirm the intended layout.
                depth_obj_matrix = depth[depth_x1:depth_x2,depth_y1:depth_y2]
                valid = depth_obj_matrix[depth_obj_matrix > 0]
                # Same NaN result as a mean over nothing, without the warning.
                depth_obj = np.mean(valid) if valid.size else np.nan
                out[k] = [depth_obj, i, j]
                k += 1
                break
    return out

In [None]:
for i in range(100,101):
    # Detections for frame i: results[0] holds the left view, results[1] the right.
    left_det, right_det = results[0][i][0], results[1][i][0]
    lrois, left_classes = left_det['rois'], left_det['class_ids']
    rrois, right_classes = right_det['rois'], right_det['class_ids']

    # Keep only the boxes whose class id is 1.
    lbbox = np.array([lrois[j] for j in range(len(lrois)) if left_classes[j] == 1])
    rbbox = np.array([rrois[j] for j in range(len(rrois)) if right_classes[j] == 1])

    print (names[0][i], names[1][i], depth_names[i])
    out = depth_list(lbbox, rbbox, depth_img[i])
    print (out)

    # get the indices of each thing, and try to visualize it
    # NOTE(review): `r` is not defined in the visible cells, and `k` is not
    # used, so the same figure is drawn once per row of `out`.
    for k in range(out.shape[0]):
        display_imgs("foo", images[0][i], lrois, r['masks'], r['class_ids'], class_names, r['scores'], show=True)

    