In [None]:
import os
import numpy as np
import imageio
import cv2
import ipywidgets as widgets
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pickle as pkl
import pandas as pd 

from mpl_toolkits.mplot3d import Axes3D
from math import sqrt
from statistics import mean
from  sklearn.metrics import mean_squared_error

# --- Rig / camera constants -------------------------------------------------
# GRID_LENGTH scales the integer grid steps in the *_true_vect lists below
# (presumably cm per grid square -- confirm against the experiment setup).
GRID_LENGTH = 4.95
BASELINE = 12  # cm
PIXEL_LENGTH = 0.0002  # cm
FOCAL_LENGTH = PIXEL_LENGTH*1000  # cm (may need to double check camera model)

# --- Frame indices selected for evaluation ----------------------------------
# The pickle read below was produced once with:
#     saved_indices = {}
#     saved_indices['cup_all_testset'] = cup_index
#     saved_indices['teabag_all_testset'] = teabag_index
#     saved_indices['tap_all_testset'] = tap_index
#     with open('./selected_indices.pkl', 'wb') as file:
#         pkl.dump(saved_indices, file)
with open('selected_indices.pkl', 'rb') as file:
    saved_indices = pkl.load(file)

teabag_index = saved_indices['teabag_all_testset']
cup_index = saved_indices['cup_all_testset']
tap_index = saved_indices['tap_all_testset']

# --- Ground-truth displacements between consecutive selected frames ----------
# Each tuple is an (x, y, z) step expressed in grid units.
tap_true_vect = [(1, 0, 0), (0, 1, 0), (2, 2, 0), (1, -2, 0), (-3, 2, 0)]
tb_true_vect = [(1, 0, 0), (0, 1, 0), (2, 2, 0), (2, -3, 0), (0, 5, 0), (-4, 1, 0), (6, -5, 0)]
cup_true_vect = [(1, 0, 0), (0, 1, 0), (3, 2, 0), (-2, 2, 0), (4, 1, 0), (2, -4, 0), (-2, -1, 0)]

def distanceCal(x,y,z):
    """Return the Euclidean length of the 3D vector (x, y, z)."""
    squared_norm = sum(component**2 for component in (x, y, z))
    return sqrt(squared_norm)

def formHeaders(prefix, parts):
    """Build flat column names by joining several header rows with '-'.

    Args:
        prefix: sequence of strings, the first header row.
        parts: iterable of further header rows; the j-th entry of each row is
            appended to the j-th entry of ``prefix``, separated by '-'.

    Returns:
        A new list of joined header strings, e.g. ``['ind1-head-x', ...]``.

    Raises:
        IndexError: if a row in ``parts`` is longer than ``prefix``.
    """
    # Work on a copy: the original aliased ``prefix`` and silently rewrote the
    # caller's list in place.
    new_headers = list(prefix)
    for part in parts:
        for j, word in enumerate(part):
            new_headers[j] = '-'.join([new_headers[j], word])
    return new_headers

def getVectorDisplacement(coord3D, selected_indices=()):
    """Convert per-frame 3D coordinates into frame-to-frame displacement vectors.

    Args:
        coord3D: sequence of frames; each frame is a sequence of points and
            each point is indexable as ``point[0..2]`` (X, Y, Z).
        selected_indices: frame indices to keep. When empty (or ``None``) every
            frame is used.

    Returns:
        A list with one entry per consecutive pair of kept frames. Each entry
        is a list of ``[dX, dY, dZ]`` lists, one per point of the later frame.

    Raises:
        IndexError: if a frame has more points than the frame before it.
    """
    # Any non-empty selection filters frames. The original tested ``len(...) > 1``
    # and therefore ignored a one-element selection, using all frames instead.
    if selected_indices is not None and len(selected_indices) > 0:
        wanted = set(selected_indices)
    else:
        wanted = None

    prev_coord, result_vectors = None, []
    for ind, object_coord in enumerate(coord3D):
        if wanted is not None and ind not in wanted:
            continue
        # No usable baseline yet (first kept frame, or the previous frame had no
        # points). The explicit length test also works for NumPy arrays, whose
        # truth value is ambiguous.
        if prev_coord is None or len(prev_coord) == 0:
            prev_coord = object_coord
            continue
        obj_vectors = []
        for m in range(len(object_coord)):
            current, previous = object_coord[m], prev_coord[m]
            obj_vectors.append([current[0] - previous[0],
                                current[1] - previous[1],
                                current[2] - previous[2]])
        result_vectors.append(obj_vectors)
        prev_coord = object_coord
    return result_vectors

def pixelTo3DCameraCoord(left_img, disp_map, coords):
    """Back-project tracked left-image pixels into 3D camera coordinates.

    Args:
        left_img: left camera image; only ``shape[0]`` and ``shape[1]`` are read.
        disp_map: 2D disparity array indexed as ``[row, col]``. It may have a
            different resolution than ``left_img``; the ratio between the two
            is used to rescale both the lookup position and the disparity.
        coords: iterable of detections; an entry of length 0 is skipped,
            otherwise ``entry[0]`` is read as an ``(x, y)`` pixel position.

    Returns:
        List of dicts with keys ``left_x``, ``left_y``, ``right_x``, ``X``,
        ``Y``, ``Z``. Z is the depth from the camera center in cm; X and Y are
        the other two axes.
    """
    img_rows, img_cols = left_img.shape[0], left_img.shape[1]
    disp_rows, disp_cols = disp_map.shape[0], disp_map.shape[1]
    row_ratio = img_rows/disp_rows
    col_ratio = img_cols/disp_cols

    points_3d = []
    for pix in coords:
        if len(pix) == 0:
            continue
        # Detections are (x, y) while arrays are indexed (row, col).
        x_l, y_l = map(int, pix[0])
        d_x = int(pix[0][0]/col_ratio)
        d_y = int(pix[0][1]/row_ratio)
        # Disparity rescaled from disparity-map pixels to image pixels.
        d = disp_map[d_y, d_x]*col_ratio
        x_r = int(x_l - d)

        Z = BASELINE*FOCAL_LENGTH/(d*PIXEL_LENGTH)
        X = (x_l - img_cols/2)*PIXEL_LENGTH*Z/FOCAL_LENGTH
        Y = (y_l - img_rows/2)*PIXEL_LENGTH*Z/FOCAL_LENGTH
        points_3d.append(dict(left_x=x_l, left_y=y_l, right_x=x_r, X=X, Y=Y, Z=Z))
    return points_3d

def LEAStereoCoordinate(l_img_path, disp_path, l_datafile):
    """Compute 3D coordinates for every tracked frame that has a left image.

    Args:
        l_img_path: directory holding the left-camera ``<frame>.jpg`` images.
        disp_path: directory holding the ``<frame>_disp.npy`` disparity maps.
        l_datafile: pickle file mapping frame names to a dict whose
            ``['coordinates'][0]`` entry lists the tracked pixel positions.

    Returns:
        Dict mapping frame name to the list returned by
        ``pixelTo3DCameraCoord`` for that frame. Keys of the pickle without a
        matching image in ``l_img_path`` are skipped.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use this on
    # tracking files you produced yourself.
    with open(l_datafile, 'rb') as l_dataf:
        l_data = pkl.load(l_dataf)

    # A set keeps the per-frame membership test O(1).
    left_cam_files = set(os.listdir(l_img_path))
    coordinate3D = {}
    for framename, frame_data in l_data.items():
        img_file = '{}.jpg'.format(framename)
        if img_file not in left_cam_files:
            continue
        disp_file = "{}_disp.npy".format(img_file.split('.')[0])
        # os.path.join works whether or not the directory ends with a separator;
        # plain string concatenation silently built a wrong path otherwise.
        im_disp = np.load(os.path.join(disp_path, disp_file))
        im_l = imageio.imread(os.path.join(l_img_path, img_file))
        pixel_coordinates = frame_data['coordinates'][0]
        coordinate3D[framename] = pixelTo3DCameraCoord(im_l, im_disp, pixel_coordinates)
    return coordinate3D

def getMeasurements(model_pos):
    """Return displacement vectors and the mean travelled distance per frame step.

    Args:
        model_pos: per-frame list of 3D points, passed to ``getVectorDisplacement``.

    Returns:
        ``(model_vect, model_dist)``: the displacement vectors for each frame
        step, and for each step the mean Euclidean length of its vectors.
    """
    model_vect = getVectorDisplacement(model_pos)
    model_dist = [
        mean([distanceCal(*vec) for vec in vector_set])
        for vector_set in model_vect
    ]
    return model_vect, model_dist

def findLosses(true_values, raw_values, scales):
    """Return the MSE against ``true_values`` for each candidate scale factor.

    Every value in ``raw_values`` is multiplied by the scale before the mean
    squared error is computed; the result has one loss per entry of ``scales``.
    """
    return [
        mean_squared_error(true_values, [scaling*value for value in raw_values])
        for scaling in scales
    ]

In [None]:
# Teabag test set: LEAStereo distances vs. ground truth.
# Getting the coordinate from the dlc folder for now
object_set = 'teabag_all_testset'
l_tb_img_dir, r_tb_img_dir, tb_disp_path = (
    template.format(object_set)
    for template in ('./dataset/{}/left/', './dataset/{}/right/', './dataset/{}/disparity/')
)

l_tb_datafile = '../camera-main/videos/{}/1639767318-leftDLC_resnet50_make_tea_multiNov11shuffle1_100000_full.pickle'.format(object_set)
r_tb_datafile = '../camera-main/videos/{}/1639767318-rightDLC_resnet50_make_tea_multiNov11shuffle1_100000_full.pickle'.format(object_set)

# Right-camera detections are kept for the overlay plot in a later cell.
with open(r_tb_datafile, 'rb') as r_dataf:
    r_tb_data = pkl.load(r_dataf)

tb_coordinate3D = LEAStereoCoordinate(l_tb_img_dir, tb_disp_path, l_tb_datafile)

# Ground-truth step lengths: grid steps scaled by GRID_LENGTH.
tb_true_dist = [
    distanceCal(*[GRID_LENGTH*step for step in vec])
    for vec in tb_true_vect
]

# Per frame, the first three tracked points as [X, Y, Z].
tb_model_pos = [
    [[pos['X'], pos['Y'], pos['Z']] for pos in frame_points[:3]]
    for frame_points in tb_coordinate3D.values()
]

tb_model_vect, tb_model_dist = getMeasurements(tb_model_pos)
# print([tb_true_dist[k] - tb_model_dist[k] for k in range(7)])
print('LEAstereo MSE:', mean_squared_error(tb_true_dist, tb_model_dist))

In [None]:
# Teabag test set: DeepLabCut 3D distances vs. ground truth.
tb3d_path = '../camera-main/videos/{}/1639767318_make_tea_3D.csv'.format(object_set)
tb3d_csv = pd.read_csv(tb3d_path)

# The first three data rows carry the remaining header levels; merge them into
# flat column names and keep only the rows after them.
part1, part2, part3 = (tb3d_csv.iloc[row_no].tolist() for row_no in range(3))
new_headers = formHeaders(part1, [part2, part3])

new_tb3d_csv = tb3d_csv[3:].copy()
new_tb3d_csv.columns = new_headers
new_tb3d_csv = new_tb3d_csv.reset_index()
new_tb3d_csv = new_tb3d_csv.loc[:, ['ind1-head-x', 'ind1-head-y', 'ind1-head-z',
                                    'ind1-middle-x', 'ind1-middle-y', 'ind1-middle-z',
                                    'ind1-tail-x', 'ind1-tail-y', 'ind1-tail-z']]

# One [head, middle, tail] triple of xyz lists per selected frame.
tb_dlc_pos = []
for frame_idx in teabag_index:
    row = [float(value) for value in new_tb3d_csv.iloc[frame_idx].tolist()]
    tb_dlc_pos.append([row[:3], row[3:6], row[6:9]])

tb_dlc_vect, tb_dlc_dist = getMeasurements(tb_dlc_pos)

# Sweep candidate scale factors for the DLC distances and report the best MSE.
scales = np.arange(2, 3, 0.05)
loss = findLosses(tb_true_dist, tb_dlc_dist, scales)

plt.plot(scales, loss)
print('Min MSE:', min(loss))

In [None]:
# Visual check for the teabag set: one row per frame. The left image shows the
# tracked keypoints; the right image shows the same keypoints shifted by the
# LEAStereo disparity together with the right-camera DLC detections.
#
# The figure is created once, sized to the number of frames. Previously a new
# 16x40 figure was opened on every iteration while the subplot index kept
# growing, so each figure held a single row and more than 8 frames made
# add_subplot(8, 2, count) fail.
tb_frame_keys = list(tb_coordinate3D.keys())
if tb_frame_keys:
    fig, axes = plt.subplots(len(tb_frame_keys), 2,
                             figsize=(16, 5*len(tb_frame_keys)), squeeze=False)
    for (ax_left, ax_right), key in zip(axes, tb_frame_keys):
        # Right-camera DLC detections for the first three keypoints.
        dlc_rx, dlc_ry = [], []
        for pix in r_tb_data[key]['coordinates'][0][:3]:
            if len(pix) == 0:
                continue
            x_dlc, y_dlc = map(int, pix[0])
            dlc_rx.append(x_dlc)
            dlc_ry.append(y_dlc)

        tracked = tb_coordinate3D[key][:3]
        lx = [coor['left_x'] for coor in tracked]
        ly = [coor['left_y'] for coor in tracked]
        rx = [coor['right_x'] for coor in tracked]

        img_file = '{}.jpg'.format(key)
        img_left = imageio.imread(l_tb_img_dir + img_file)
        img_right = imageio.imread(r_tb_img_dir + img_file)

        ax_left.set_title('Left Image')
        ax_left.imshow(img_left)
        ax_left.scatter(lx, ly)

        ax_right.set_title('Right Image')
        ax_right.imshow(img_right)
        ax_right.scatter(rx, ly, label='LEAStereo (left x - disparity)')
        ax_right.scatter(dlc_rx, dlc_ry, label='DLC right camera')
        ax_right.legend()
    # The disparity panel is disabled; load '<frame>_disp.npy' from tb_disp_path
    # and imshow it on a third column to bring it back.
    plt.show()

In [None]:
# Tap test set: LEAStereo distances vs. ground truth.
# Getting the coordinate from the dlc folder for now
tap_object_set = 'tap_all_testset'
l_tap_img_dir, r_tap_img_dir, tap_disp_path = (
    template.format(tap_object_set)
    for template in ('./dataset/{}/left/', './dataset/{}/right/', './dataset/{}/disparity/')
)

l_tap_datafile = '../camera-main/videos/{}/1639767563-leftDLC_resnet50_make_tea_multiNov11shuffle1_100000_full.pickle'.format(tap_object_set)
r_tap_datafile = '../camera-main/videos/{}/1639767563-rightDLC_resnet50_make_tea_multiNov11shuffle1_100000_full.pickle'.format(tap_object_set)

with open(r_tap_datafile, 'rb') as r_dataf:
    r_tap_data = pkl.load(r_dataf)

# Ground-truth step lengths: grid steps scaled by GRID_LENGTH.
tap_true_dist = [
    distanceCal(*[GRID_LENGTH*step for step in vec])
    for vec in tap_true_vect
]

tap_coordinate3D = LEAStereoCoordinate(l_tap_img_dir, tap_disp_path, l_tap_datafile)

# Per frame, the slice [-3:-1] of the tracked points as [X, Y, Z].
tap_model_pos = [
    [[pos['X'], pos['Y'], pos['Z']] for pos in frame_points[-3:-1]]
    for frame_points in tap_coordinate3D.values()
]

tap_model_vect, tap_model_dist = getMeasurements(tap_model_pos)
print([tap_true_dist[k] - tap_model_dist[k] for k in range(len(tap_true_dist))])
print('LEAstereo MSE:', mean_squared_error(tap_true_dist, tap_model_dist))

In [None]:
# Tap test set: DeepLabCut 3D distances vs. ground truth.
tap3d_path = '../camera-main/videos/{}/1639767563_make_tea_3D.csv'.format(tap_object_set)
tap3d_csv = pd.read_csv(tap3d_path)

# The first three data rows carry the remaining header levels; merge them into
# flat column names and keep only the rows after them.
part1, part2, part3 = (tap3d_csv.iloc[row_no].tolist() for row_no in range(3))
new_headers = formHeaders(part1, [part2, part3])

new_tap3d_csv = tap3d_csv[3:].copy()
new_tap3d_csv.columns = new_headers
new_tap3d_csv = new_tap3d_csv.reset_index()
new_tap3d_csv = new_tap3d_csv.loc[:, ['single-tap_h-x', 'single-tap_h-y', 'single-tap_h-z',
                                      'single-tap_m-x', 'single-tap_m-y', 'single-tap_m-z']]

# One [tap_h, tap_m] pair of xyz lists per selected frame.
tap_dlc_pos = []
for frame_idx in tap_index:
    row = [float(value) for value in new_tap3d_csv.iloc[frame_idx].tolist()]
    tap_dlc_pos.append([row[:3], row[3:6]])

tap_dlc_vect, tap_dlc_dist = getMeasurements(tap_dlc_pos)

# Sweep candidate scale factors for the DLC distances and report the best MSE.
scales = np.arange(2, 3, 0.05)
loss = findLosses(tap_true_dist, tap_dlc_dist, scales)

plt.plot(scales, loss)
print('Min MSE:', min(loss))

In [None]:
# Visual check for the tap set: one row per frame. The left image shows the
# last three tracked keypoints; the right image shows the same keypoints
# shifted by the LEAStereo disparity.
#
# The figure is created once, sized to the number of frames. Previously a new
# 16x40 figure was opened on every iteration while the subplot index kept
# growing, so each figure held a single row and more than 8 frames made
# add_subplot(8, 2, count) fail.
tap_frame_keys = list(tap_coordinate3D.keys())
if tap_frame_keys:
    fig, axes = plt.subplots(len(tap_frame_keys), 2,
                             figsize=(16, 5*len(tap_frame_keys)), squeeze=False)
    for (ax_left, ax_right), key in zip(axes, tap_frame_keys):
        tracked = tap_coordinate3D[key][-3:]
        lx = [coor['left_x'] for coor in tracked]
        ly = [coor['left_y'] for coor in tracked]
        rx = [coor['right_x'] for coor in tracked]

        img_file = '{}.jpg'.format(key)
        img_left = imageio.imread(l_tap_img_dir + img_file)
        img_right = imageio.imread(r_tap_img_dir + img_file)

        ax_left.set_title('Left Image')
        ax_left.imshow(img_left)
        ax_left.scatter(lx, ly)

        ax_right.set_title('Right Image')
        ax_right.imshow(img_right)
        ax_right.scatter(rx, ly)
    # The right-camera DLC overlay (r_tap_data[key]['coordinates'][0]) and the
    # disparity panel are disabled for this set, so neither is loaded here.
    plt.show()

In [None]:
# Cup test set: LEAStereo distances vs. ground truth.
# Getting the coordinate from the dlc folder for now
cup_object_set = 'cup_all_testset'
l_cup_img_dir, r_cup_img_dir, cup_disp_path = (
    template.format(cup_object_set)
    for template in ('./dataset/{}/left/', './dataset/{}/right/', './dataset/{}/disparity/')
)

l_cup_datafile = '../camera-main/videos/{}/1639767445-leftDLC_resnet50_make_tea_multiNov11shuffle1_100000_full.pickle'.format(cup_object_set)
r_cup_datafile = '../camera-main/videos/{}/1639767445-rightDLC_resnet50_make_tea_multiNov11shuffle1_100000_full.pickle'.format(cup_object_set)

with open(r_cup_datafile, 'rb') as r_dataf:
    r_cup_data = pkl.load(r_dataf)

# Ground-truth step lengths: grid steps scaled by GRID_LENGTH.
cup_true_dist = [
    distanceCal(*[GRID_LENGTH*step for step in vec])
    for vec in cup_true_vect
]

cup_coordinate3D = LEAStereoCoordinate(l_cup_img_dir, cup_disp_path, l_cup_datafile)

# Per frame, tracked points 3..5 as [X, Y, Z].
cup_model_pos = [
    [[pos['X'], pos['Y'], pos['Z']] for pos in frame_points[3:6]]
    for frame_points in cup_coordinate3D.values()
]

cup_model_vect, cup_model_dist = getMeasurements(cup_model_pos)
# print([cup_true_dist[k] - cup_model_dist[k] for k in range(len(cup_true_dist))])
print('Cup LEAstereo MSE:', mean_squared_error(cup_true_dist, cup_model_dist))

In [None]:
# Cup test set: DeepLabCut 3D distances vs. ground truth.
cup3d_path = '../camera-main/videos/{}/1639767445_make_tea_3D.csv'.format(cup_object_set)
cup3d_csv = pd.read_csv(cup3d_path)

# The first three data rows carry the remaining header levels; merge them into
# flat column names and keep only the rows after them.
part1, part2, part3 = (cup3d_csv.iloc[row_no].tolist() for row_no in range(3))
new_headers = formHeaders(part1, [part2, part3])

new_cup3d_csv = cup3d_csv[3:].copy()
new_cup3d_csv.columns = new_headers
new_cup3d_csv = new_cup3d_csv.reset_index()
new_cup3d_csv = new_cup3d_csv.loc[:, ['single-cup_h-x', 'single-cup_h-y', 'single-cup_h-z',
                                      'single-cup_m-x', 'single-cup_m-y', 'single-cup_m-z',
                                      'single-cup_t-x', 'single-cup_t-y', 'single-cup_t-z']]

# One [cup_h, cup_m, cup_t] triple of xyz lists per selected frame.
cup_dlc_pos = []
for frame_idx in cup_index:
    row = [float(value) for value in new_cup3d_csv.iloc[frame_idx].tolist()]
    cup_dlc_pos.append([row[:3], row[3:6], row[6:9]])

cup_dlc_vect, cup_dlc_dist = getMeasurements(cup_dlc_pos)

# Sweep candidate scale factors for the DLC distances and report the best MSE.
scales = np.arange(2, 3, 0.05)
loss = findLosses(cup_true_dist, cup_dlc_dist, scales)

plt.plot(scales, loss)
print('Cup Min MSE:', min(loss))

In [None]:
# Pool the teabag, tap and cup step distances (in that order) and score both
# methods on the combined set.
combine_true_dist = [*tb_true_dist, *tap_true_dist, *cup_true_dist]
combine_model_dist = [*tb_model_dist, *tap_model_dist, *cup_model_dist]
combine_dlc_dist = [*tb_dlc_dist, *tap_dlc_dist, *cup_dlc_dist]

# LEAStereo is scored as-is; DLC is scored once per candidate scale factor.
combine_model_mse = mean_squared_error(combine_true_dist, combine_model_dist)
scales = np.arange(2, 3, 0.05)
combine_dlc_mse = findLosses(combine_true_dist, combine_dlc_dist, scales)

plt.plot(scales, combine_dlc_mse)
print('Combine Min LEAstereo MSE:', combine_model_mse)
print('Combine Min DLC MSE:', min(combine_dlc_mse))

In [None]:

# count=0
# extra_path = '../camera-main/videos/tap_all_testset/1639767563-left.mp4'
# vidcap = cv2.VideoCapture(extra_path)
# success, first_frame = vidcap.read()
# while success:
# #     cv2.imwrite(os.path.join(l_img_dir, "frame{}.jpg".format(str(count).zfill(2))), image)     # save frame as JPEG file      
#     success, image = vidcap.read()
#     print('Read a new frame: ', success, count)
#     cv2.imshow("Input", image)
#     cv2.waitKey(0)
#     count += 1

In [None]:
# # Select the frame in the dataset folder that will be shown below
# shown_frame = 'frame002.jpg'

# lx, ly, rx = [], [], []
# for coor in cam_coordinates[shown_frame]:
#     lx.append(coor['left_x'])
#     ly.append(coor['left_y'])
#     rx.append(coor['right_x'])
# img_left = imageio.imread(l_img_dir + shown_frame)
# img_right = imageio.imread(r_img_dir + shown_frame)
# disp_path = "{}_disp.npy".format(disp_dir + shown_frame.split('.')[0])
# img_disparity = np.load(disp_path)
# fig = plt.figure(figsize=(16, 20))
# fig.add_subplot(3, 1, 1)
# plt.title('Right Image')
# plt.imshow(img_right)
# plt.scatter(rx, ly)
# fig.add_subplot(3, 1, 2)
# plt.title('Left Image')
# plt.imshow(img_left)
# plt.scatter(lx, ly)
# fig.add_subplot(3, 1, 3)
# plt.title('Disparity (Left-Camera View)')
# plt.imshow(img_disparity)

In [None]:
# Inspect the reconstructed ground-truth positions. `tb_true_pos` is only built
# by the following cell, so a bare reference raised NameError on a fresh
# "Restart & Run All"; the lookup below shows nothing until the value exists.
globals().get('tb_true_pos')

In [None]:
# Rebuild the ground-truth teabag positions: start from the first LEAStereo
# frame and apply each true grid displacement (scaled by GRID_LENGTH) to every
# tracked point.
# The original inner loop ran over range(len(delta)), i.e. the three vector
# components, and only moved the right points because three keypoints happen
# to be tracked; iterating over the points themselves removes that coupling.
tb_true_pos = [[list(point) for point in tb_model_pos[0]]]
for delta in tb_true_vect:
    previous_points = tb_true_pos[-1]
    tb_true_pos.append([
        [point[j] + delta[j]*GRID_LENGTH for j in range(3)]
        for point in previous_points
    ])

true_points = [coor for obj_set in tb_true_pos for coor in obj_set]
x = [coor[0] for coor in true_points]
y = [coor[1] for coor in true_points]
z = [coor[2] for coor in true_points]

fig = plt.figure()
# Axes3D(fig) no longer attaches itself to the figure on current Matplotlib
# (the plot comes out empty); request the 3D projection from the figure instead.
ax = fig.add_subplot(111, projection='3d')

# Z-axis increasing the further objects are and X-axis increasing to the right
# is the original camera view in the left camera coordinate system.
ax.scatter(x, y, z)
ax.set_title('3D Coordinate Plot')
ax.set_xlabel('x-axis')
ax.set_ylabel('y-axis')
ax.set_zlabel('z-axis')
ax.view_init(270, 270)
plt.show()

In [None]:
# Interactive documentation lookup for the view_init call used by the 3D plot
# cells. Relies on `ax` left in the kernel by a previously executed plot cell.
help(ax.view_init)

In [None]:
# %matplotlib widget
# 3D scatter of every LEAStereo teabag keypoint across all frames.
model_points = [coor for obj_set in tb_model_pos for coor in obj_set]
x = [coor[0] for coor in model_points]
y = [coor[1] for coor in model_points]
z = [coor[2] for coor in model_points]

fig = plt.figure()
# Axes3D(fig) no longer attaches itself to the figure on current Matplotlib
# (the plot comes out empty); request the 3D projection from the figure instead.
ax = fig.add_subplot(111, projection='3d')

# Z-axis increasing the further objects are and X-axis increasing to the right
# is the original camera view in the left camera coordinate system.
ax.scatter(x, y, z)
ax.set_title('3D Coordinate Plot')
ax.set_xlabel('x-axis')
ax.set_ylabel('y-axis')
ax.set_zlabel('z-axis')
ax.view_init(270, 270)
plt.show()


In [None]:
# %matplotlib widget
# 3D scatter of every DeepLabCut teabag keypoint across the selected frames.
dlc_points = [coor for obj_set in tb_dlc_pos for coor in obj_set]
x = [coor[0] for coor in dlc_points]
y = [coor[1] for coor in dlc_points]
z = [coor[2] for coor in dlc_points]

fig = plt.figure()
# Axes3D(fig) no longer attaches itself to the figure on current Matplotlib
# (the plot comes out empty); request the 3D projection from the figure instead.
ax = fig.add_subplot(111, projection='3d')

# Z-axis increasing the further objects are and X-axis increasing to the right
# is the original camera view in the left camera coordinate system.
ax.scatter(x, y, z)
ax.set_title('3D Coordinate Plot')
ax.set_xlabel('x-axis')
ax.set_ylabel('y-axis')
ax.set_zlabel('z-axis')
ax.view_init(270, 270)
plt.show()
