In [68]:
from __future__ import print_function
import os
import shutil
import sys
import PIL
import glob
from matplotlib.pyplot import imshow
from PIL import  Image, ImageDraw
import matplotlib.pyplot as plt
import numpy as np
import cPickle
import itertools
%matplotlib notebook


In [59]:
#step 1 : read in bundle file
#step 2 : get the "0-cam" rate
#step 3 : if rate < thresh(or other heuristic):  we claim it is not good enough
#step 4 : first compute all the world-coordinate position for all valid segments (also for invalid bundle)
#step 5 : We can save them in a matrix-like
#step 6 : Store in file

In [185]:
def parseBundlerFile(fname):
    f = open(fname,'r')
    count = 0

    for lines in f.readlines():
        count = count + 1
        if count == 1: 
            # this is the title file
            continue
        elif count == 2:
            # second line is num cameras & num keypoints
            tmp = lines.split()
            num_cam = int(tmp[0])
            num_point = int(tmp[1])
            # the reading limit for camera information, each cam is described by 5 lines
            cam_limit = count + 5 * num_cam
            w, h = 3, 5*num_cam
            # camera information output
            CAM = [[0 for x in range(w)] for y in range(h)] 
            w_p,h_p = 3,3*num_point
            p_flag = 0
            PNT = [[0 for x in range(w_p)] for y in range(h_p)]
        elif count <= cam_limit:
            # those 0,1,2 has different meaning for different lines but all lines has 3 numbers
            CAM[count-3][0] = float(lines.split()[0])
            CAM[count-3][1] = float(lines.split()[1])
            CAM[count-3][2] = float(lines.split()[2])
        else:
            p_flag = p_flag + 1
            if p_flag == 1:
                #print count,h
                # 3D position
                for pos in range(0,3):
                    PNT[count-h-3][pos] = float(lines.split()[pos])
            elif p_flag == 2:
                #print count,h
                # RGB color of this keypoint
                for rgb_p in range(0,3):
                    PNT[count-h-3][rgb_p] = float(lines.split()[rgb_p])
            elif p_flag == 3:
                p_flag = 0
                sp=lines.split()
                assert(len(sp)==(4*int(sp[0])+1))
                for view_p in range(4*int(sp[0])):
                    sp[view_p+1]=float(sp[view_p+1])
                PNT[count-h-3]=sp[1:]
    f.close()
    return CAM, PNT

def parseCam(cam):
    # parse the camera into better format
    cam=np.asarray(cam)
    out=[]
    num_cam=int(len(cam)/5)
    for i in range(num_cam):
        this_cam={}
        subcam=cam[i*5:(i+1)*5]
        this_cam["focal_len"]=subcam[0][0]
        this_cam["distort_coeff"]=subcam[0][1:]
        this_cam["R"]=np.matrix(subcam[1:4])
        this_cam["t"]=np.matrix(subcam[4]).T
        out.append(this_cam)
    return out

def parseKeypoints(pnt):
    num_keypoint=int(len(pnt)/3)
    out=[]
    for i in range(num_keypoint):
        this_point={}
        subpnt=pnt[i*3:(i+1)*3]
        this_point["position"]=np.matrix(subpnt[0]).T
        this_point["color"]=np.asarray(subpnt[1])
        # parse the occur of this keypoint in all cameras
        view_list=[]
        for j in range(int(len(subpnt[2])/4)):
            this_cam={}
            subsubpnt=subpnt[2][j*4:(j+1)*4]
            this_cam["camera_index"]=int(subsubpnt[0])
            this_cam["sift_index"]=int(subsubpnt[1])
            this_cam["position"]=np.matrix(subsubpnt[2:]).T
            view_list.append(this_cam)
        this_point["view_list"]=view_list
        
        out.append(this_point)
    return out
def getdata(filename):
    cam, kp = parseBundlerFile(filename)
    cam = parseCam(cam)
    kp = parseKeypoints(kp)
    point_num = len(kp)
    total_num = len(cam)
    zero_num = 0
    pos_matrix = [[0 for x in range(total_num)] for y in range(total_num)] 
    tensor_pos = []
    return cam, kp, point_num, total_num, zero_num, pos_matrix, tensor_pos
def position(cam_1,cam_2):
    cam_2_world = -cam_2['R'].T*cam_2['t']
    position = cam_1['R']*cam_2_world + cam_1['t']
    position = position.T
    position = position.tolist()[0]
    return position

def cam_worlds(cams):
    out=np.zeros(shape=(3, len(cams)))
    for i, cam in enumerate(cams):
        cam_world = -cam['R'].T*cam['t']
        out[:, i]=np.squeeze(cam_world)
        
    return out

def position_vec(cam1, cam_world_locs):
    pos = cam1['R']*cam_world_locs + cam1['t'] # output shape = 3*n
    pos = pos.T
    pos = pos.ravel()
    pos = pos.tolist()[0]
    return pos
    
def data_update(total_num, cam, zero_num, filename,pos_matrix, tensor_pos):
    dist = [0]
    cam_world_locs = cam_worlds(cam)
    
    for i in xrange(0,total_num):
        if  np.count_nonzero(cam[i]['R'])==0  and np.count_nonzero( cam[i]['t'] ) == 0:
            zero_num = zero_num + 1
        if i !=total_num-1:
            dist.append(np.linalg.norm(cam[i+1]['t']-cam[i]['t']))    
        this_pos=position_vec(cam[i], cam_world_locs)
        tensor_pos = itertools.chain(tensor_pos, this_pos)
    tmp = filename.split('/')
    name = tmp[1]+'/'+tmp[2]
    return dist, pos_matrix, name, zero_num, tensor_pos
def get_stop(total_num, cam, zero_num, filename,pos_matrix, dist, avg):
    stop = [1,1]
    epsilon = 0.1*avg
    for i in xrange(2,total_num):
        if dist[i] < epsilon and dist[i-1] < epsilon:
            stop.append(1)
        else:
            stop.append(0)
    return stop

In [195]:
def process_all():
    # read file in a loop
    file_num = 0;
    data = []
    ''' 
            image/ego: [[.1,.1,.1],[.2,.2,.2],..]
            image/class/video_name: 'videoname.ts'
            image/valid_label: 1
            image/stop_label: 1
            image/format: 'JPEG'
            image/class/img_name: '00001.JPEG'
            image/encoded: <JPEG encoded string> '''
    all_file = sorted(glob.iglob(os.path.join('./', '*','seg*','bundle', 'bundle.out')))
    for ifile, filename in enumerate(all_file):
        cam,kp,point_num, total_num,zero_num, pos_matrix, tensor_pos = getdata(filename)
        file_num = file_num + 1;


        # check camera
        dist, pos_matrix, name, zero_num, tensor_pos= \
            data_update(total_num, cam, zero_num, filename,pos_matrix, tensor_pos)
        #print(len(tensor_pos))
        #print(list(tensor_pos))
        #print(name)
        #check dis
        avg  = sum(dist)/len(dist)
        
        flag = [ x<5*avg for x in dist]
        flag.insert(0,True)
        index = [i for i, j in enumerate(flag) if j == True]
        stop = get_stop(total_num, cam, zero_num, filename,pos_matrix, dist, avg)
        
        this_data = {"video_name":name,
                     "cam_num":total_num,
                     "egomotion":list(tensor_pos),
                     "can_use":0,
                     "valid_label":flag,
                     'stop_label':stop}


        rate = float(zero_num)/float(total_num)
        if rate <=0.05 and point_num > 1500:
            # ok the data seems good here! this is tight! We can relax this later.
            this_data["can_use"] = 1
        data.append(this_data)
        print(file_num)
        if ifile==10:
            #print(avg)
            #print(stop)
            print(dist)
            print()
            break

In [196]:
import cProfile
#cProfile.run('process_all()')
process_all()


1
2
3
4
5
6
7
8
9
10
11
[0, 0.0070339933599053162, 0.0090815317522146052, 0.052688013461968781, 0.012034393090168167, 0.061484718499352435, 0.026072290713449669, 0.03209120351965121, 0.045523542317529482, 0.028067412021059918, 0.023051488675489798, 0.03958078828784993, 0.040684318427432989, 0.0064775943168287721, 0.047467101054230659, 0.031186604173511562, 0.03457894690210251, 0.029106199090055577, 0.021230114565845021, 0.043744472116816813, 0.026909075314255362, 0.03187798165063252, 0.031940480143772328, 0.021019860876626145, 0.024459043097922798, 0.035972376679606857, 0.034642674829935931, 0.065014701877273859, 0.0043999651925985418, 0.0014218910670173814, 0.0020963058198852549, 0.063398126347112282, 0.034209606224611028, 0.024812369109762471, 0.022570292470298969, 0.024679791836678049, 0.022224901496885649, 0.01450309553975709, 0.029211756962772381, 0.024360022375897593, 0.026290588666518316, 0.015903204452920258, 0.026150652431445438, 0.021266716666329826, 0.032605520783684561, 0.0

149


In [88]:
cPickle.dump(data, open("data.dat", "w"))

obj = cPickle.load(open("data.dat", "r"))
print(len(obj))
not_valid = 0
for i in range(0,len(data)):
    if data[i]["valid"] == 0:
        not_valid = not_valid + 1
print(not_valid)
print(len(data))

859


In [57]:
data[0]['valid_label']
cPickle.dump(data[0:2], open("toy.dat", "w"))

In [52]:
data[0]['egomotion']

[[[2.926658915214375e-12, 2.618127936671044e-12, -5.610623077245691e-12],
  [-0.005863348640788857, -0.00032857034846434097, -0.015855299459926364],
  [0.007724847330795481, -0.007485623923884632, -0.11309856675348584],
  [0.014045529168715098, -0.008307073181152203, -0.09091105466477156],
  [0.014115869903654599, -0.01878497915255284, -0.223318003347047],
  [0.017779678697926793, -0.02287525094896037, -0.27563948330548627],
  [0.024311276070428933, -0.027076074063540234, -0.3210492837833012],
  [0.027684285918594342, -0.03182910917894016, -0.3836848000544215],
  [0.026657984410027258, -0.03758307968819313, -0.4533955316129754],
  [0.03841086498640833, -0.04200740499401556, -0.47970834012872565],
  [0.04286897366610021, -0.04665844242297934, -0.5386981507804238],
  [0.04645012012793981, -0.048933263121667236, -0.5946324679470463],
  [0.03874871562445498, -0.055056615411252396, -0.679050840865246],
  [0.05155578361566454, -0.06012167416541425, -0.7136375352829551],
  [0.0527194006078309