# Video to Dataset Draft
*Contributors: Lily*

Assumed format of the input CSV (comma-separated, one row per video partition):

`start (ms) | end (ms) | delete | class 1 | class 2 | ... | class N`

e.g.
`200,1300,0,1,0,0,0`
`200,1300,0,1,,,`

**The file must have a header row. The headers of the class columns (class names or numbers) are used as the class labels in the output label files.**

In the class columns: 1 for yes, 0 for no. Blank cells are read as 0.

The rows do not need to be sorted by time.

### Requires:
- OpenCV
- NumPy

In [1]:
# imports
import cv2
import os
import numpy as np

# set up file structure

# base directory; the next cell appends a `data` folder to this path and creates
# `images` and `labels` folders inside it
data_dir = 'C:\\Users\\bookl\\Desktop\\School\\Subbots\\subbots-drafts\\src'
# file name or filepath of the partition csv (format described at the top of the notebook)
partition_file = 'partitions-arvp.csv'
# file name or path of the video the images are extracted from
video_file = 'C:\\Users\\bookl\\Desktop\\School\\Subbots\\ARVP_2280MB\\ARVP\\arvp_front_camera.mp4'

In [2]:
# open the video and report its basic properties
cam = cv2.VideoCapture(video_file)

# frame rate of the video; used to convert partition times (ms) into frame numbers
FPS=cam.get(cv2.CAP_PROP_FPS)

print('frame count:'+str(cam.get(cv2.CAP_PROP_FRAME_COUNT)))
print('frame rate:'+str(FPS))

# output layout: <data_dir>/data, <data_dir>/data/images, <data_dir>/data/labels
# os.path.join (rather than appending '\\data') uses the platform separator and
# keeps an empty data_dir relative to the working directory instead of
# turning it into a path at the drive root.
# NOTE: re-running this cell appends another 'data' level to data_dir.
data_dir = os.path.join(data_dir,'data')
img_dir=os.path.join(data_dir,'images')
label_dir=os.path.join(data_dir,'labels')

# create whichever directories are missing, parent first
for directory,name in ((data_dir,'data'),(img_dir,'images'),(label_dir,'labels')):
    if not os.path.exists(directory):
        os.makedirs(directory)
        print('Created '+name+' directory')

frame count:7781.0
frame rate:30.0


In [7]:
# Utilities :)

'''
Naming Convention: 
    frameno -> the frame number; global reference within the video. 
    imgno -> the recorded image number; the number of images you HAVE
'''

# separator written between (and after) the fields of each line in the final label files
SEP=' '

# checks if frameno falls in the partition's start/end range
def in_range(frameno,partition):
    '''
    Returns whether frameno lies within the partition's range, both ends inclusive.
        frameno: frame number to test
        partition: a Partition (start/end attributes), or any indexable whose
                   first two elements are the start and end frame (e.g. a csv row)
    '''
    # isinstance (rather than an exact type comparison) also accepts Partition subclasses
    if isinstance(partition,Partition):
        start,end=partition.start,partition.end
    else:
        start,end=partition[0],partition[1]
    return start<=frameno<=end

# returns the unique filepath for each image
def data_path(imgno):
    '''
    Builds the path of recorded image number imgno: <img_dir>/img_<imgno>.jpg
    '''
    filename=''.join(('img_',str(imgno),'.jpg'))
    return os.path.join(img_dir,filename)

# Determines if the frame should be kept (True) or deleted (False)
def wanted(frameno):
    '''
    Returns False when frameno is below 2 or falls inside any range listed in
    the global deletions; True otherwise.
    '''
    # frames numbered below 2 are always rejected
    if frameno<2:
        return False

    # reject as soon as one deletion range covers the frame
    for deletion in deletions:
        if in_range(frameno,deletion):
            return False
    return True

def convert_to_coco(rect,dims):
    '''
    Converts a pixel rectangle into a centre-based box normalized by dims.
        rect: output of selectROI [x1,y1,w,h]
        dims: output of getWindowImageRect [x1,y1,w,h]; only w and h are used
    Returns [centerx,centery,width,height], each divided by the matching
    dims width/height (so in [0,1] when rect lies inside dims).

    NOTE(review): the name says coco, but a normalized centre/width/height box
    looks like the YOLO label layout - confirm which format the consumer expects.
    '''
    left,top,box_w,box_h=rect[0],rect[1],rect[2],rect[3]
    full_w,full_h=dims[2],dims[3]

    return [
        (left+box_w/2)/full_w,  # centerx
        (top+box_h/2)/full_h,   # centery
        box_w/full_w,           # width
        box_h/full_h,           # height
    ]

def label(imgno,frameno):
    '''
    Prompts for a bounding box for every class that is active at frameno and
    appends the result to the label file of image imgno.
        imgno: recorded image number (selects the image and label file names)
        frameno: frame number in the video (selects the active partitions)

    Each accepted box becomes one line: class centerx centery width height.
    If no box was recorded, a single placeholder line of five zeros is written.
    The file is opened in append mode, so labelling an image twice adds lines.
    '''
    img = cv2.imread(data_path(imgno),cv2.IMREAD_UNCHANGED)

    # only partitions whose range contains this frame can contribute classes
    active=[P for P in partitions if in_range(frameno,P)]

    data_string=''
    for i,class_name in enumerate(classes):
        # prompt only for classes flagged by at least one active partition
        if not any(P.classes[i] for P in active):
            continue

        window_name='img_'+str(imgno)+' class '+str(class_name)
        # NOTE(review): the box is normalized by the window's image rect; presumably
        # that equals the image size for an auto-sized window - confirm, or use img.shape
        rect=convert_to_coco(cv2.selectROI(window_name,img,showCrosshair=True),cv2.getWindowImageRect(window_name))
        cv2.destroyAllWindows()

        # a box with zero width and height means nothing was drawn; skip it
        if not (rect[2] == 0 and rect[3] == 0):
            # centerx, centery, width, height
            data_string+=class_name+SEP+SEP.join(str(value) for value in rect)+SEP+'\n'

    if data_string=='':
        data_string+='0'+SEP+'0'+SEP+'0'+SEP+'0'+SEP+'0'+SEP+'\n'

    # the with-block closes (and flushes) the label file; it used to stay open
    with open(os.path.join(label_dir,'img_'+str(imgno)+'.txt'),'a') as dataset_file:
        print(data_string,file=dataset_file)

In [4]:
# generates partitions from csv
def extract_partitions(partition_file,deletions_index=2):
    '''
    Reads the partition csv and returns (partitions,deletions,classes).
        partition_file: file name or path of the csv
        deletions_index: column holding the delete flag
    partitions: one Partition per data row (rows flagged for deletion included),
                with start/end converted from ms to frames using the global FPS
    deletions: the data rows whose delete flag equals 1
    classes: the header entries from column 3 onward
    '''
    # the header row is parsed along with the data and then dropped;
    # empty cells are filled with 0
    rows=np.genfromtxt(partition_file,delimiter=",",filling_values=0)[1:]

    # convert ms to frames (start and end columns, in place)
    for row in rows:
        row[:2]=row[:2]/1000*FPS

    deletions=[row for row in rows if row[deletions_index]==1]

    # read in classes: second pass over the file, this time as text
    header=np.genfromtxt(partition_file,delimiter=",",dtype=str,encoding='utf-8-sig')[0]
    classes=header[3::]

    partitions=[Partition(start=row[0],end=row[1],classes=row[3:]) for row in rows]

    return partitions,deletions,classes

# start, end in frames
class Partition:
    '''
    One labelled stretch of the video.
        start, end: first and last frame of the stretch
        classes: per-class flags for the stretch, in the same order as the class columns
    '''
    def __init__(self, start, end, classes):
        self.start, self.end, self.classes = start, end, classes

In [9]:
# extract images. frames_to_skip is used directly with the naive decimation below
# Modified from https://www.tutorialspoint.com/opencv_python/opencv_python_extract_images_video.htm 
def extract_images(frames_to_skip=100,max_imgs=100):
    '''
    Saves and labels frames from the global capture cam.
        frames_to_skip: number of frames discarded after each saved image
        max_imgs: maximum number of images to save

    Unwanted frames (see wanted) are passed over one at a time. Stops at the end
    of the video or once max_imgs images exist, then releases the capture.
    '''
    frameno=0
    imgno=0

    try:
        # strictly less than: imgno counts the images already saved, so `<=`
        # would save max_imgs+1 images
        while imgno < max_imgs:
            exists,img = cam.read()
            if not exists:
                break
            frameno += 1

            if not wanted(frameno):
                continue

            cv2.imwrite(data_path(imgno), img)
            label(imgno,frameno)
            imgno+=1

            # decimate: drop the next frames_to_skip frames and keep frameno in step
            ignore_frames(frames_to_skip)
            frameno += frames_to_skip
    finally:
        # release the capture even when labelling is interrupted or fails
        cam.release()

# naive frame decimation. does not check for classified images, just indiscriminately ignores numToIgnore frames
def ignore_frames(numToIgnore):
    '''
    Advances the global capture cam by up to numToIgnore frames.
        numToIgnore: number of frames to discard
    '''
    numIgnored=0
    while numIgnored<numToIgnore:
        # grab() moves to the next frame without decoding it, which is all a
        # discarded frame needs; a False result means there is nothing left to skip
        if not cam.grab():
            break
        numIgnored+=1

# Use

1. Make your partition file as specified above
1. Change the `data_dir` to your filepath
1. Set the partition file/file path
1. Set the video file
1. Set your decimation number, `frames_to_skip`: the number of frames skipped after each saved image (see the frame count, for reference)
1. Set max number of images, if needed
1. Run all cells
1. You will be prompted for bounding boxes. The class being labelled is shown in the window title
    1. If it qualifies as the class, draw the bounding box by clicking and dragging. You can redraw if needed
    1. Press `space` or `Enter` to confirm
    1. If the image does not contain the class, press `c` or just don't draw a box and press `space` or `Enter`

In [10]:
# reopen the video so extraction starts from the first frame
cam = cv2.VideoCapture(video_file)
# these three names are read as globals by wanted() and label()
partitions,deletions,classes=extract_partitions(partition_file)
# save and label the images; this opens the bounding box prompts
extract_images(frames_to_skip=100,max_imgs=100)

In [19]:
# use to alter label file for a specific image number, prompting for every class. WILL overwrite previous bounding boxes
def label_all_classes(imgno):
    '''
    Prompts for a bounding box for every class on image imgno and rewrites the
    image's label file with the result, which is also printed.
        imgno: recorded image number (selects the image and label file names)

    Each accepted box becomes one line: class centerx centery width height.
    If no box was recorded, a single 'empty' placeholder line is written.
    '''
    img = cv2.imread(data_path(imgno),cv2.IMREAD_UNCHANGED)
    data_string=''

    for class_name in classes:
        window_name='img_'+str(imgno)+' class '+str(class_name)
        rect=convert_to_coco(cv2.selectROI(window_name,img,showCrosshair=True),cv2.getWindowImageRect(window_name))
        cv2.destroyAllWindows()

        # a box with zero width and height means nothing was drawn; skip it
        if not (rect[2] == 0 and rect[3] == 0):
            # centerx, centery, width, height
            data_string+=class_name+SEP+SEP.join(str(value) for value in rect)+SEP+'\n'

    # NOTE(review): label() writes a line of five zeros for an empty image while
    # this writes 'empty' - confirm which placeholder the consumer expects
    if data_string=='':
        data_string+='empty'+SEP+'0'+SEP+'0'+SEP+'0'+SEP+'0'+SEP+'\n'

    # same file name as label() uses ('img_<imgno>.txt'); without the 'img_' prefix
    # a separate file was written and the existing labels were never overwritten.
    # the with-block closes (and flushes) the file
    with open(os.path.join(label_dir,'img_'+str(imgno)+'.txt'),'w') as dataset_file:
        print(data_string,file=dataset_file)
    print(data_string)

In [23]:
# prompt for a bounding box for every class on image number 68
label_all_classes(68)

4 0.653125 0.68125 0.275 0.4013888888888889 



# Future improvements

- sort the csv prior to converting, so that instead of iterating through every row of the input csv for each frame you can skip straight to the most recent time
- portions of videos support
- support for in situ deletion (like if the photo is bad, or includes a diver)
- partitioning in the notebook
- adding classes easily later, if they were missed by the partitions

### Making the partition file

In [None]:
'''
Might be helpful in labelling your video partitions
    press q to pause the video for 5s while you write down the number
    the number in the corner is the time in ms

modified from https://www.geeksforgeeks.org/python-play-a-video-using-opencv/
'''

import time

cap=cv2.VideoCapture(video_file)
if not cap.isOpened():
    print("Error opening video file")

frameno=0
try:
    # read until the video is completed (the loop is skipped if the file did not open)
    while cap.isOpened():
        # capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            break

        frameno+=1
        # time of this frame in ms, from the frame count and the frame rate
        frametime=frameno/FPS*1000

        # draw the time on the frame and display it
        cv2.putText(frame, str(int(frametime)), (80,80), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        cv2.imshow('Frame', frame)

        # pressing q pauses playback for 5s (it does not exit)
        if cv2.waitKey(25) & 0xFF == ord('q'):
            time.sleep(5)
finally:
    # release the capture and close the window even if playback is interrupted
    cap.release()
    cv2.destroyAllWindows()