In [1]:
import cv2
import numpy as np
from numpy.core.numeric import True_
from numpy.lib.arraysetops import unique
import pandas as pd
# import glob
import os
from pandas import json_normalize
from os import getcwd, path
from yaml import SafeLoader, load
import datetime
import matplotlib.pyplot as plt
import yaml
import shutil
import json

from video_utils_virat import VideoUtils

%matplotlib

Using matplotlib backend: TkAgg


## Dataset Selection

In [2]:

# setup
# Root folder of the VIRAT dataset and general processing options.
dataset_dir_path = './datasets/VIRAT/'
image_ext = '.jpg'
video_max_frames = 2000

# video
# Source clip name/location and the per-clip output folder.
video_ext = '.mp4'
video_name = 'VIRAT_S_000008'
video_name_orig = video_name + video_ext
video_dest_path = './' + video_name + '/'
video_src_path = dataset_dir_path + 'Videos/Ground/'

# True  -> read the YAML annotations (viratannotations/...)
# False -> read the plain-text *.viratdata.*.txt annotations
using_yml = True

# annotations
# Cache of the parsed bounding boxes for this clip.
saved_csv = video_dest_path + 'df_bbox.csv'

if using_yml:
  video_name_new = 'ann_yml_'

  # A clip's YAML annotations live under either the 'validate' or the 'train'
  # split. 'validate' stays the default; 'train' is used only when the
  # validate folder is missing and the train folder exists, so the split no
  # longer has to be switched by editing this cell.
  annotations_root = dataset_dir_path + 'viratannotations/'
  annotations_path = annotations_root + 'validate/' + video_name + '/'
  if not os.path.isdir(annotations_path):
    train_annotations_path = annotations_root + 'train/' + video_name + '/'
    if os.path.isdir(train_annotations_path):
      annotations_path = train_annotations_path

  ann_activities_file = annotations_path + video_name + '.activities.yml'
  ann_geom_file = annotations_path + video_name + '.geom.yml'
  ann_regions_file = annotations_path + video_name + '.regions.yml'
  ann_types_file = annotations_path + video_name + '.types.yml'

else:
  video_name_new = 'ann_txt_'
  annotations_path = dataset_dir_path + 'annotations/'
  events_file = annotations_path + video_name + '.viratdata.events.txt'
  objects_file = annotations_path + video_name + '.viratdata.objects.txt'
  mapping_file = annotations_path + video_name + '.viratdata.mapping.txt'
  print(objects_file)

# Name of the annotated output video (always written as .avi).
video_name_new = video_name_new + video_name + '.avi'#video_ext

## Event File Columns 
- 1: event ID        (unique identifier per event within a clip, same eid can exist on different clips)
- 2: event type      (event type)
- 3: duration        (event duration in frames)
- 4: start frame     (start frame of the event)
- 5: end frame       (end frame of the event)
- 6: current frame   (current frame number)
- 7: bbox lefttop x  (horizontal x coordinate of left top of bbox, origin is lefttop of the frame)
- 8: bbox lefttop y  (vertical y coordinate of left top of bbox, origin is lefttop of the frame)
- 9: bbox width      (horizontal width of the bbox)
- 10: bbox height    (vertical height of the bbox)

### Event Type ID (for column 2 above)
- 1: Person loading an Object to a Vehicle
- 2: Person Unloading an Object from a Car/Vehicle
- 3: Person Opening a Vehicle/Car Trunk
- 4: Person Closing a Vehicle/Car Trunk
- 5: Person getting into a Vehicle
- 6: Person getting out of a Vehicle
- 7: Person gesturing
- 8: Person digging
- 9: Person carrying an object
- 10: Person running
- 11: Person entering a facility
- 12: Person exiting a facility

## Object File Columns
- 1: Object id        (a unique identifier of an object track. Unique within a file.)
- 2: Object duration  (duration of the object track)
- 3: Current frame    (corresponding frame number)
- 4: bbox lefttop x   (horizontal x coordinate of the left top of bbox, origin is lefttop of the frame)
- 5: bbox lefttop y   (vertical y coordinate of the left top of bbox, origin is lefttop of the frame)
- 6: bbox width       (horizontal width of the bbox)
- 7: bbox height      (vertical height of the bbox)
- 8: Object Type      (object type)

### Object Type ID (for column 8 above for object files)
- 1: person
- 2: car              (usually passenger vehicles such as sedan, truck)
- 3: vehicles         (vehicles other than usual passenger cars. Examples include construction vehicles)
- 4: object           (neither car nor person, usually carried objects)
- 5: bike, bicycles   (may include engine-powered auto-bikes)

## Mapping File Columns
- 1: event ID         (unique event ID, points to column 1 of event file)
- 2: event type       (event type, points to column 2 of event file)
- 3: event duration   (event duration, points to column 3 of event file)
- 4: start frame      (start frame of event)
- 5: end frame        (end frame of event)
- 6: number of obj    (total number of associated objects)
- 7-end:              (a variable number of columns that capture the association map for the objects in the clip.
                     If '1', the event is associated with the object. Otherwise, if '0', there's none.
                     The corresponding oid in object file can be found by 'column number - 7')


In [3]:
# Create directory to store new video
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(video_dest_path, exist_ok=True)

# Build type_df (columns: 'id', 'category') from the *.types.yml file.
# It is only needed when the bounding-box CSV cache has to be (re)built,
# so nothing is loaded when the cache already exists.
if using_yml and not os.path.exists(saved_csv):
    with open(ann_types_file) as yaml_file:
        yaml_contents = load(yaml_file, Loader=SafeLoader)
    yaml_df = json_normalize(yaml_contents)

    # Each flattened column is named '<...>.<type name>'. Wherever a column
    # holds 1, replace that 1 with the type name itself. The 'id1' column
    # carries object ids and must be left untouched.
    for col in yaml_df.columns:
        type_name = col.split('.')[-1]
        if type_name != 'id1':
            is_member = yaml_df[col] == 1
            if is_member.any():
                # Cast first: writing a string into a numeric column relies on
                # silent upcasting, which newer pandas versions deprecate.
                yaml_df[col] = yaml_df[col].astype(object)
                yaml_df.loc[is_member, col] = type_name

    # Keep only rows that describe an object, drop columns that are now empty.
    yaml_df = yaml_df[yaml_df['types.id1'].notna()].reset_index().dropna(axis=1, how='all')
    # Forward-filling across columns moves each row's (single) type name into
    # the last column, which becomes the category.
    type_df = yaml_df.ffill(axis=1).iloc[:,-1].to_frame(name='category')
    type_df.insert(0, "id", yaml_df['types.id1'])


In [4]:
if not using_yml:
  # Plain-text annotations: one row per object per frame, with a numeric
  # object-type id in the last column.
  categories = {'person':1,'car':2,'vehicles':3,'object':4, 'bike':5}
  category_names = {type_id: name for name, type_id in categories.items()}
  # The track-id column is called 'object_id' (not 'id') so this branch yields
  # the same column name as the YAML branch; later cells index by 'object_id'.
  header_list = ['object_id', 'duration', 'frame_id','bb_left','bb_top','bb_width','bb_height','category']
  df_bbox = pd.read_csv(objects_file, sep=" ", header=None, names=header_list)

  def update_category(row):
    """Return the category name for the numeric type id in row['category'].

    Unknown ids map to 'Undefined'.
    """
    return category_names.get(row['category'], 'Undefined')

  def add_bbox_val(origin, delta):
    """Return origin + delta (works on scalars and on whole columns)."""
    return origin + delta

  df_bbox['category'] = df_bbox.apply(update_category, axis=1)
  # Right/bottom edges are derived from the top-left corner plus width/height.
  df_bbox['bb_right'] = add_bbox_val(df_bbox['bb_left'], df_bbox['bb_width'])
  df_bbox['bb_bottom'] = add_bbox_val(df_bbox['bb_top'], df_bbox['bb_height'])
else:
  # using annotations:
  print("Loading annotations...")

  def add_category_type(row):
    """Return the category stored in type_df for row['object_id'].

    Raises IndexError when the object id is not present in type_df.
    """
    object_id = row['object_id']
    return type_df.loc[type_df['id'] == object_id, 'category'].iloc[0]

  if os.path.exists(saved_csv):
    # Reuse the cached table from a previous run.
    df_bbox = pd.read_csv(saved_csv)
  else:
    with open(ann_geom_file) as yaml_file:
        yaml_contents = load(yaml_file, Loader=SafeLoader)
    yaml_df = json_normalize(yaml_contents)

    df_bbox = (
        yaml_df[['geom.id1','geom.ts0','geom.ts1','geom.g0']]
        .dropna()
        .reset_index(drop=True)
        .rename(columns={'geom.id1': 'object_id', 'geom.ts0': 'frame_id','geom.ts1': 'time_sec', 'geom.g0': 'bbox'})
    )

    # 'bbox' is a whitespace-separated string of four corner values. Convert
    # them to numbers here so a fresh run produces the same dtypes as a run
    # that reloads the cached CSV (otherwise the corners would stay strings).
    bbox_columns = ['bb_left', 'bb_top', 'bb_right', 'bb_bottom']
    bbox_corners = pd.DataFrame(
        df_bbox['bbox'].str.split().to_list(),
        columns=bbox_columns,
        index=df_bbox.index,
    ).apply(pd.to_numeric)
    df_bbox = pd.concat([df_bbox.drop(columns=['bbox']), bbox_corners], axis=1)

    df_bbox['category'] = df_bbox.apply(add_category_type, axis=1)
    df_bbox.to_csv(saved_csv, index = False)


df_bbox.head()

Loading annotations...


Unnamed: 0,object_id,frame_id,time_sec,bb_left,bb_top,bb_right,bb_bottom,category
0,0.0,0.0,0.0,368,659,426,799,Person
1,0.0,1.0,0.033333,367,658,426,799,Person
2,0.0,2.0,0.066667,367,658,426,799,Person
3,0.0,3.0,0.1,367,658,426,799,Person
4,0.0,4.0,0.133333,367,658,426,799,Person


# Add velocities

In [5]:
# Add velocities for the video

# Read the clip's frame rate; fall back to 30 fps when the file cannot be
# opened or the backend reports no usable value.
vidcap = cv2.VideoCapture(video_src_path + video_name_orig)
fps = 30
# Placeholder factor applied to pixel displacements when computing speeds.
# NOTE(review): presumably metres per pixel; the real value is still TBD.
scale = 1/15
try:
    if vidcap.isOpened():
        reported_fps = vidcap.get(cv2.CAP_PROP_FPS)
        # A non-positive value would zero every velocity, so keep the default.
        if reported_fps > 0:
            fps = reported_fps
        # scale = #TBD
finally:
    # The capture is only needed for the FPS query; free the file handle.
    vidcap.release()


def find_mid(a, b):
  """Return the element-wise midpoint of ``a`` and ``b``.

  Both inputs are converted with ``astype(float)`` first, so columns holding
  numeric strings are accepted as well.
  """
  first = a.astype(float)
  second = b.astype(float)
  return (first + second) / 2

def find_delta(x):
  """Return the step-to-step change of ``x``, keeping the index of ``x``.

  The first element has no predecessor, so it is back-filled with the second
  element's delta. A single-element input yields NaN.

  The result always has exactly the same index as ``x``: nothing is written by
  label, so no extra row is added for tracks whose index does not contain 0.
  """
  return x.diff().bfill()

def find_vel(vx,vy):
  """Return the magnitude of the velocity (vx, vy), rounded to 2 decimals."""
  squared_speed = vx * vx + vy * vy
  return np.sqrt(squared_speed).round(2)

# For every object track, add the box-centre position, its frame-to-frame
# displacement, the scaled velocity components and the speed to df_bbox.
# NOTE(review): 'x' is the midpoint of bb_top/bb_bottom and 'y' the midpoint of
# bb_left/bb_right, which looks swapped relative to the usual image convention.
# Left unchanged because the consumers of these columns are not visible here.
for object_id in df_bbox['object_id'].unique():  # not `id`: keep the builtin usable
  mask = (df_bbox['object_id'] == object_id)
  sub_df = df_bbox[mask]
  x  = find_mid(sub_df['bb_top'], sub_df['bb_bottom'])
  y  = find_mid(sub_df['bb_left'], sub_df['bb_right'])
  dx = find_delta(x)
  dy = find_delta(y)
  # displacement per frame * frames per second * scale
  vx = (dx * fps * scale)
  vy = (dy * fps * scale)

  track_columns = {
    'x': x,
    'y': y,
    'dx': dx,
    'dy': dy,
    'vx': vx,
    'vy': vy,
    'vel': find_vel(vx, vy),
  }
  # Assignment through .loc aligns each Series with the track's rows by index.
  for column, values in track_columns.items():
    df_bbox.loc[mask, column] = values

df_bbox.head()

## Annotate Video

In [6]:
from video_utils_virat import VideoUtils
from video_utils_virat import DirectoryUtils
# vUtils = VideoUtils(categoriesDict) 
# Give every category present in the annotations a consecutive integer id,
# in order of first appearance.
types_lst = df_bbox['category'].unique()
types_dict = {type_name: type_idx for type_idx, type_name in enumerate(types_lst)}

vUtils = VideoUtils(types_dict)
drUtils = DirectoryUtils()

# Annotate the source clip and write the result into the clip's output folder.
start_time = 0
vUtils.AnnotateVideo(
    video_dest_path,
    video_src_path + video_name_orig,
    video_name_new,
    df_bbox,
    start_time,
)

# Remove the .jpg files left in the output folder.
drUtils.ClearFileType(video_dest_path,".jpg")
# shutil.move(video_src_path + video_name_orig, video_src_path + 'UsedVideos/' + video_name_orig)

{'Person': 0, 'Vehicle': 1, 'Prop': 2, 'Other': 3}
Total frames in video: 5805 @ 29.97 frames/sec
running full video
5805.0 0 5805.0
Created frame id  0, 0.00 sec in video; completed:  0.0 %
Created frame id 25, 0.83 sec in video; completed:  0.4 %
Created frame id 50, 1.67 sec in video; completed:  0.9 %
Created frame id 75, 2.50 sec in video; completed:  1.3 %
Created frame id 100, 3.34 sec in video; completed:  1.7 %
Created frame id 125, 4.17 sec in video; completed:  2.2 %
Created frame id 150, 5.00 sec in video; completed:  2.6 %
Created frame id 175, 5.84 sec in video; completed:  3.0 %
Created frame id 200, 6.67 sec in video; completed:  3.4 %
Created frame id 225, 7.51 sec in video; completed:  3.9 %
Created frame id 250, 8.34 sec in video; completed:  4.3 %
Created frame id 275, 9.18 sec in video; completed:  4.7 %
Created frame id 300, 10.01 sec in video; completed:  5.2 %
Created frame id 325, 10.84 sec in video; completed:  5.6 %
Created frame id 350, 11.68 sec in video; c

In [None]:
# import pandas as pd
# import math
# pd.options.mode.chained_assignment = None  # default='warn'



# # df_bbox['x']=df_bbox.apply(lambda x: find_mid(x['bb_top'], x['bb_bottom']), axis=1)
# # df_bbox['y']=df_bbox.apply(lambda x: find_mid(x['bb_left'], x['bb_right']), axis=1)
# # df_bbox['vx'] = df_bbox['x'].groupby('object id').diff()
# # df_bbox['vy'] = df_bbox['y'].diff()
# # df_bbox.drop(columns=['vx','vy','x','y'], axis=1, inplace=True)
# # df_bbox.drop(columns=['vel'], axis=1, inplace=True)

# fps = 30
# scale = 1/320

# def find_mid(a, b):
#   c = (a.astype(float) + b.astype(float))/2
#   return c

# def find_delta(x):
#   dx = x.diff()
#   dx[0] = np.nan
#   dx.fillna(method='backfill', inplace=True)
#   return dx

# def find_vel(vx,vy):

#   vel = np.sqrt(vx * vx + vy * vy).round(2)
#   return vel#, vx, vy

# for id in df_bbox['object id'].unique():
#   mask = (df_bbox['object id']==id)
#   sub_df = df_bbox[mask]
#   x  = find_mid(sub_df['bb_top'], sub_df['bb_bottom'])
#   y  = find_mid(sub_df['bb_left'], sub_df['bb_right'])
#   dx = find_delta(x)
#   dy = find_delta(y)
#   vx = (dx * fps * scale)
#   vy = (dy * fps * scale)

#   df_bbox.loc[mask, 'x']  = x
#   df_bbox.loc[mask, 'y']  = y
#   df_bbox.loc[mask, 'dx'] = dx
#   df_bbox.loc[mask, 'dy'] = dy
#   df_bbox.loc[mask, 'vx'] = (dx * fps * scale)
#   df_bbox.loc[mask, 'vy'] = (dy * fps * scale)
#   df_bbox.loc[mask, 'vel']  = np.sqrt(vx * vx + vy * vy).round(2)
 

# df_bbox.head()


In [None]:
# bb_data = df_bbox['bbox'].to_numpy()
# count = 0
# max_ann = 0
# ann_cnt = 0
# df_center = pd.DataFrame([])
# df2  = pd.DataFrame([])
# frame_cnt = len(bb_data)
# for i in range(frame_cnt):
#   ann_cnt = len(bb_data[i])
#   lst_x = np.array([])
#   lst_y = np.array([])

#   for j in range(ann_cnt):
#     bbox = bb_data[i][j]
#     top = bbox['top']
#     left = bbox['left']
#     height = bbox['height']
#     width = bbox['width']
#     categories = bbox['class']
#     bottom = top + height
#     right = left + width
#     center_x = int(left + width/2)
#     center_y = int(top + height/2)
#     center = (center_x, center_y)

#     lst_x = np.append(lst_x, center_x)
#     lst_y = np.append(lst_y, center_y)
#     count += 1
#     max_ann = max(max_ann, ann_cnt)

#   lst_tuple = list(zip(lst_x,lst_y))
#   print(count, ann_cnt, max_ann, lst_tuple)
#   #   df2[i] = bbox
#   # df_center = df_center.append(df2, ignore_index = True)
# # print(i, df_center)

# # velocity_pix [pixel/frame]
# # frameRate    [frame/sec]
# # scale        [m/pixel]
# # velocity = velocity_pix * frameRate * scale; #   [m/sec] = [pixel/frame] * [frame/sec] * [m/pixel]


In [None]:
# import datashader as ds
# import pandas as pd
# import colorcet
# # df  = pd.read_csv('census.csv')
# cvs = ds.Canvas(plot_width=850, plot_height=500)
# agg = cvs.points(df_json, 'longitude', 'latitude')
# img = ds.tf.shade(agg, cmap=colorcet.fire, how='log')