In [1]:
%matplotlib inline

In [2]:
import numpy as np
import re
from utils import show_video
from glob import glob

In [3]:
DATASET_PATH = '/vision/group/ntu-rgbd/'

In [4]:
!ls $DATASET_PATH

100_few_shot_depth_train_ann.txt       flow_s005
100_few_shot_rgb_train_ann.txt	       flow_s006
100-few-shot-rgb-train-paths.npy       flow_s007
111				       flow_s008
48				       flow_s009
50_few_shot_depth_support_val_ann.txt  flow_s010
50_few_shot_depth_train_ann.txt        flow_s011
50_few_shot_depth_val_ann.txt	       flow_s012
50_few_shot_rgb_support_val_ann.txt    flow_s013
50-few-shot-rgb-support-val-paths.npy  flow_s014
50_few_shot_rgb_train_ann.txt	       flow_s015
50-few-shot-rgb-train-paths.npy        flow_s016
50_few_shot_rgb_val_ann.txt	       flow_s017
50-few-shot-rgb-val-paths.npy	       mask_s001
alan				       mask_s002
cflow_kmeans_s001		       mask_s003
cflow_kmeans_s002		       mask_s004
cflow_kmeans_s003		       mask_s005
cflow_kmeans_s004		       mask_s006
cflow_kmeans_s005		       mask_s007
cflow_kmeans_s006		       mask_s008
cflow_kmeans_s007		       mask_s009
cflow_kmeans_s008		       mask_s010
cflow_kmeans_s009		       mask_s011
cflow_kmeans_s010		       m

## Dataset structure
* `rgb_s0\*\*` is a directory of RGB videos recorded in a single setup (setup 0\*\*); one such directory can contain videos of several performers
    * Within each of these subdirectories, the data is formatted as SxxxCxxxPxxxRxxxAxxx
    * S is the setup number (I believe this is the room/background)
    * C is camera ID
    * P is the performer (subject ID)
    * R is replication number (1 or 2 if done multiple times)
    * A is action class label

In [5]:
# Actions that are part of the "support set"
#A1, A7, A13, A19, A25, A31, A37, A43, A49, A55, A61, A67, A73, A79, A85, A91, A97, A103, A109, A115

In [6]:
!ls {DATASET_PATH + 'rgb_s001'}

S001C001P001R001A001  S001C002P001R001A001  S001C003P001R001A001
S001C001P001R001A002  S001C002P001R001A002  S001C003P001R001A002
S001C001P001R001A003  S001C002P001R001A003  S001C003P001R001A003
S001C001P001R001A004  S001C002P001R001A004  S001C003P001R001A004
S001C001P001R001A005  S001C002P001R001A005  S001C003P001R001A005
S001C001P001R001A006  S001C002P001R001A006  S001C003P001R001A006
S001C001P001R001A007  S001C002P001R001A007  S001C003P001R001A007
S001C001P001R001A008  S001C002P001R001A008  S001C003P001R001A008
S001C001P001R001A009  S001C002P001R001A009  S001C003P001R001A009
S001C001P001R001A010  S001C002P001R001A010  S001C003P001R001A010
S001C001P001R001A011  S001C002P001R001A011  S001C003P001R001A011
S001C001P001R001A012  S001C002P001R001A012  S001C003P001R001A012
S001C001P001R001A013  S001C002P001R001A013  S001C003P001R001A013
S001C001P001R001A014  S001C002P001R001A014  S001C003P001R001A014
S001C001P001R001A015  S001C002P001R001A015  S001C003P001R001A015
S001C001P001R001A016  S00

In [7]:
show_video(DATASET_PATH + 'rgb_s003/S003C001P001R001A001/')

In [7]:
# Cross-setup evaluation uses even setupIDs for training, odd setupIDs for testing
# Cross-subject evaluation is as follows:
# Person IDs for training:
#1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 27, 28, 31, 34, 35,
#38, 45, 46, 47, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59, 70, 74, 78,
#80, 81, 82, 83, 84, 85, 86, 89, 91, 92, 93, 94, 95, 97, 98, 100, 103.
# Rest are for testing
# Performer (subject) IDs that make up the cross-subject training split.
trainIDs = {
    1, 2, 4, 5, 8, 9, 13, 14, 15, 16,
    17, 18, 19, 25, 27, 28, 31, 34, 35, 38,
    45, 46, 47, 49, 50, 52, 53, 54, 55, 56,
    57, 58, 59, 70, 74, 78, 80, 81, 82, 83,
    84, 85, 86, 89, 91, 92, 93, 94, 95, 97,
    98, 100, 103,
}

In [26]:
# Gets the person id from a video filename string

# Action numbers (1-indexed "A" field) that belong to the few-shot support set:
# every sixth action starting at A1, i.e. 1, 7, 13, ..., 109, 115.
support_set = list(range(1, 116, 6))

def get_person_id(video_file):
    """Return the performer (subject) ID encoded in a video path.

    The video directory name has the form SxxxCxxxPxxxRxxxAxxx; the ID is the
    three digits following "P".

    The lookup is anchored on "P" followed by three digits, so an unrelated
    capital "P" elsewhere in the path (e.g. a directory called "Projects")
    no longer breaks the parsing. When several candidates exist, the last one
    is used because the video name is the final path component.

    Args:
        video_file: Path (or bare name) of a video directory; a trailing
            slash is allowed.

    Returns:
        The performer ID as an int (e.g. 28 for "...P028...").

    Raises:
        ValueError: If no "Pxxx" field can be found.
    """
    candidates = re.findall(r"P(\d{3})", video_file)
    if not candidates:
        raise ValueError("No performer field (Pxxx) found in %r" % (video_file,))
    return int(candidates[-1])

def is_training_set(video_file):
    """Tell whether the video's performer is one of the training subjects.

    The performer ID is extracted with ``get_person_id`` and looked up in the
    module-level ``trainIDs`` collection.
    """
    performer = get_person_id(video_file)
    return performer in trainIDs

def get_label(video_file):
    """Return the zero-based class label encoded in a video path.

    The action number is the digits following "A" in the video directory
    name (SxxxCxxxPxxxRxxxAxxx); action 1 corresponds to label 0.

    The lookup is anchored on "A" followed by digits, so a trailing slash or
    an unrelated capital "A" in a parent directory no longer makes the
    parsing fail. When several candidates exist, the last one is used because
    the video name is the final path component.

    Args:
        video_file: Path (or bare name) of a video directory; a trailing
            slash is allowed.

    Returns:
        The action number minus one, as an int.

    Raises:
        ValueError: If no "A<digits>" field can be found.
    """
    candidates = re.findall(r"A(\d+)", video_file)
    if not candidates:
        raise ValueError("No action field (Axxx) found in %r" % (video_file,))
    return int(candidates[-1]) - 1  # Action 1 corresponds to label 0

def get_num_frames(video_file):
    """Count the entries matched by the glob pattern ``<video_file>/*``.

    Used as the number of frames stored in a video directory; a missing or
    empty directory yields 0.
    """
    frame_entries = glob(video_file + '/*')
    return len(frame_entries)

def get_alabel(video_file):
    """Return the one-based action number encoded in a video path.

    This is the raw "A" field of the video directory name
    (SxxxCxxxPxxxRxxxAxxx), i.e. one more than the zero-based class label.

    The lookup is anchored on "A" followed by digits, so a trailing slash or
    an unrelated capital "A" in a parent directory no longer makes the
    parsing fail. When several candidates exist, the last one is used because
    the video name is the final path component.

    Args:
        video_file: Path (or bare name) of a video directory; a trailing
            slash is allowed.

    Returns:
        The action number as an int (e.g. 31 for "...A031").

    Raises:
        ValueError: If no "A<digits>" field can be found.
    """
    candidates = re.findall(r"A(\d+)", video_file)
    if not candidates:
        raise ValueError("No action field (Axxx) found in %r" % (video_file,))
    return int(candidates[-1])

def is_support_set(video_file):
    """Tell whether the video's action number is listed in ``support_set``.

    The one-based action number is extracted with ``get_alabel``.
    """
    action_number = get_alabel(video_file)
    if action_number in support_set:
        return True
    return False
def get_action_id(filename):
    """Return the video path with its camera field (Cxxx) removed.

    All camera views of one recording share the same setup, performer,
    replication and action fields, so stripping "Cxxx" from
    SxxxCxxxPxxxRxxxAxxx gives a key (S***P***R***A***) that is identical for
    every view of that recording. Everything before and after the video name
    is kept unchanged.

    The video name is located by its full pattern instead of the first "C" in
    the string, so a capital "C" in a parent directory does not corrupt the
    result. When the pattern occurs more than once, the last occurrence (the
    final path component) is used.

    Args:
        filename: Path (or bare name) of a video directory.

    Returns:
        ``filename`` with the three-digit camera field dropped, e.g.
        ".../rgb_s007/S007C002P028R001A031" -> ".../rgb_s007/S007P028R001A031".

    Raises:
        ValueError: If no SxxxCxxxPxxxRxxxAxxx name is found.
    """
    occurrences = list(
        re.finditer(r"(S\d{3})C\d{3}(P\d{3}R\d{3}A\d{3})", filename)
    )
    if not occurrences:
        raise ValueError(
            "No SxxxCxxxPxxxRxxxAxxx video name found in %r" % (filename,)
        )
    video_name = occurrences[-1]
    return (
        filename[:video_name.start()]
        + video_name.group(1)
        + video_name.group(2)
        + filename[video_name.end():]
    )

In [9]:
# Load the saved few-shot RGB path splits, in this order:
# validation, training, support, support-validation.
rgb_te_pths, rgb_tr_pths, rgb_sup_pths, rgb_lst_pths = [
    np.load(DATASET_PATH + split_file)
    for split_file in (
        'few-shot-rgb-val-paths.npy',
        'few-shot-rgb-train-paths.npy',
        'few-shot-rgb-support-paths.npy',
        'few-shot-rgb-support-val-paths.npy',
    )
]

In [20]:
# Read the current support annotation file and keep, for every line,
# the text before the first space (presumably the video path -- the lines
# are written elsewhere in this notebook as "<path> <num_frames> <label>").
curr_ann_filename = DATASET_PATH + 'few_shot_rgb_support_ann.txt'
with open(curr_ann_filename, "r") as f:
    lines = list(f)
labeled_paths = [entry[:entry.find(' ')] for entry in lines]

In [21]:
print(len(labeled_paths))

50


In [22]:
# Support-set videos that do not appear in the labeled annotation file.
# Membership is tested against a set, so the filter is linear in the number
# of paths instead of scanning the whole labeled list for every path.
labeled_path_set = set(labeled_paths)
unlabeled_paths = [path for path in rgb_sup_pths if path not in labeled_path_set]

In [34]:
# Group the unlabeled videos by action id (S***P***R***A***, i.e. the video
# name without its camera field), so the camera views of one recording share a key.
action_id2paths = {}
for filename in unlabeled_paths:
    action_id = get_action_id(filename)
    action_id2paths.setdefault(action_id, []).append(filename)
action_ids = action_id2paths.keys()
# Give every action id a small integer, following the iteration order of the dict keys.
action_id2small_id = dict(zip(action_ids, range(len(action_id2paths))))

In [35]:
# Flatten the groups back into one list so that paths sharing an action id are adjacent.
ordered_unlabeled_paths = [
    path
    for action_id in action_ids
    for path in action_id2paths[action_id]
]

In [36]:
print(len(ordered_unlabeled_paths))

6653


In [39]:
!pip install tqdm

Collecting tqdm
  Downloading tqdm-4.63.0-py2.py3-none-any.whl (76 kB)
[K     |████████████████████████████████| 76 kB 3.0 MB/s eta 0:00:011
[?25hInstalling collected packages: tqdm
Successfully installed tqdm-4.63.0


In [42]:
from tqdm import tqdm

In [43]:
# Generating the annotation file for the unlabeled videos.
# One line per non-empty video: "<path> <num_frames> <label>", where the label
# is the small integer assigned to the video's action id.
ann_filename = DATASET_PATH + 'few_shot_rgb_unlabeled_ann.txt'
num_empty_vids = 0
with open(ann_filename, "w") as f:
    for path in tqdm(ordered_unlabeled_paths):
        num_frames = get_num_frames(path)
        label = action_id2small_id[get_action_id(path)]
        if num_frames <= 0:
            # No frames found: count the video but leave it out of the file.
            num_empty_vids += 1
            continue
        f.write(' '.join([path, str(num_frames), str(label)]) + '\n')
print("Emtpy vids:", num_empty_vids)

100%|██████████| 6653/6653 [01:09<00:00, 96.10it/s]  

Emtpy vids: 0





In [44]:
# Verify: read the annotation file back, then show its line count and first six lines.
with open(ann_filename, "r") as f:
    lines = list(f)
print(len(lines), lines[:6], sep='\n')

6653
['/vision/group/ntu-rgbd/rgb_s007/S007C002P028R001A031 50 0\n', '/vision/group/ntu-rgbd/rgb_s007/S007C003P028R001A031 50 0\n', '/vision/group/ntu-rgbd/rgb_s007/S007C001P028R001A031 50 0\n', '/vision/group/ntu-rgbd/rgb_s007/S007C001P027R001A013 112 1\n', '/vision/group/ntu-rgbd/rgb_s007/S007C003P027R001A013 114 1\n', '/vision/group/ntu-rgbd/rgb_s007/S007C002P027R001A013 113 1\n']


In [45]:
# Replace rgb_s*** with depth_s***
def rgb2depth(filename):
    """Return ``filename`` with every occurrence of 'rgb_s' swapped for 'depth_s'.

    Turns e.g. ".../rgb_s007/..." into ".../depth_s007/..."; text without
    'rgb_s' is returned unchanged.
    """
    pieces = filename.split('rgb_s')
    return 'depth_s'.join(pieces)

In [46]:
# Derive the depth annotation file from the RGB one by rewriting the
# directory prefix on every line (rgb_s*** -> depth_s***).
rgb_ann_filename = DATASET_PATH + 'few_shot_rgb_unlabeled_ann.txt'
depth_ann_filename = DATASET_PATH + 'few_shot_depth_unlabeled_ann.txt'
with open(rgb_ann_filename, "r") as f, open(depth_ann_filename, "w") as w:
    for line in tqdm(f.readlines()):
        w.write(rgb2depth(line))

100%|██████████| 6653/6653 [00:00<00:00, 443600.74it/s]


In [47]:
# Verify: read the depth annotation file back, then show its line count and first six lines.
with open(depth_ann_filename, "r") as f:
    lines = list(f)
print(len(lines), lines[:6], sep='\n')

6653
['/vision/group/ntu-rgbd/depth_s007/S007C002P028R001A031 50 0\n', '/vision/group/ntu-rgbd/depth_s007/S007C003P028R001A031 50 0\n', '/vision/group/ntu-rgbd/depth_s007/S007C001P028R001A031 50 0\n', '/vision/group/ntu-rgbd/depth_s007/S007C001P027R001A013 112 1\n', '/vision/group/ntu-rgbd/depth_s007/S007C003P027R001A013 114 1\n', '/vision/group/ntu-rgbd/depth_s007/S007C002P027R001A013 113 1\n']


## THE REST OF THIS NOTEBOOK WAS NOT USED

In [10]:
#A1, A7, A13, A19, A25, A31, A37, A43, A49, A55, A61, A67, A73, A79, A85, A91, A97, A103, A109, A115

In [11]:
is_support_set(DATASET_PATH + 'rgb_s003/S003C001P001R001A001')

True

In [12]:
get_person_id(DATASET_PATH + 'rgb_s003/S003C001P001R001A001/')

1

In [13]:
is_training_set(DATASET_PATH + 'rgb_s003/S003C001P002R001A001/')

True

In [16]:
print(len(train_paths), len(test_paths), len(support_paths), len(low_shot_test_paths))

33526 13712 6703 2744


In [17]:
len(train_paths)

33526

In [18]:
len(test_paths)

13712

In [19]:
# Number checks out from original paper
print("Total videos:", len(train_paths) + len(test_paths))

Total videos: 47238


In [20]:
# Alias the split lists under the rgb_* names used by the cells below.
# NOTE(review): train_paths, test_paths, support_paths and low_shot_test_paths
# are not defined in any cell visible in this notebook -- confirm where they come from.
rgb_tr_pths, rgb_te_pths = train_paths, test_paths
rgb_sup_pths, rgb_lst_pths = support_paths, low_shot_test_paths

In [23]:
# Grab the first k examples of every support class, in the order the
# support paths are listed.
k = 5
id2num = {action_number: 0 for action_number in support_set}  # examples kept so far, per action number
final_sup_pths = []

for path in rgb_sup_pths:
    label = get_alabel(path)
    if id2num[label] >= k:
        continue  # this class already has k examples
    final_sup_pths.append(path)
    id2num[label] += 1

In [84]:
print(len(final_sup_pths),k * len(support_set)) # Will be equal for NTU RGBD 120, half for NTU RGBD 60

50 100


In [88]:
# Generate the training annotation file.
# One line per non-empty video: "<path> <num_frames> <label>".
ann_filename = DATASET_PATH + 'few_shot_rgb_train_ann.txt'
num_empty_vids = 0
with open(ann_filename, "w") as f:
    for path in rgb_tr_pths:
        num_frames = get_num_frames(path)
        label = get_label(path)
        if num_frames <= 0:
            # No frames found: count the video but leave it out of the file.
            num_empty_vids += 1
            continue
        f.write(' '.join([path, str(num_frames), str(label)]) + '\n')
print("Emtpy vids:", num_empty_vids)

Emtpy vids: 11


In [89]:
# Generate the validation annotation file.
# One line per non-empty video: "<path> <num_frames> <label>".
ann_filename = DATASET_PATH + 'few_shot_rgb_val_ann.txt'
num_empty_vids = 0
with open(ann_filename, "w") as f:
    for path in rgb_te_pths:
        num_frames = get_num_frames(path)
        label = get_label(path)
        if num_frames <= 0:
            # No frames found: count the video but leave it out of the file.
            num_empty_vids += 1
            continue
        f.write(' '.join([path, str(num_frames), str(label)]) + '\n')
print("Emtpy vids:", num_empty_vids)

Emtpy vids: 8


In [95]:
# Generate the support annotation file from the selected support examples.
# One line per non-empty video: "<path> <num_frames> <label>".
ann_filename = DATASET_PATH + 'few_shot_rgb_support_ann.txt'
num_empty_vids = 0
with open(ann_filename, "w") as f:
    for path in final_sup_pths:
        num_frames = get_num_frames(path)
        label = get_label(path)
        if num_frames <= 0:
            # No frames found: count the video but leave it out of the file.
            num_empty_vids += 1
            continue
        f.write(' '.join([path, str(num_frames), str(label)]) + '\n')
print("Emtpy vids:", num_empty_vids)

Emtpy vids: 0


In [91]:
# Generate the few-shot test annotation file.
# One line per non-empty video: "<path> <num_frames> <label>".
ann_filename = DATASET_PATH + 'few_shot_rgb_support_test_ann.txt'
num_empty_vids = 0
with open(ann_filename, "w") as f:
    for path in rgb_lst_pths:
        num_frames = get_num_frames(path)
        label = get_label(path)
        if num_frames <= 0:
            # No frames found: count the video but leave it out of the file.
            num_empty_vids += 1
            continue
        f.write(' '.join([path, str(num_frames), str(label)]) + '\n')
print("Emtpy vids:", num_empty_vids)

Emtpy vids: 0


In [94]:
print(len(rgb_te_pths), len(rgb_tr_pths), len(final_sup_pths), len(rgb_lst_pths))

13712 33526 50 2744
