## Extract diving pose gestures

In [4]:
import pandas as pd

In [5]:
chalearn_path = "/home/vlados/datasets/chalearn/"
chalearn_labels_path = chalearn_path + "Info_devel_valid.txt"

In [6]:
chalearn_labels_df = pd.read_csv(chalearn_labels_path, sep="\t")

In [7]:
chalearn_labels_df.head()

Unnamed: 0,Set,Num,Lexicon,UserID,Date,MinDepth,MaxDepth,DepthRes,DepthAcc,Missing
0,devel,1,CanadaAviationGroundCirculation1,A,2011 10 05 16 26,801,1964,76,2,0
1,devel,2,RefereeWrestlingSignals1,I,2011 09 28 14 31,801,1968,71,1,0
2,devel,3,GangHandSignals1,L,2011 09 27 12 16,801,1506,95,1,0
3,devel,4,DivingSignals2,J,2011 09 26 09 01,801,1869,100,1,0
4,devel,5,GestunoDisaster,L,2011 10 11 15 16,824,1964,91,2,0


In [8]:
chlearn_diving_labels_df = chalearn_labels_df.loc[chalearn_labels_df["Lexicon"].isin(["DivingSignals1", "DivingSignals2"])].drop(chalearn_labels_df.loc[:,"Date":], axis = 1)
chlearn_diving_labels_df

Unnamed: 0,Set,Num,Lexicon,UserID
3,devel,4,DivingSignals2,J
19,devel,20,DivingSignals1,D
22,valid,3,DivingSignals2,Z
45,devel,26,DivingSignals2,L
54,devel,35,DivingSignals1,O
62,devel,43,DivingSignals2,L
71,devel,52,DivingSignals1,I
90,devel,71,DivingSignals1,R
92,devel,73,DivingSignals1,H
98,devel,79,DivingSignals2,M


In [9]:
import os

In [10]:
def find_dir(number, path, name):
    for dirname in os.listdir(path):
        splitted = dirname.split("-")
        if splitted[0] != name:
            continue
        if (int(splitted[1]) < number <= int(splitted[2])):
            subpath = os.path.join(path, dirname)
            for subdirname in os.listdir(subpath):
                subsplitted = subdirname.split(name)
                if subsplitted[0] != "":
                    continue
                if int(subsplitted[1]) == number:
                    dest_path = os.path.join(subpath, subdirname)
                    for dest_file in os.listdir(dest_path):
                        if dest_file.split(".")[1] == "csv":
                            yield dest_path, dest_file
                            

In [11]:
df = pd.DataFrame(columns=["name", "label", "path"])
for index, row in chlearn_diving_labels_df.iterrows():
    for dest_path, csv_file  in find_dir(row["Num"], chalearn_path, row["Set"]):
        df_set = pd.read_csv(os.path.join(dest_path, csv_file), names=["name", "label"])
        df_set["path"] = dest_path
        df_set["lexicon"] = row["Lexicon"]
        df_set["user"] = row["UserID"]
        for subindex, subrow in df_set.iterrows():
            df_set.at[subindex, "path"] = os.path.join(subrow.at["path"], "M_" + subrow.at["name"].split("_")[1] + ".avi")
        df = pd.concat([df, df_set], ignore_index=True, sort=False)
df

Unnamed: 0,name,label,path,lexicon,user
0,devel04_11,3,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals2,J
1,devel04_12,1,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals2,J
2,devel04_13,2 7,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals2,J
3,devel04_14,2,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals2,J
4,devel04_15,9 5 4,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals2,J
...,...,...,...,...,...
1321,devel475_5,9,/home/vlados/datasets/chalearn/devel-366-480/d...,DivingSignals1,M
1322,devel475_6,8,/home/vlados/datasets/chalearn/devel-366-480/d...,DivingSignals1,M
1323,devel475_7,5,/home/vlados/datasets/chalearn/devel-366-480/d...,DivingSignals1,M
1324,devel475_8,4,/home/vlados/datasets/chalearn/devel-366-480/d...,DivingSignals1,M


## Extract poses from each video

In [9]:
import numpy as np
import cv2
import os
import torch
import torch.nn as nn

os.sys.path.append('poseEstimation')
from poseEstimation.demo import infer_fast, VideoReader
from poseEstimation.modules.pose import Pose
from poseEstimation.modules.load_state import load_state
from poseEstimation.modules.keypoints import extract_keypoints, group_keypoints
from poseEstimation.models.with_mobilenet import PoseEstimationWithMobileNet

In [10]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# DEVICE = "cpu"
print("Using device: " + DEVICE)
if torch.backends.cudnn.is_available():
    torch.backends.cudnn.enabled = True

SEED = 42

np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

net = PoseEstimationWithMobileNet()
checkpoint = torch.load(
    "weights/checkpoint_iter_370000.pth", map_location='cpu')
load_state(net, checkpoint)

Using device: cuda


In [27]:
column_names = [  "j0_x",  "j0_y", "j1_x", "j1_y" , "j2_x", "j2_y", "j3_x", "j3_y", "j4_x", "j4_y", "j5_x", "j5_y", "j6_x", "j6_y", "j7_x", "j7_y", "j8_x", "j8_y", "j9_x", "j9_y", "j10_x", "j10_y", "j11_x", "j11_y", "j12_x", "j12_y", "j13_x", "j13_y", 'j14_x', "j14_y", "j15_x", "j15_y", "j16_x", "j16_y", "j17_x", "j17_y" ]

In [12]:
def infer(net, image_provider, height_size, cpu):
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts

    pose_sequence = []
    prediction = 0
    prediction_made = False

    for img in image_provider:
        heatmaps, pafs, scale, pad = infer_fast(
            net, img, height_size, stride, upsample_ratio, cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(
                heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(
            all_keypoints_by_type, pafs)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (
                all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (
                all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
        current_poses = []
        for n in range(len(pose_entries)):
            if len(pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                
                if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(
                        all_keypoints[int(pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(
                        all_keypoints[int(pose_entries[n][kpt_id]), 1])
                else:
                    pose_keypoints[kpt_id, 0] = 0
                    pose_keypoints[kpt_id, 1] = 0
            pose = Pose(pose_keypoints, pose_entries[n][18])
            
            current_poses.append(pose)

        if (len(current_poses) > 0):
            pose_sequence.append(current_poses[0].keypoints.reshape([36]))

    return pose_sequence

In [19]:
poses_df = pd.DataFrame(columns=column_names +["name"])
for index, row in df.iterrows():
    if index == 10:
        break
    print(row["name"])
    frame_provider = VideoReader(row["path"])
    pose_sequence = infer(net, frame_provider, 256, False)
    pose_df = pd.DataFrame(pose_sequence, columns=column_names)
    pose_df["name"] = row["name"]
    poses_df = pd.concat([poses_df, pose_df], ignore_index=True, sort=False)

devel04_11
devel04_12
devel04_13
devel04_14
devel04_15
devel04_16
devel04_17
devel04_18
devel04_19
devel04_20


In [22]:
poses_df.to_csv("poses_from_videos.csv", sep="\t", index=False)

## Load extracted poses from csv

In [12]:
poses_df = pd.read_csv("poses_from_videos.csv", sep="\t")

In [47]:
poses_df

Unnamed: 0,j0_x,j0_y,j1_x,j1_y,j2_x,j2_y,j3_x,j3_y,j4_x,j4_y,...,j13_y,j14_x,j14_y,j15_x,j15_y,j16_x,j16_y,j17_x,j17_y,name
0,132,80,130,110,100,108,94,150,117,165,...,0,126,75,137,75,117,76,147,76,devel04_1
1,132,80,128,112,100,110,90,150,115,170,...,0,126,75,137,75,117,76,145,76,devel04_1
2,132,80,128,112,100,110,89,150,109,176,...,0,126,75,137,75,117,76,145,76,devel04_1
3,132,80,128,112,100,110,89,153,104,185,...,0,126,75,137,75,117,76,145,76,devel04_1
4,132,80,128,112,98,112,87,155,98,196,...,0,126,75,137,75,119,76,147,78,devel04_1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104503,173,82,173,121,143,118,122,163,113,146,...,0,167,78,180,76,160,84,190,84,devel475_9
104504,173,82,173,121,143,120,126,161,115,165,...,0,167,78,180,76,160,84,190,84,devel475_9
104505,173,82,173,121,143,120,128,161,122,187,...,0,167,78,180,76,160,84,190,84,devel475_9
104506,173,82,173,121,143,120,128,161,124,195,...,0,167,78,180,76,160,84,190,84,devel475_9


### Find the smallest number of frames for one gesture

In [51]:
def count_min_frames_for_gesture(filenames_df, poses_df):
    min_frames_count = 100
    for index, row in filenames_df.iterrows():
        num_frames = poses_df.loc[poses_df["name"] == row["name"]].shape[0]
        if (isinstance(row["label"], str)):
            splitted_len = len(row["label"].split(" ")) - 1
            num_frames = poses_df.loc[poses_df["name"] == row["name"]].shape[0] / splitted_len
        if num_frames < min_frames_count:
            min_frames_count = num_frames
            print(min_frames_count, row["name"], row["user"])  
    return min_frames_count       

In [52]:
count_min_frames_for_gesture(df,poses_df)

78.0 devel04_11 J
38.0 devel04_12 J
35.0 devel04_15 J
33.0 devel04_16 J
32.8 devel04_25 J
32.0 devel04_44 J
30.0 devel20_10 D
21.25 devel20_11 D
20.333333333333332 devel20_18 D
18.25 devel98_38 B
16.6 devel181_23 B
15.333333333333334 devel181_26 B
13.333333333333334 devel361_34 D


13.333333333333334

### Cut first 1 second from "D" user

In [17]:
df.loc[df["user"] == "D"]

Unnamed: 0,name,label,path,lexicon,user
47,devel20_10,5,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals1,D
48,devel20_11,9 9 7 2,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals1,D
49,devel20_12,3 3 9 2 8,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals1,D
50,devel20_13,2,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals1,D
51,devel20_14,7 7,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals1,D
...,...,...,...,...,...
1133,devel361_43,7 8,/home/vlados/datasets/chalearn/devel-251-365/d...,DivingSignals2,D
1134,devel361_44,10,/home/vlados/datasets/chalearn/devel-251-365/d...,DivingSignals2,D
1135,devel361_45,7,/home/vlados/datasets/chalearn/devel-251-365/d...,DivingSignals2,D
1136,devel361_46,5 1 5 6,/home/vlados/datasets/chalearn/devel-251-365/d...,DivingSignals2,D


In [28]:
D_poses_df = pd.DataFrame(columns=column_names + ["name"])
for index, row in df.loc[df["user"] == "D"].iterrows():
    D_poses_df = pd.concat([D_poses_df, poses_df.loc[poses_df["name"] == row["name"]].iloc[9:]], ignore_index=True, sort=False)
D_poses_df

Unnamed: 0,j0_x,j0_y,j1_x,j1_y,j2_x,j2_y,j3_x,j3_y,j4_x,j4_y,...,j13_y,j14_x,j14_y,j15_x,j15_y,j16_x,j16_y,j17_x,j17_y,name
0,162,84,162,114,135,114,98,150,111,116,...,0,158,78,167,78,149,84,175,84,devel20_10
1,162,86,162,114,135,114,94,136,109,99,...,0,156,80,167,80,149,84,175,84,devel20_10
2,162,86,162,112,135,112,92,114,113,76,...,0,156,80,167,80,149,84,175,86,devel20_10
3,162,84,162,108,135,110,96,95,122,58,...,0,158,78,167,78,149,86,175,86,devel20_10
4,162,84,162,108,137,108,100,86,128,52,...,0,158,78,167,78,149,84,175,84,devel20_10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4312,152,84,154,114,128,116,117,153,122,168,...,0,147,78,158,78,139,84,167,84,devel361_47
4313,152,84,154,114,128,116,117,153,120,166,...,0,147,80,158,78,139,84,167,84,devel361_47
4314,152,84,156,114,128,116,117,153,120,166,...,0,147,78,158,78,139,84,167,84,devel361_47
4315,152,84,154,114,128,116,115,153,122,165,...,0,147,78,158,78,139,84,167,84,devel361_47


### Cut first and lact 1 second from "J" user

In [38]:
df.loc[df["user"] == "J"]

Unnamed: 0,name,label,path,lexicon,user
0,devel04_11,3,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals2,J
1,devel04_12,1,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals2,J
2,devel04_13,2 7,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals2,J
3,devel04_14,2,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals2,J
4,devel04_15,9 5 4,/home/vlados/datasets/chalearn/devel-1-20/deve...,DivingSignals2,J
...,...,...,...,...,...
804,devel199_5,2,/home/vlados/datasets/chalearn/devel-136-250/d...,DivingSignals1,J
805,devel199_6,4,/home/vlados/datasets/chalearn/devel-136-250/d...,DivingSignals1,J
806,devel199_7,8,/home/vlados/datasets/chalearn/devel-136-250/d...,DivingSignals1,J
807,devel199_8,6,/home/vlados/datasets/chalearn/devel-136-250/d...,DivingSignals1,J


In [34]:
J_poses_df = pd.DataFrame(columns=column_names + ["name"])
for index, row in df.loc[df["user"] == "J"].iterrows():
    J_poses_df = pd.concat([J_poses_df, poses_df.loc[poses_df["name"] == row["name"]].iloc[9:]], ignore_index=True, sort=False)
J_poses_df

Unnamed: 0,j0_x,j0_y,j1_x,j1_y,j2_x,j2_y,j3_x,j3_y,j4_x,j4_y,...,j13_y,j14_x,j14_y,j15_x,j15_y,j16_x,j16_y,j17_x,j17_y,name
0,132,78,128,112,98,112,85,157,87,200,...,0,126,73,137,73,117,76,145,75,devel04_11
1,132,78,128,112,98,112,85,155,90,200,...,0,126,73,137,71,117,76,145,75,devel04_11
2,132,78,128,114,98,112,85,157,92,200,...,0,124,73,137,73,115,76,145,76,devel04_11
3,130,78,126,114,98,112,85,155,92,200,...,0,124,73,135,73,115,75,143,76,devel04_11
4,128,78,126,112,96,112,85,157,90,204,...,0,122,73,135,73,113,76,143,76,devel04_11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7694,160,86,158,121,130,121,115,165,115,200,...,0,154,82,165,82,147,86,175,86,devel199_9
7695,160,86,158,121,130,121,115,165,115,200,...,0,154,82,165,82,147,86,175,86,devel199_9
7696,160,86,158,121,130,121,113,163,115,200,...,0,154,82,165,82,147,86,175,86,devel199_9
7697,160,86,158,121,130,121,113,163,115,195,...,0,154,82,165,82,147,86,175,86,devel199_9


### Cut first and lact 1 second from "Z" user

In [33]:
Z_poses_df = pd.DataFrame(columns=column_names + ["name"])
for index, row in df.loc[df["user"] == "Z"].iterrows():
    Z_poses_df = pd.concat([Z_poses_df, poses_df.loc[poses_df["name"] == row["name"]].iloc[9:]], ignore_index=True, sort=False)
Z_poses_df

Unnamed: 0,j0_x,j0_y,j1_x,j1_y,j2_x,j2_y,j3_x,j3_y,j4_x,j4_y,...,j13_y,j14_x,j14_y,j15_x,j15_y,j16_x,j16_y,j17_x,j17_y,name
0,143,67,150,108,124,110,115,146,111,168,...,0,137,63,150,61,134,71,164,67,valid03_1
1,143,67,152,108,122,112,115,148,111,168,...,0,137,63,150,61,134,71,164,67,valid03_1
2,143,67,150,108,124,110,115,146,111,168,...,0,137,63,150,61,134,71,164,67,valid03_1
3,143,65,150,108,122,110,115,148,113,170,...,0,137,61,150,61,134,69,164,67,valid03_1
4,143,65,150,108,122,110,115,146,113,170,...,0,137,61,150,61,134,69,164,67,valid03_1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5950,152,69,160,110,132,110,119,157,109,196,...,0,147,65,158,63,143,73,173,69,devel197_47
5951,152,69,160,110,132,110,119,159,109,195,...,0,147,65,158,63,143,73,173,69,devel197_47
5952,152,69,160,110,132,110,119,157,111,196,...,0,147,65,158,63,143,75,173,71,devel197_47
5953,152,69,160,110,132,110,119,157,111,195,...,0,147,65,158,63,143,75,173,69,devel197_47


### Concatenate all

In [44]:
df.loc[(df["user"] != "J") & (df["user"] != "Z") & (df["user"] != "D")]

Unnamed: 0,name,label,path,lexicon,user
104,devel26_1,4,/home/vlados/datasets/chalearn/devel-21-135/de...,DivingSignals2,L
105,devel26_2,7,/home/vlados/datasets/chalearn/devel-21-135/de...,DivingSignals2,L
106,devel26_3,2,/home/vlados/datasets/chalearn/devel-21-135/de...,DivingSignals2,L
107,devel26_4,10,/home/vlados/datasets/chalearn/devel-21-135/de...,DivingSignals2,L
108,devel26_5,9,/home/vlados/datasets/chalearn/devel-21-135/de...,DivingSignals2,L
...,...,...,...,...,...
1321,devel475_5,9,/home/vlados/datasets/chalearn/devel-366-480/d...,DivingSignals1,M
1322,devel475_6,8,/home/vlados/datasets/chalearn/devel-366-480/d...,DivingSignals1,M
1323,devel475_7,5,/home/vlados/datasets/chalearn/devel-366-480/d...,DivingSignals1,M
1324,devel475_8,4,/home/vlados/datasets/chalearn/devel-366-480/d...,DivingSignals1,M


In [45]:
new_poses_df = pd.DataFrame(columns=column_names + ["name"])
for index, row in df.loc[(df["user"] != "J") & (df["user"] != "Z") & (df["user"] != "D")].iterrows():
    new_poses_df = pd.concat([new_poses_df, poses_df.loc[poses_df["name"] == row["name"]]loc[9:]], ignore_index=True, sort=False)

## Prepare data for training

In [56]:
sequence_size = int(count_min_frames_for_gesture(df,new_poses_df))

69.0 devel04_11 J
29.0 devel04_12 J
24.0 devel04_16 J
21.0 devel20_10 D
19.0 devel20_11 D
16.0 devel20_13 D
13.0 devel20_22 D
12.0 devel361_31 D
11.0 devel361_33 D
10.333333333333334 devel361_34 D
10.0 devel361_39 D
9.0 devel361_41 D


In [58]:
new_poses_df.iloc[0:3]

Unnamed: 0,j0_x,j0_y,j1_x,j1_y,j2_x,j2_y,j3_x,j3_y,j4_x,j4_y,...,j13_y,j14_x,j14_y,j15_x,j15_y,j16_x,j16_y,j17_x,j17_y,name
0,164,48,175,84,143,86,135,138,137,183,...,0,158,41,169,41,150,45,188,41,devel26_1
1,164,46,175,84,143,86,135,138,137,183,...,0,158,41,169,41,152,45,188,43,devel26_1
2,164,46,175,84,143,86,135,138,137,183,...,0,158,41,169,39,152,45,188,41,devel26_1


In [60]:
import math

In [62]:
final_labels_df = pd.DataFrame(columns=["label"])
final_poses_df = pd.DataFrame(columns=column_names + ["name"])
for index, row in df.iterrows():
    if index == 2:
        break
    poses_from_one_video = poses_df.loc[poses_df["name"] == row["name"]]
    temp_label_df = pd.DataFrame(columns=["label"])
    labels_string = str(row["label"]);
    splitted = labels_string.split(" ")
    num_labels = len(splitted) - 1
    label_sequence_size = math.ceil(poses_from_one_video.shape[0] / num_labels)
    for i, label in enumerate(splitted):
        if label == "":
            continue
        pose_sequence = poses_from_one_video.iloc[label_sequence_size * i:label_sequence_size + label_sequence_size * i]
        temp_label_df.at[0, "label"] = int(label)
        final_labels_df = pd.concat([final_labels_df, temp_label_df], ignore_index=True, sort=False)
        final_poses_df = pd.concat([final_poses_df, pose_sequence], ignore_index=True, sort=False)


In [63]:
final_labels_df

Unnamed: 0,label
0,3
1,1


In [64]:
final_poses_df

Unnamed: 0,j0_x,j0_y,j1_x,j1_y,j2_x,j2_y,j3_x,j3_y,j4_x,j4_y,...,j13_y,j14_x,j14_y,j15_x,j15_y,j16_x,j16_y,j17_x,j17_y,name
