## Extract diving pose gestures

In [1]:
import os

import pandas as pd

In [2]:
# Root directory of the dataset. Set the CHALEARN_PATH environment variable to
# point at a different copy; the default keeps the original location.
chalearn_path = os.environ.get("CHALEARN_PATH", "/home/vlados/datasets/chalearn/")
# os.path.join works whether or not the root ends with a path separator.
chalearn_labels_path = os.path.join(chalearn_path, "Info_devel_valid.txt")

In [3]:
# The label index is a tab-delimited text file.
chalearn_labels_df = pd.read_csv(chalearn_labels_path, delimiter="\t")

In [4]:
# Preview the first five label rows.
chalearn_labels_df.head(n=5)

Unnamed: 0,Set,Num,Lexicon,UserID,Date,MinDepth,MaxDepth,DepthRes,DepthAcc,Missing
0,devel,1,CanadaAviationGroundCirculation1,A,2011 10 05 16 26,801,1964,76,2,0
1,devel,2,RefereeWrestlingSignals1,I,2011 09 28 14 31,801,1968,71,1,0
2,devel,3,GangHandSignals1,L,2011 09 27 12 16,801,1506,95,1,0
3,devel,4,DivingSignals2,J,2011 09 26 09 01,801,1869,100,1,0
4,devel,5,GestunoDisaster,L,2011 10 11 15 16,824,1964,91,2,0


In [5]:
# Keep only the two diving lexicons and discard every column from "UserID"
# to the end of the frame.
chlearn_diving_labels_df = (
    chalearn_labels_df
    .loc[lambda frame: frame["Lexicon"].isin(["DivingSignals1", "DivingSignals2"])]
    .drop(columns=chalearn_labels_df.loc[:, "UserID":].columns)
)
chlearn_diving_labels_df

Unnamed: 0,Set,Num,Lexicon
3,devel,4,DivingSignals2
19,devel,20,DivingSignals1
22,valid,3,DivingSignals2
45,devel,26,DivingSignals2
54,devel,35,DivingSignals1
62,devel,43,DivingSignals2
71,devel,52,DivingSignals1
90,devel,71,DivingSignals1
92,devel,73,DivingSignals1
98,devel,79,DivingSignals2


In [6]:
import os

In [7]:
def find_dir(number, path, name):
    """Yield ``(directory, csv_file)`` pairs for one numbered sample.

    The expected layout is ``<path>/<name>-<low>-<high>/<name><number>/*.csv``:
    a range directory whose bounds satisfy ``low < number <= high`` and, inside
    it, a sample directory made of ``name`` followed by the sample number
    (leading zeros allowed).

    Entries that do not follow this naming scheme (stray files, archives,
    directories without a numeric range) are skipped instead of raising.
    Directory listings are sorted so results come back in a stable order.

    Args:
        number: sample number to look for.
        path: root directory to search.
        name: prefix shared by range and sample directories, e.g. "devel".

    Yields:
        Tuples ``(dest_path, dest_file)`` where ``dest_path`` is the sample
        directory and ``dest_file`` is the name of a ``.csv`` file inside it.
    """
    for dirname in sorted(os.listdir(path)):
        parts = dirname.split("-")
        if len(parts) < 3 or parts[0] != name:
            continue
        try:
            low, high = int(parts[1]), int(parts[2])
        except ValueError:
            # Starts with "<name>-" but carries no numeric range.
            continue
        if not (low < number <= high):
            continue
        subpath = os.path.join(path, dirname)
        if not os.path.isdir(subpath):
            continue
        for subdirname in sorted(os.listdir(subpath)):
            pieces = subdirname.split(name)
            if pieces[0] != "":
                # Entry does not start with the prefix.
                continue
            try:
                sample_number = int(pieces[1])
            except ValueError:
                # e.g. "<name>04.zip" lying next to the sample directories.
                continue
            if sample_number != number:
                continue
            dest_path = os.path.join(subpath, subdirname)
            if not os.path.isdir(dest_path):
                continue
            for dest_file in sorted(os.listdir(dest_path)):
                # splitext copes with names that have no dot or several dots.
                if os.path.splitext(dest_file)[1] == ".csv":
                    yield dest_path, dest_file
                            

In [18]:
# Build one row per labelled video for every diving batch found on disk.
diving_frames = []
for index, row in chlearn_diving_labels_df.iterrows():
    for dest_path, csv_file in find_dir(row["Num"], chalearn_path, row["Set"]):
        df_set = pd.read_csv(os.path.join(dest_path, csv_file), names=["name", "label"])
        # The video for sample "<batch>_<k>" is "<dest_path>/M_<k>.avi".
        # The column is assigned in one go: values written to the rows handed
        # out by iterrows() are not guaranteed to reach the frame.
        df_set["path"] = [
            os.path.join(dest_path, "M_" + sample_name.split("_")[1] + ".avi")
            for sample_name in df_set["name"]
        ]
        df_set["lexicon"] = row["Lexicon"]
        diving_frames.append(df_set)

# Concatenate once at the end instead of re-copying the frame on every batch.
df = (
    pd.concat(diving_frames, ignore_index=True, sort=False)
    if diving_frames
    else pd.DataFrame(columns=["name", "label", "path", "lexicon"])
)
df.shape

(1326, 4)

## Extract poses from each video

In [9]:
import numpy as np
import cv2
import os
import torch
import torch.nn as nn

os.sys.path.append('poseEstimation')
from poseEstimation.demo import infer_fast, VideoReader
from poseEstimation.modules.pose import Pose
from poseEstimation.modules.load_state import load_state
from poseEstimation.modules.keypoints import extract_keypoints, group_keypoints
from poseEstimation.models.with_mobilenet import PoseEstimationWithMobileNet

In [10]:
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device: " + DEVICE)

# cuDNN: switch it on when the backend exists and ask for deterministic
# kernels whenever CUDA is in use.
if torch.backends.cudnn.is_available():
    torch.backends.cudnn.enabled = True
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

# Seed NumPy and torch (CPU and all CUDA devices) with the same value.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Build the network and restore the weights; tensors are mapped to the CPU
# while the checkpoint is read.
net = PoseEstimationWithMobileNet()
checkpoint = torch.load("weights/checkpoint_iter_370000.pth", map_location='cpu')
load_state(net, checkpoint)

Using device: cuda


In [11]:
# One x and one y column per joint, interleaved: j0_x, j0_y, ..., j17_x, j17_y.
column_names = [f"j{joint}_{axis}" for joint in range(18) for axis in ("x", "y")]

In [12]:
def infer(net, image_provider, height_size, cpu):
    """Run pose estimation over a sequence of frames, keeping one pose per frame.

    Args:
        net: pose-estimation network; it is put in eval mode and, unless
            ``cpu`` is true, moved to CUDA.
        image_provider: iterable of frames, each passed to ``infer_fast``.
        height_size: forwarded unchanged to ``infer_fast``.
        cpu: when true the network stays on the CPU.

    Returns:
        A list with one flat array of 36 values for every frame in which at
        least one pose was found; only the first pose of a frame is kept.
        Keypoints that were not detected are stored as ``(0, 0)``. Frames
        without any pose are skipped, so the list can be shorter than the
        number of frames.
        NOTE(review): the flat layout is x/y interleaved per keypoint provided
        ``Pose.keypoints`` is the array handed to its constructor - confirm.
    """
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts

    pose_sequence = []

    # Inference only: no autograd bookkeeping is needed for the forward passes.
    with torch.no_grad():
        for img in image_provider:
            heatmaps, pafs, scale, pad = infer_fast(
                net, img, height_size, stride, upsample_ratio, cpu)

            # Only the first num_keypoints heatmap channels are scanned (the
            # original note says the 19th channel is background).
            total_keypoints_num = 0
            all_keypoints_by_type = []
            for kpt_idx in range(num_keypoints):
                total_keypoints_num += extract_keypoints(
                    heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num)

            pose_entries, all_keypoints = group_keypoints(
                all_keypoints_by_type, pafs)

            # Undo the network stride/upsampling, the padding and the resize.
            # This stays a per-row loop so that an empty keypoint array is
            # simply a no-op regardless of its shape.
            for kpt_id in range(all_keypoints.shape[0]):
                all_keypoints[kpt_id, 0] = (
                    all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
                all_keypoints[kpt_id, 1] = (
                    all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale

            current_poses = []
            for entry in pose_entries:
                if len(entry) == 0:
                    continue
                # Missing keypoints keep the (0, 0) they are initialised with.
                pose_keypoints = np.zeros((num_keypoints, 2), dtype=np.int32)
                for kpt_id in range(num_keypoints):
                    if entry[kpt_id] != -1.0:  # keypoint was found
                        keypoint_row = int(entry[kpt_id])
                        pose_keypoints[kpt_id, 0] = int(all_keypoints[keypoint_row, 0])
                        pose_keypoints[kpt_id, 1] = int(all_keypoints[keypoint_row, 1])
                current_poses.append(Pose(pose_keypoints, entry[18]))

            if current_poses:
                pose_sequence.append(current_poses[0].keypoints.reshape([36]))

    return pose_sequence

In [19]:
# Development cap: only the first MAX_VIDEOS rows of `df` are processed.
MAX_VIDEOS = 10

pose_frames = []
for index, row in df.iloc[:MAX_VIDEOS].iterrows():
    print(row["name"])
    frame_provider = VideoReader(row["path"])
    # Follow the device chosen in the setup cell instead of always assuming CUDA.
    pose_sequence = infer(net, frame_provider, 256, DEVICE == "cpu")
    pose_df = pd.DataFrame(pose_sequence, columns=column_names)
    pose_df["name"] = row["name"]
    pose_frames.append(pose_df)

# Concatenate once at the end instead of re-copying the frame for every video.
poses_df = (
    pd.concat(pose_frames, ignore_index=True, sort=False)
    if pose_frames
    else pd.DataFrame(columns=column_names + ["name"])
)

devel04_11
devel04_12
devel04_13
devel04_14
devel04_15
devel04_16
devel04_17
devel04_18
devel04_19
devel04_20


In [20]:
# Show the accumulated table: one row per frame with a detected pose, tagged
# with the name of the video it came from.
poses_df

Unnamed: 0,j0_x,j0_y,j1_x,j1_y,j2_x,j2_y,j3_x,j3_y,j4_x,j4_y,...,j13_y,j14_x,j14_y,j15_x,j15_y,j16_x,j16_y,j17_x,j17_y,name
0,132,78,128,110,100,110,90,150,117,166,...,0,126,73,137,73,117,76,145,76,devel04_11
1,132,78,128,112,100,112,89,151,113,172,...,0,126,73,137,73,117,76,145,76,devel04_11
2,132,78,128,112,98,112,89,151,105,185,...,0,128,73,137,73,117,76,145,76,devel04_11
3,132,78,128,114,98,112,90,157,102,198,...,0,126,73,139,73,117,76,145,76,devel04_11
4,132,78,128,112,98,112,89,159,92,204,...,0,126,73,137,71,117,76,145,75,devel04_11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
784,137,73,135,108,105,108,94,153,98,195,...,0,132,67,143,69,122,71,150,71,devel04_20
785,137,73,135,108,105,108,92,153,94,193,...,0,132,67,143,69,122,71,150,71,devel04_20
786,137,73,135,108,104,108,92,151,96,191,...,0,132,67,143,69,120,71,150,71,devel04_20
787,137,73,135,108,104,108,92,150,100,187,...,0,132,67,143,69,120,71,150,71,devel04_20
