In [None]:
from IPython.display import HTML

## Testing from Bash

In [None]:
!python mmaction2/demo/demo_skeleton.py dataset/uji_jalan.mp4 dataset/uji_jalan_out.mp4 \
    --config mmaction2/configs/skeleton/posec3d/slowonly_r50_8xb16-u48-240e_ntu60-xsub-keypoint.py \
    --checkpoint https://download.openmmlab.com/mmaction/skeleton/posec3d/slowonly_r50_u48_240e_ntu60_xsub_keypoint/slowonly_r50_u48_240e_ntu60_xsub_keypoint-f3adabf1.pth \
    --det-config mmaction2/demo/demo_configs/faster-rcnn_r50_fpn_2x_coco_infer.py \
    --det-checkpoint http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth \
    --det-score-thr 0.9 \
    --det-cat-id 0 \
    --pose-config mmaction2/demo/demo_configs/td-hm_hrnet-w32_8xb64-210e_coco-256x192_infer.py \
    --pose-checkpoint https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth \
    --label-map mmaction2/tools/data/skeleton/label_map_ntu60.txt

In [None]:
# Inline player for the input clip, rendered at half the available width.
HTML(data='<video width=50% controls autoplay loop><source src="dataset/uji_jalan.mp4"></video>')

In [None]:
# Inline player for the second path given to the demo script (presumably its
# annotated output), rendered at half the available width.
HTML(data='<video width=50% controls autoplay loop><source src="dataset/uji_jalan_out.mp4"></video>')

## Testing from Python

In [None]:
import tempfile

import cv2
import mmcv
import mmengine
import torch
from mmengine.utils import track_iter_progress

from mmaction.apis import (detection_inference, inference_skeleton,
                           init_recognizer, pose_inference)
from mmaction.registry import VISUALIZERS
from mmaction.utils import frame_extract

import moviepy.editor as mpy

In [None]:
# Text-overlay settings handed to cv2.putText when the action label is drawn.
FONTCOLOR = (255, 255, 255)  # white (same triple in BGR and RGB)
FONTFACE = cv2.FONT_HERSHEY_DUPLEX
FONTSCALE = 0.75
LINETYPE = 1
THICKNESS = 1

def visualize(pose_config, out_filename, frames, data_samples, action_label,
              fps=24):
    """Draw the pose skeleton and the action label on each frame and save a video.

    Args:
        pose_config (str): Path to the pose-estimator config file. Its
            ``visualizer`` entry is used to build the drawing backend.
        out_filename (str): Path of the video file to write.
        frames (list): Frames of the clip. Each frame is converted from BGR
            to RGB before it is drawn on.
        data_samples (list): Pose results paired with ``frames`` by position.
            The first element supplies ``dataset_meta`` for the visualizer.
        action_label (str): Text stamped near the top-left corner of every
            frame.
        fps (float): Frame rate of the written video. Defaults to 24, the
            previously hard-coded value; pass the source clip's real frame
            rate to keep the playback speed unchanged.

    Raises:
        ValueError: If ``frames`` or ``data_samples`` is empty.
    """
    # Fail early with a clear message instead of an IndexError on
    # data_samples[0] (or an empty clip handed to the video writer).
    if len(frames) == 0 or len(data_samples) == 0:
        raise ValueError(
            'visualize() needs at least one frame and one pose data sample')

    # Separate name so the `pose_config` path argument is not shadowed.
    pose_cfg = mmengine.Config.fromfile(pose_config)
    visualizer = VISUALIZERS.build(pose_cfg.visualizer)
    visualizer.set_dataset_meta(data_samples[0].dataset_meta)

    vis_frames = []
    print('Drawing skeleton for each frame')
    # zip() is materialised into a list before being handed to the progress
    # tracker, exactly as before.
    for sample, frame in track_iter_progress(list(zip(data_samples, frames))):
        rgb_frame = mmcv.imconvert(frame, 'bgr', 'rgb')
        visualizer.add_datasample(
            'result',
            rgb_frame,
            data_sample=sample,
            draw_gt=False,
            draw_heatmap=False,
            draw_bbox=True,
            show=False,
            wait_time=0,
            out_file=None,
            kpt_thr=0.3)
        vis_frame = visualizer.get_image()
        cv2.putText(vis_frame, action_label, (10, 30), FONTFACE, FONTSCALE,
                    FONTCOLOR, THICKNESS, LINETYPE)
        vis_frames.append(vis_frame)

    vid = mpy.ImageSequenceClip(vis_frames, fps=fps)
    vid.write_videofile(out_filename, remove_temp=True)

In [None]:
# Input clip and the annotated video that will be written.
video = 'dataset/uji_jalan.mp4'
out_filename = 'dataset/uji_jalan_out.mp4'

# Skeleton-based action recognizer: config file and checkpoint to load.
config = 'mmaction2/configs/skeleton/posec3d/slowonly_r50_8xb16-u48-240e_ntu60-xsub-keypoint.py'
checkpoint = 'https://download.openmmlab.com/mmaction/skeleton/posec3d/slowonly_r50_u48_240e_ntu60_xsub_keypoint/slowonly_r50_u48_240e_ntu60_xsub_keypoint-f3adabf1.pth'

# Human detector: config, checkpoint, score threshold and category id.
# The checkpoint is fetched over HTTPS like the other two (it was plain HTTP).
det_config = 'mmaction2/demo/demo_configs/faster-rcnn_r50_fpn_2x_coco_infer.py'
det_checkpoint = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth'
det_score_thr = 0.9
det_cat_id = 0

# Pose estimator: config and checkpoint.
pose_config = 'mmaction2/demo/demo_configs/td-hm_hrnet-w32_8xb64-210e_coco-256x192_infer.py'
pose_checkpoint = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth'

# Text file with one action name per line, indexed by predicted class id.
label_map = 'mmaction2/tools/data/skeleton/label_map_ntu60.txt'

In [None]:
# Use the first GPU when CUDA is usable; otherwise fall back to the CPU so the
# notebook still runs on machines without a GPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# Passed to frame_extract as its short-side argument.
short_side = 480

In [None]:
# Extract the clip's frames, using a temporary directory as the output folder.
# tmp_dir is kept alive here and cleaned up explicitly in a later cell.
tmp_dir = tempfile.TemporaryDirectory()
frame_paths, frames = frame_extract(video, short_side,
                                    tmp_dir.name)

# Fail with a clear message (and release the temp dir) instead of hitting an
# IndexError on frames[0] when nothing could be read from the video.
if len(frames) == 0:
    tmp_dir.cleanup()
    raise ValueError(f'No frames could be read from {video!r}')

# Only height and width are needed; slicing avoids assigning to `_`, which
# IPython uses for the last cell output.
h, w = frames[0].shape[:2]

In [None]:
# Human detection on the extracted frames; the second return value is unused.
det_results, _ = detection_inference(
    det_config,
    det_checkpoint,
    frame_paths,
    det_score_thr,
    det_cat_id,
    device,
)
# Release cached GPU memory held by PyTorch's allocator.
torch.cuda.empty_cache()

In [None]:
# Pose estimation on the frames, using the detection results from above.
pose_results, pose_data_samples = pose_inference(
    pose_config,
    pose_checkpoint,
    frame_paths,
    det_results,
    device,
)
# Release cached GPU memory held by PyTorch's allocator.
torch.cuda.empty_cache()

In [None]:
# Load the recognizer config into its own name. `config` keeps holding the
# file path, so this cell can be re-run without passing an already-loaded
# Config object back into Config.fromfile.
recognizer_cfg = mmengine.Config.fromfile(config)
# Build the model from the loaded config and the checkpoint.
model = init_recognizer(recognizer_cfg, checkpoint, device)

# Get action classification results from the pose results; (h, w) is the
# frame height and width taken from the first extracted frame.
result = inference_skeleton(model, pose_results, (h, w))

In [None]:
# Index of the highest predicted score.
max_pred_index = result.pred_score.argmax().item()

# Read the action names (one per line). `with` closes the file handle, and a
# separate name keeps `label_map` as the file path so this cell can be re-run.
with open(label_map) as label_file:
    action_names = [line.strip() for line in label_file]

# The name at the top-scoring index becomes the label drawn on the video.
action_label = action_names[max_pred_index]
print(action_label)

In [None]:
# Render the annotated video. The temporary frame directory is removed even
# when rendering fails, so a failed run does not leave it behind.
try:
    visualize(pose_config, out_filename, frames, pose_data_samples, action_label)
finally:
    tmp_dir.cleanup()

In [None]:
# Show the configured input clip; the path follows `video` instead of being
# duplicated as a literal, so changing the config cell updates this player.
HTML(f'<video width=50% controls autoplay loop><source src="{video}"></video>')

In [None]:
# Show the rendered result; the path follows `out_filename` instead of being
# duplicated as a literal, so changing the config cell updates this player.
HTML(f'<video width=50% controls autoplay loop><source src="{out_filename}"></video>')