<center style="font-weight:bold;font-size:20px">wbenbihi/hourglasstensorlfow: Stacked Hourglass Network for Human Pose Estimation</center>

<center style="font-weight:bold;font-size:20px">Parsing MPII Human Pose Dataset</center>

# Setup

## Imports

In [None]:
# Standard Imports
import os
import sys
import json
sys.path.append(os.path.join('..'))

In [None]:
# Specific Imports
import matplotlib.pyplot as plt
import tqdm
import pandas as pd
import numpy as np
import scipy.io
from config import CFG

## Global Variables

In [None]:
# Project root directory, read from the project-local configuration object.
ROOT_FOLDER = CFG.ROOT_FOLDER
# Name of the data sub-folder; it is joined onto ROOT_FOLDER when building paths.
DATA_FOLDER = 'data'
# File name of the MPII annotation .mat file that this notebook parses.
MPII_MAT = 'mpii_human_pose_v1_u12_1.mat'

In [None]:
# Full path of the annotation file: ROOT_FOLDER / DATA_FOLDER / MPII_MAT.
MAT_PATH = os.path.join(ROOT_FOLDER, DATA_FOLDER, MPII_MAT)

# Function definition

In [None]:
def parse_point(point):
    """Convert one joint annotation struct into a plain dictionary.

    Args:
        point: Object whose ``__dict__`` maps field names to array-like
            values exposing ``.shape`` and supporting ``[0][0]`` indexing.

    Returns:
        ``{'point': {'x': ..., 'y': ..., 'id': ..., 'is_visible': ...}}``.
        A field is ``None`` when it is missing from the struct or when its
        array has a zero-length axis; otherwise it is the ``[0][0]`` element.
    """
    attrs = point.__dict__

    def first_value(name):
        # Missing field -> no value.
        if name not in attrs:
            return None
        raw = attrs.get(name)
        # An axis of length 0 means the field exists but holds nothing.
        if 0 in raw.shape:
            return None
        return raw[0][0]

    return {
        'point': {
            name: first_value(name)
            for name in ('x', 'y', 'id', 'is_visible')
        }
    }

def parse_person(person, idx):
    """Convert one person annotation struct (an ``annorect`` entry) into a dictionary.

    Args:
        person: Object whose ``__dict__`` maps field names to array-like
            values exposing ``.shape`` and supporting ``[0][0]`` indexing.
        idx: Index of the person inside its image; stored under ``'ridx'``.

    Returns:
        ``{'person': {...}}`` with the keys ``ridx``, ``x1``, ``x2``, ``y1``,
        ``y2``, ``scale``, ``objpos`` (a dict with ``x`` and ``y``) and
        ``points`` (a list of ``parse_point`` results, or ``None``). Any field
        that is missing or whose array has a zero-length axis yields ``None``.
    """
    attrs = person.__dict__

    def has_data(name):
        # Present on the struct and not an empty array.
        return name in attrs and 0 not in attrs.get(name).shape

    def scalar(name):
        # Scalars are stored as nested arrays; [0][0] extracts the value.
        return attrs.get(name)[0][0] if has_data(name) else None

    record = {'ridx': idx}
    for name in ('x1', 'x2', 'y1', 'y2', 'scale'):
        record[name] = scalar(name)

    # objpos is itself a nested struct carrying x and y.
    if has_data('objpos'):
        center = attrs.get('objpos')[0][0].__dict__
        record['objpos'] = {
            'x': center.get('x')[0][0],
            'y': center.get('y')[0][0],
        }
    else:
        record['objpos'] = {'x': None, 'y': None}

    # annopoints wraps a struct whose 'point' field holds the joint structs.
    if has_data('annopoints'):
        joints = attrs.get('annopoints')[0][0].__dict__['point'][0]
        record['points'] = [parse_point(joint) for joint in joints]
    else:
        record['points'] = None

    return {'person': record}

def parse_persons(persons):
    """Parse every person annotation of one image.

    Args:
        persons: Iterable of person annotation structs.

    Returns:
        A list with one ``parse_person`` result per element; each person's
        position in ``persons`` is passed on as its index.
    """
    parsed = []
    for ridx, person in enumerate(persons):
        parsed.append(parse_person(person, ridx))
    return parsed

In [None]:
def cast_iterable(obj):
    """Recursively convert a parsed structure into JSON-serializable builtins.

    Dicts and lists are walked recursively. Python and NumPy integer scalars
    become ``int``, Python and NumPy floating scalars become ``float``, and
    ``None`` and ``str`` values are returned unchanged.

    Args:
        obj: A dict, list, ``None``, ``str``, or an integer/floating scalar
            (Python builtin or NumPy).

    Returns:
        The same structure built only from dict, list, ``None``, ``int``,
        ``float`` and ``str``.

    Raises:
        TypeError: If a value of any other type is encountered.
    """
    if isinstance(obj, dict):
        return {k: cast_iterable(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [cast_iterable(k) for k in obj]
    elif obj is None:
        return obj
    elif isinstance(obj, int) or np.issubdtype(type(obj), np.integer):
        return int(obj)
    # np.floating is the abstract base of every NumPy float scalar type. The
    # former `np.float` alias was just the builtin float (so it did not match
    # e.g. np.float32) and has been removed from recent NumPy releases.
    elif isinstance(obj, float) or np.issubdtype(type(obj), np.floating):
        return float(obj)
    elif isinstance(obj, str):
        return obj
    else:
        raise TypeError(f'{obj} is typed {type(obj)}')

# MPII Documentation

--------------------------------------------------------------------------- 
MPII Human Pose Dataset, Version 1.0 

Copyright 2015 Max Planck Institute for Informatics 

Licensed under the Simplified BSD License [see bsd.txt] 

--------------------------------------------------------------------------- 

We are making the annotations and the corresponding code freely available for research 
purposes. If you would like to use the dataset for any other purposes please contact 
the authors. 

## Introduction
MPII Human Pose dataset is a state of the art benchmark for evaluation
of articulated human pose estimation. The dataset includes around
**25K images** containing over **40K people** with annotated body
joints. The images were systematically collected using an established
taxonomy of every day human activities. Overall the dataset covers
**410 human activities** and each image assigned an activity
label. Each image was extracted from a YouTube video and provided with
preceding and following un-annotated frames. In addition, for the test
set we obtained richer annotations including body part occlusions and
3D torso and head orientations.

Following the best practices for the performance evaluation benchmarks
in the literature we withhold the test annotations to prevent
overfitting and tuning on the test set. We are working on an automatic
evaluation server and performance analysis tools based on rich test
set annotations.

## Citing the dataset
```
@inproceedings{andriluka14cvpr,
               author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
               title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
               booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
               year = {2014},
               month = {June}
}
```

## Download

-. **Images (12.9 GB)**
   
   http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/mpii_human_pose_v1.tar.gz
-. **Annotations (12.5 MB)**	
   
   http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/mpii_human_pose_v1_u12.tar.gz
-. **Videos for each image (25 batches x 17 GB)**	

   http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/mpii_human_pose_v1_sequences_batch1.tar.gz
   ...
   http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/mpii_human_pose_v1_sequences_batch25.tar.gz
-. **Image - video mapping (239 KB)**	
   
   http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/mpii_human_pose_v1_sequences_keyframes.mat

## Annotation description 
Annotations are stored in a matlab structure `RELEASE` having following fields

- `.annolist(imgidx)` - annotations for image `imgidx`
  - `.image.name` - image filename
  - `.annorect(ridx)` - body annotations for a person `ridx`
      - `.x1, .y1, .x2, .y2` - coordinates of the head rectangle
      - `.scale` - person scale w.r.t. 200 px height
      - `.objpos` - rough human position in the image
      - `.annopoints.point` - person-centric body joint annotations
        - `.x, .y` - coordinates of a joint
        - `id` 
            - joint id [//]: # "(0 - r ankle, 1 - r knee, 2 - r hip, 3 - l hip, 4 - l knee, 5 - l ankle, 6 - pelvis, 7 - thorax, 8 - upper neck, 9 - head top, 10 - r wrist, 11 - r elbow, 12 - r shoulder, 13 - l shoulder, 14 - l elbow, 15 - l wrist)"
        - `is_visible` - joint visibility
  - `.vidx` - video index in `video_list`
  - `.frame_sec` - image position in video, in seconds
 
- `img_train(imgidx)` - training/testing image assignment 
- `single_person(imgidx)` - contains rectangle id `ridx` of *sufficiently separated* individuals
- `act(imgidx)` - activity/category label for image `imgidx`
  - `act_name` - activity name
  - `cat_name` - category name
  - `act_id` - activity id
- `video_list(videoidx)` - specifies video id as is provided by YouTube. To watch video on youtube go to https://www.youtube.com/watch?v=video_list(videoidx) 

## Browsing the dataset
- Please use our online tool for browsing the data
http://human-pose.mpi-inf.mpg.de/#dataset
- Red rectangles mark testing images

## References
- **2D Human Pose Estimation: New Benchmark and State of the Art Analysis.**

  Mykhaylo Andriluka, Leonid Pishchulin, Peter Gehler and Bernt Schiele. 

  IEEE CVPR'14
- **Fine-grained Activity Recognition with Holistic and Pose based Features.**

  Leonid Pishchulin, Mykhaylo Andriluka and Bernt Schiele.

  GCPR'14

## Contact
You can reach us via `<lastname>@mpi-inf.mpg.de`
We are looking forward to your feedback. If you have any questions related to the dataset please let us know.


# Main Code

Since the MPII Human Pose Dataset labels are stored in a MATLAB `.mat` file, we need to parse them into a clean pandas DataFrame. The format is heavily nested and needs a little exploration before it can be parsed completely.

In [None]:
# Load the .mat file. struct_as_record=False makes scipy return MATLAB structs as
# objects instead of record arrays, so their fields can be read through
# `__dict__` / `_fieldnames`, which is how the rest of this notebook navigates them.
mat = scipy.io.loadmat(MAT_PATH, struct_as_record=False)
# [0][0] unwraps the top-level RELEASE struct from its enclosing array
# (presumably 1x1 -- confirm with mat['RELEASE'].shape).
release_mat = mat['RELEASE'][0][0]

We check if the `fieldnames` are correct

In [None]:
# Display the field names of the top-level RELEASE struct.
release_mat._fieldnames

In [None]:
# Sanity check of the nesting: X coordinate of point 0 of person 0 in image 4.
# Every level is an array that has to be indexed before the next struct's
# fields (exposed through __dict__) can be reached.
release_mat.__dict__['annolist'][0][4].__dict__['annorect'][0][0].__dict__['annopoints'][0][0].__dict__['point'][0][0].x

In [None]:
# Train/test assignment, documented by MPII as `img_train(imgidx)`.
# [0] selects the first row of the loaded array.
img_train = release_mat.__dict__.get('img_train')[0]

In [None]:
# List of videos: one record per entry, keyed by its position in `video_list`.
video_list = release_mat.__dict__.get('video_list')[0]
video_list_json = [
    {
        'video': {
            'videoidx': videoidx,
            # Each entry is wrapped in an array; [0] extracts the stored value.
            'video_list': entry[0],
        }
    }
    for videoidx, entry in enumerate(video_list)
]

In [None]:
# Remaining top-level RELEASE fields.
# `version` and `annolist` are reduced with [0] here; `single_person` and `act`
# are kept as loaded because the cells below index each of their entries with [0].
mpii_version = release_mat.__dict__.get('version')[0]
annolist = release_mat.__dict__.get('annolist')[0]
single_person = release_mat.__dict__.get('single_person')
act = release_mat.__dict__.get('act')

## Handle act

In [None]:
# Number of entries in `act` (documented by MPII as one per image: `act(imgidx)`).
len(act)

In [None]:
# Field names of the activity struct stored at index 4.
act[4][0]._fieldnames

In [None]:
def _act_record(imgidx, entry):
    """Build the activity record for one `act` entry.

    `entry[0]` is the activity struct. `cat_name` and `act_name` become None
    when their arrays are empty; `act_name` is split on ', ' into a list.
    """
    fields = entry[0].__dict__
    cat_name = fields.get('cat_name')
    act_name = fields.get('act_name')
    return {
        'act': {
            'imgidx': imgidx,
            'cat_name': cat_name[0] if len(cat_name) else None,
            'act_name': act_name[0].split(', ') if len(act_name) else None,
            'act_id': fields.get('act_id')[0][0],
        }
    }


act_json = [_act_record(imgidx, entry) for imgidx, entry in enumerate(act)]

## Handle single_person

In [None]:
# Number of entries in `single_person` (documented by MPII as `single_person(imgidx)`).
len(single_person)

In [None]:
def _single_person_record(imgidx, row):
    """Build the `single_person` record for one entry.

    `row[0]` holds the rectangle ids. An array with a zero-length axis yields
    None, otherwise the first element of every row is collected into a list.
    """
    ridx_array = row[0]
    if 0 in ridx_array.shape:
        ridx = None
    else:
        ridx = [elm[0] for elm in ridx_array]
    return {'single_person': {'imgidx': imgidx, 'ridx': ridx}}


single_person_json = [
    _single_person_record(imgidx, row)
    for imgidx, row in enumerate(single_person)
]

## Handle Annopoints

In [None]:
# Field names of the annotation struct of the first image.
annolist[0]._fieldnames

In [None]:
def _raw_annolist_record(imgidx, entry):
    """Build a shallow record for one `annolist` entry.

    Only the image-level fields are unpacked; `annorect` is kept as the raw
    loaded array so that it can be explored in the sample cells.
    """
    fields = entry.__dict__
    frame_sec = fields.get('frame_sec')
    vididx = fields.get('vididx')
    return {
        'annopoint': {
            'imgidx': imgidx,
            'image': fields.get('image')[0][0].__dict__.get('name')[0],
            'annorect': fields.get('annorect'),
            # Kept as the first row here; a zero-length axis means no value.
            'frame_sec': frame_sec[0] if 0 not in frame_sec.shape else None,
            'vididx': vididx[0][0] if 0 not in vididx.shape else None,
        }
    }


annolist_parse_head = [
    _raw_annolist_record(imgidx, entry)
    for imgidx, entry in enumerate(annolist)
]

### Sample

In [None]:
# First five records of the shallow parse; `annorect` is still the raw loaded array here.
annolist_parse_head[0:5]

In [None]:
# Image index used by the sample cells of this section.
IDX = 4
# Raw person annotations (`annorect`) of that image; [0] selects the first row.
annolist_parse_head[IDX]['annopoint']['annorect'][0]

In [None]:
# Index of the person inspected in the sample cells of this section.
IDPERS = 1
# Field names available on that person's annotation struct.
annolist_parse_head[IDX]['annopoint']['annorect'][0][IDPERS]._fieldnames

In [None]:
# Head rectangle of the sample person, displayed in the order (x1, x2, y1, y2).
sample_person_fields = annolist_parse_head[IDX]['annopoint']['annorect'][0][IDPERS].__dict__
tuple(
    sample_person_fields.get(key)[0][0]
    for key in ('x1', 'x2', 'y1', 'y2')
)

In [None]:
# `scale` of the sample person (documented by MPII as the person scale w.r.t. 200 px height).
annolist_parse_head[IDX]['annopoint']['annorect'][0][IDPERS].__dict__.get('scale')[0][0]

In [None]:
# `objpos` of the sample person as an (x, y) pair; objpos is a nested struct.
sample_objpos_fields = (
    annolist_parse_head[IDX]['annopoint']['annorect'][0][IDPERS]
    .__dict__.get('objpos')[0][0]
    .__dict__
)
(sample_objpos_fields.get('x')[0][0], sample_objpos_fields.get('y')[0][0])

In [None]:
# Joint annotation structs of the sample person (the `point` field of its `annopoints` struct).
annolist_parse_head[IDX]['annopoint']['annorect'][0][IDPERS].__dict__.get('annopoints')[0][0].__dict__['point'][0]

In [None]:
# Field names of the first joint annotation of the sample person.
annolist_parse_head[IDX]['annopoint']['annorect'][0][IDPERS].__dict__.get('annopoints')[0][0].__dict__['point'][0][0]._fieldnames

In [None]:
# Values of the first joint of the sample person, in the order (x, y, id, is_visible).
sample_point_fields = (
    annolist_parse_head[IDX]['annopoint']['annorect'][0][IDPERS]
    .__dict__.get('annopoints')[0][0]
    .__dict__['point'][0][0]
    .__dict__
)
tuple(
    sample_point_fields.get(key)[0][0]
    for key in ('x', 'y', 'id', 'is_visible')
)

### Coarse Parsing

In [None]:
def _annolist_record(imgidx, entry):
    """Build the fully parsed record for one `annolist` entry.

    Person annotations are expanded with `parse_persons`. Every field whose
    array has a zero-length axis is stored as None.
    """
    fields = entry.__dict__
    annorect = fields.get('annorect')
    frame_sec = fields.get('frame_sec')
    vididx = fields.get('vididx')
    return {
        'annopoint': {
            'imgidx': imgidx,
            'image': fields.get('image')[0][0].__dict__.get('name')[0],
            'annorect': parse_persons(annorect[0]) if 0 not in annorect.shape else None,
            'frame_sec': frame_sec[0][0] if 0 not in frame_sec.shape else None,
            'vididx': vididx[0][0] if 0 not in vididx.shape else None,
        }
    }


annolist_parsed = [
    _annolist_record(imgidx, entry)
    for imgidx, entry in enumerate(annolist)
]

## To JSON

In [None]:
# Pair every parsed structure with its output file name, then convert each
# payload to plain Python builtins so that json.dump can serialize it.
_raw_exports = [
    (annolist_parsed, 'annolist.json'),
    (single_person_json, 'single_person.json'),
    (video_list_json, 'video_list.json'),
    (act_json, 'act.json'),
    (img_train.tolist(), 'img_train.json'),
]
json_to_save = [
    (cast_iterable(payload), filename)
    for payload, filename in _raw_exports
]

In [None]:
# Write every payload as a JSON file into ROOT_FOLDER/DATA_FOLDER.
output_dir = os.path.join(ROOT_FOLDER, DATA_FOLDER)
for payload, filename in json_to_save:
    target_path = os.path.join(output_dir, filename)
    with open(target_path, 'w') as handle:
        json.dump(payload, handle)