### Mediapipe

### Input and Output
Input files:
- Mediapipe `csv` files folder
- Label `xlsx` file

Output files:
- save the processed file in `npy` format:
  - `bodypose.npy`

In [13]:
import glob
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

n_segments = 20

feature_files = glob.glob("lEmotiW2023 Data Small/mediapipe/*.csv")
labels = pd.read_excel('EmotiW2023 Data Small/engagement_labels.xlsx')
file_path_dir = 'EmotiW2023 Data Small/mediapipe/'

In [14]:
def filter_bodypose(row):
    for i in range(33):
        if row[f'visibility{i}'] < 0.9:
            row[f'x{i}'] = 0
            row[f'y{i}'] = 0
            row[f'z{i}'] = 0
    return row

def get_features(input_segment):
    if len(input_segment) == 0:
        return []
    return input_segment.var()

def path_to_csv(fname):
    return fname.split('.mp4')[0] + '.csv'

In [21]:
drop_cols = ['frame'] + [f'visibility{i}' for i in range(33)]
n_segments = 20

def parse_features(file_path):
    
    df = pd.read_csv(file_path)
    df = df.apply(filter_bodypose, axis=1)

    df.drop(drop_cols, axis=1, inplace=True)

    window_size = int(df.shape[0]/n_segments)
    
    if window_size < 1:
        return []
    instance_features = []
    start_index = 0
    for i in range(n_segments):
        if i == n_segments - 1:
            end_index = df.shape[0]
        else:
            end_index = start_index + window_size
        
        # update here for our dataset
#         print (start_index, end_index)
        index_features = get_features(df.iloc[start_index:end_index, :])
        if len(index_features):
            instance_features.append(index_features)
        start_index = start_index + window_size
    
    
    instance_features = np.vstack(instance_features)
    assert instance_features.shape[0] == 20, "shape issue"

    return instance_features

_errors_ = []
def extract_bodypose_features():
    data = []
    for f in tqdm(labels.to_dict(orient='records')):
        
        try:
            #fname = file_path_dir + path_to_csv(f['chunk'].split('/')[-1])
            #features = parse_features(fname)
            fname = path_to_csv(f['chunk'])
            fname = file_path_dir + fname.split('/')[-1]
            features = parse_features(fname)
            data.append((fname, 
                         features, 
                         f['label']
                        ))
        except FileNotFoundError:
            _errors_.append(fname)
        except Exception as e:
            print ("exception: ", e)
            _errors_.append(fname)
            
    return np.array(data, dtype='object')

In [22]:
# extract features by feature type
Xy = extract_bodypose_features()

100%|████████████████████████████████████████████████████████████████████████████| 8040/8040 [1:13:27<00:00,  1.82it/s]


In [23]:
Xy[0]

array(['EmotiW2023 Data Small/mediapipe/subject_68_0ng3yqwrg6_vid_0_0.csv',
       array([[1.12808827e-04, 1.35073279e-04, 2.33070443e-02, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              [8.37831144e-07, 5.83291142e-05, 3.40727347e-03, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              [9.97596998e-06, 4.23564044e-05, 5.18335506e-03, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              ...,
              [1.02391571e-07, 1.85546987e-06, 5.48816644e-03, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              [1.06349875e-07, 1.94433832e-07, 1.85562008e-03, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              [4.73229441e-07, 1.29487322e-06, 6.26760635e-04, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])   ,
       'Engaged'], dtype=object)

In [35]:
np.save('EmotiW2023 Data Small/Xy_engage_bodypose.npy', Xy)

In [36]:
def path_to_mp4(fname):
    return fname.split('.csv')[0] + '.mp4'

# s = df.drop(drop_cols, axis=1).var()

In [37]:
Xy_pdm = np.load('EmotiW2023 Data Small/Xy_engage_bodypose.npy', allow_pickle=True)

In [38]:
for i in range(len(Xy_pdm)):
    #Xy_pdm[i][0] = 'chunks/' + path_to_mp4(Xy_pdm[i][0].split('/')[-1])
    Xy_pdm[i][0] = path_to_mp4(Xy_pdm[i][0].split('/')[-1])

Xy_pdm[0]
# Make sure it is array(['subject_68_0ng3yqwrg6_vid_0_0.mp4', array([[1.12808827e-04, 1.35073279e-04, ... 

array(['subject_68_0ng3yqwrg6_vid_0_0.mp4',
       array([[1.12808827e-04, 1.35073279e-04, 2.33070443e-02, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              [8.37831144e-07, 5.83291142e-05, 3.40727347e-03, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              [9.97596998e-06, 4.23564044e-05, 5.18335506e-03, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              ...,
              [1.02391571e-07, 1.85546987e-06, 5.48816644e-03, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              [1.06349875e-07, 1.94433832e-07, 1.85562008e-03, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              [4.73229441e-07, 1.29487322e-06, 6.26760635e-04, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])   ,
       'Engaged'], dtype=object)

In [39]:
np.save('EmotiW2023 Data Small/Xy_engage_bodypose.npy', Xy_pdm)