### Data Combine

This notebook combines three preprocessed feature files into one, e.g.:
- Openface + Marlin + Mediapipe

### Input and Output

Input files:
- 3x `npy` files to be combined

Output files:
- 1x `npy` combined file

In [3]:
import numpy as np

# The three per-source feature files that will be merged. The order matters
# downstream: source 2 drives the merge, sources 1 and 3 are looked up by key.
input_data_path_1, input_data_path_2, input_data_path_3 = (
    'EmotiW2023 Data Small/Xy_marlin_features_large.npy',
    'EmotiW2023 Data Small/Xy_engage_bodypose.npy',
    'EmotiW2023 Data Small/Xy_engage_gaze+hp+au.npy',
)

# Destination of the merged array.
output_data_path = (
    'EmotiW2023 Data Small/Xy_engage_gaze+hp+au_marlin_bodypose.npy'
)

In [5]:
# Load the three source arrays in order. allow_pickle=True lets numpy
# unpickle object arrays, which can run arbitrary code, so only point these
# paths at files you trust.
Xy_1, Xy_2, Xy_3 = (
    np.load(path, allow_pickle=True)
    for path in (input_data_path_1, input_data_path_2, input_data_path_3)
)

# Quick sanity check: record count per source, then the first record of each.
loaded = (Xy_1, Xy_2, Xy_3)
print(*(len(records) for records in loaded))
print(*(records[0] for records in loaded))

8040 7773 8034
['subject_0_2msdhgqawh_vid_0_0.mp4'
 array([[-0.13852222, -0.03409084,  0.30835477, ..., -0.27071905,
         -0.05114482, -0.0084425 ],
        [-0.16257848, -0.05500085,  0.38991776, ..., -0.23623376,
         -0.07379916, -0.00473824],
        [-0.171606  , -0.07018153,  0.36333498, ..., -0.22672045,
         -0.08576258,  0.01905022],
        ...,
        [-0.10186562, -0.04683093,  0.3170146 , ..., -0.2960978 ,
         -0.05658226, -0.0413123 ],
        [-0.11917496,  0.00947589,  0.18174954, ..., -0.27356213,
          0.09654579, -0.00636648],
        [-0.11901491,  0.01756562,  0.17249958, ..., -0.19407684,
         -0.05737707, -0.07285963]], dtype=float32)
 'Engaged'] ['subject_68_0ng3yqwrg6_vid_0_0.mp4'
 array([[1.12808827e-04, 1.35073279e-04, 2.33070443e-02, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [8.37831144e-07, 5.83291142e-05, 3.40727347e-03, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [9.97596998

In [6]:
# Index the Xy_1 and Xy_3 records by their first element (the clip file name
# in the samples printed above) so each Xy_2 record can be matched by key.
# If a key occurs more than once in a source, the last occurrence wins.
x1_fm = {xy[0]: xy[1] for xy in Xy_1}
x3_fm = {xy[0]: xy[1] for xy in Xy_3}

# Combine all three datasets, driven by the records of Xy_2.
# Each output row is (key, Xy_2 features, Xy_1 features, Xy_3 features, xy[2]),
# where xy[2] comes from the Xy_2 record (presumably the label, as in the
# Xy_1 sample shown above -- TODO confirm Xy_2 uses the same layout).
data = []
for xy in Xy_2:
    key = xy[0]

    # Report every source the key is missing from, then skip the record.
    missing_x1 = key not in x1_fm
    missing_x3 = key not in x3_fm
    if missing_x1 or missing_x3:
        if missing_x1:
            print(f"Key {key} not found in x1_fm")
        if missing_x3:
            print(f"Key {key} not found in x3_fm")
        continue

    x1 = x1_fm[key]
    x3 = x3_fm[key]

    # Records whose Xy_1 value is a tuple are excluded, as before
    # (presumably a placeholder for a failed extraction -- TODO confirm).
    # They are now reported instead of being dropped silently, so the final
    # row count can always be reconciled with the printed messages.
    if isinstance(x1, tuple):
        print(f"Key {key} skipped: x1_fm value is a tuple")
        continue

    data.append((key, xy[1], x1, x3, xy[2]))  # Include data from all 3 sources

Key subject_28_xf4ogs7wj4_vid_0_0.mp4 not found in x3_fm
Key subject_28_xf4ogs7wj4_vid_0_1.mp4 not found in x3_fm
Key subject_28_xf4ogs7wj4_vid_0_10.mp4 not found in x3_fm
Key subject_114_randombn205mtt5s_vid_0_0.mp4 not found in x3_fm
Key subject_61_9lo6k238pu_vid_0_0.mp4 not found in x3_fm


In [7]:
# Pack the merged rows into an object array (the rows mix strings and
# arrays) and write it to disk. Object arrays are stored via pickle, so the
# file must be read back with allow_pickle=True.
combined = np.array(data, dtype='object')
np.save(output_data_path, combined)

In [8]:
# Rebind `data` from the list of row tuples to an object array, then show
# how many rows were merged.
data = np.array(data, dtype=object)
data.shape[0]

7768

In [9]:
# Round-trip check: read the file that was just written and display its
# first row to confirm the combined layout.
Xy_test = np.load(file=output_data_path, allow_pickle=True)
first_row = Xy_test[0]
first_row

array(['subject_68_0ng3yqwrg6_vid_0_0.mp4',
       array([[1.12808827e-04, 1.35073279e-04, 2.33070443e-02, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              [8.37831144e-07, 5.83291142e-05, 3.40727347e-03, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              [9.97596998e-06, 4.23564044e-05, 5.18335506e-03, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              ...,
              [1.02391571e-07, 1.85546987e-06, 5.48816644e-03, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              [1.06349875e-07, 1.94433832e-07, 1.85562008e-03, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
              [4.73229441e-07, 1.29487322e-06, 6.26760635e-04, ...,
               0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])   ,
       array([[ 0.03229721, -0.10042803,  0.4504739 , ..., -0.06554577,
                0.16712351,  0.02580368],
              [-0.02973243