In [12]:
import os
# Dataset root and feature-set version; generated features are stored under
# <ROOT_PATH>/features/<VER>.
ROOT_PATH, VER = '/sources/dataset', 'ver0'
SAVE_PATH = os.path.join(ROOT_PATH, 'features', VER)

# exist_ok=True keeps this cell re-runnable once the directory is there.
os.makedirs(SAVE_PATH, exist_ok=True)

# Free-text summary of this feature version: which face / pose / hand landmarks
# are selected, the final feature shape, and the steps of the generation
# pipeline. The text is saved verbatim as description.txt further down, so it
# is runtime data and must not be reformatted.
description = \
    """ ver 0. \n 
        얼굴 랜드마크:
        LIPSOUT_LM = [0, 267, 269, 270, 409, 287, 375, 321, 405, 314, 17, 84, 181, 91, 146, 57, 185, 40, 39, 37]
        LIPSIN_LM = [13, 312, 311, 310, 415, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78, 191, 80, 81, 82]

        포즈 랜드마크: 
        PNOSE_LM = [0]
        PFACE_LM = [8, 6, 5, 4, 1,2,3,7]
        BODY_LM = [11, 12, 24 ,23]
        ARM_LM = [14, 16, 22, 20, 18, 13, 15, 21, 19, 17]

        손 랜드마크: 전부 사용

        최종 데이터 형태: [1, 3150]

        데이터 생성 파이프라인 요약
        1. 랜드마크 선택: [N, 543, 3] -> [N, 105, 3]
        2. 프레임 interpolation: N % SEGMENT == 0, SEGMENT=5     
        3. SEGMENT 별로 프레임 축에 대해서 mean [SEGMENT, 105, 3] , std 계산: [SEGMENT, 105, 3] -> [2*SEGMENT, 105, 3] (mean*5, std*5)
        3.1 mean, std 계산 시에 NaN -> 0 하여 계산에 미포함 
        4. Flatten: [1, 3150]
    """

# Store the description next to the generated features (SAVE_PATH is
# <ROOT_PATH>/features/<VER>, the same directory the original path spelled out).
# The encoding is pinned to UTF-8 because the text contains Korean characters;
# relying on the platform default can raise UnicodeEncodeError or produce a
# file that is unreadable elsewhere.
with open(os.path.join(SAVE_PATH, 'description.txt'), 'w', encoding='utf-8') as f:
    f.write(description)

In [3]:
import json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from scipy.ndimage import zoom
from tqdm import tqdm

In [4]:
ROWS_PER_FRAME = 543  # number of landmarks per frame


def load_relevant_data_subset(pq_path):
    """Read the x/y/z columns of a landmark parquet file as a 3-D array.

    Args:
        pq_path: Path of the parquet file to read.

    Returns:
        ``np.float32`` array of shape ``[n_frames, ROWS_PER_FRAME, 3]`` where
        ``n_frames`` is the row count divided by ``ROWS_PER_FRAME``.

    Raises:
        ValueError: from ``reshape`` when the row count is not a multiple of
            ``ROWS_PER_FRAME``.
    """
    coord_columns = ['x', 'y', 'z']
    table = pd.read_parquet(pq_path, columns=coord_columns)

    # Rows are stored frame after frame, ROWS_PER_FRAME rows for each frame.
    n_frames = len(table) // ROWS_PER_FRAME
    coords = table.to_numpy().reshape(n_frames, ROWS_PER_FRAME, len(coord_columns))
    return coords.astype(np.float32)

### Config

In [5]:
# Number of temporal segments each clip is split into.
SEGMENTS = 5

# Start index of each landmark group along the 543-landmark axis. The values
# imply the layout: face first, then 21 left-hand, 33 pose and 21 right-hand
# landmarks (NOTE(review): confirm against the dataset documentation).
LEFT_HAND_OFFSET = 468
POSE_OFFSET = LEFT_HAND_OFFSET+21
RIGHT_HAND_OFFSET = POSE_OFFSET+33

# Face landmarks (already absolute indices into the landmark axis).
LIPSOUT_LM = [0, 267, 269, 270, 409, 287, 375, 321, 405, 314, 17, 84, 181, 91, 146, 57, 185, 40, 39, 37]
LIPSIN_LM = [13, 312, 311, 310, 415, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78, 191, 80, 81, 82]

# Pose landmarks, numbered RELATIVE to the start of the pose group.
PNOSE_LM = [0]
PFACE_LM = [8, 6, 5, 4, 1,2,3,7]
BODY_LM = [11, 12, 24 ,23]
ARM_LM = [14, 16, 22, 20, 18, 13, 15, 21, 19, 17]

lip_landmarks = LIPSIN_LM + LIPSOUT_LM
# Shift the pose-relative indices into the pose group. Without POSE_OFFSET they
# address rows 0..24, i.e. face landmarks, and collide with the lip indices
# 0, 13, 14 and 17.
pose_landmarks = [POSE_OFFSET + idx for idx in PNOSE_LM + PFACE_LM + BODY_LM + ARM_LM]
left_hand_landmarks = list(range(LEFT_HAND_OFFSET, LEFT_HAND_OFFSET+21))
right_hand_landmarks = list(range(RIGHT_HAND_OFFSET, RIGHT_HAND_OFFSET+21))

# Selection order: lips, pose, left hand, right hand.
point_landmarks = lip_landmarks + pose_landmarks + left_hand_landmarks + right_hand_landmarks
LANDMARKS = len(point_landmarks)
print(LANDMARKS)


105


In [6]:
def torch_nan_mean(x, axis=1):
    """Mean of ``x`` along ``axis`` computed over the non-NaN entries only.

    Args:
        x: Floating-point tensor that may contain NaN values.
        axis: Dimension to reduce.

    Returns:
        Tensor with ``axis`` removed. A slice made up entirely of NaN yields
        NaN (0 / 0); callers decide how to treat it.
    """
    nan_mask = torch.isnan(x)

    # NaN entries add nothing to the sum and are left out of the count.
    total = torch.where(nan_mask, torch.zeros_like(x), x).sum(dim=axis)
    valid_count = (~nan_mask).to(x.dtype).sum(dim=axis)

    return total / valid_count


def torch_nan_std(x, axis=1):
    """Standard deviation of ``x`` along ``axis`` ignoring NaN entries.

    The squared deviations are averaged over the number of non-NaN entries
    (no Bessel correction).

    Args:
        x: Floating-point tensor that may contain NaN values.
        axis: Dimension to reduce.

    Returns:
        Tensor with ``axis`` removed; NaN where a slice has no valid entry.
    """
    mean_values = torch_nan_mean(x, axis=axis)

    # Put the reduced dimension back at `axis` so the mean broadcasts against
    # x. This used to be hard-coded to dimension 1, which broke any other axis.
    d = x - mean_values.unsqueeze(axis)
    return torch.sqrt(torch_nan_mean(d * d, axis=axis))

def fill_nan_zero(x):
    """Return a tensor equal to ``x`` with every NaN entry replaced by 0.

    Only NaN is touched; infinities and finite values pass through unchanged.
    The input tensor itself is not modified.
    """
    return torch.where(torch.isnan(x), torch.zeros_like(x), x)

    

### Feature Generation class

In [7]:
class FeatureGen():
    """Turn one landmark clip into a single fixed-length feature vector.

    Steps performed by ``__call__``:
      1. keep only the selected lip, pose and hand landmarks;
      2. linearly resample along the frame axis, only when needed, so the
         frame count is a multiple of ``segments``;
      3. split the frames into ``segments`` equal chunks and compute the
         NaN-ignoring mean and standard deviation of every chunk;
      4. replace remaining NaN statistics with 0 and flatten.

    Relies on the module-level helpers ``torch_nan_mean``, ``torch_nan_std``
    and ``fill_nan_zero``.

    Attributes set by ``__init__``:
      point_landmarks: indices taken along the landmark axis, in the order
          lips, pose, left hand, right hand.
      segments: number of temporal chunks.
    """

    def __init__(self, segments: int=5):
        """Build the landmark selection.

        Args:
            segments: Number of temporal chunks per clip; must be >= 1.

        Raises:
            ValueError: if ``segments`` is smaller than 1.
        """
        if segments < 1:
            raise ValueError(f"segments must be >= 1, got {segments}")

        # Start index of each landmark group along the landmark axis.
        LEFT_HAND_OFFSET = 468
        POSE_OFFSET = LEFT_HAND_OFFSET+21
        RIGHT_HAND_OFFSET = POSE_OFFSET+33

        # Face landmarks (absolute indices).
        LIPSOUT_LM = [0, 267, 269, 270, 409, 287, 375, 321, 405, 314, 17, 84, 181, 91, 146, 57, 185, 40, 39, 37]
        LIPSIN_LM = [13, 312, 311, 310, 415, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78, 191, 80, 81, 82]

        # Pose landmarks, numbered relative to the start of the pose group.
        PNOSE_LM = [0]
        PFACE_LM = [8, 6, 5, 4, 1,2,3,7]
        BODY_LM = [11, 12, 24 ,23]
        ARM_LM = [14, 16, 22, 20, 18, 13, 15, 21, 19, 17]

        lip_landmarks = LIPSIN_LM + LIPSOUT_LM
        # Shift the pose-relative indices into the pose group; unshifted they
        # would pick face rows 0..24 and duplicate lip indices 0, 13, 14, 17.
        pose_landmarks = [POSE_OFFSET + idx for idx in PNOSE_LM + PFACE_LM + BODY_LM + ARM_LM]
        left_hand_landmarks = list(range(LEFT_HAND_OFFSET, LEFT_HAND_OFFSET+21))
        right_hand_landmarks = list(range(RIGHT_HAND_OFFSET, RIGHT_HAND_OFFSET+21))

        self.point_landmarks = lip_landmarks + pose_landmarks + left_hand_landmarks + right_hand_landmarks
        self.segments = segments

    def __call__(self, x):
        """Compute the feature vector of one clip.

        Args:
            x: Array-like of shape ``[n_frames, n_landmarks, n_coords]`` with
                at least one frame; it is indexed along axis 1 with
                ``point_landmarks``.

        Returns:
            ``torch.float32`` tensor of shape
            ``[1, 2 * segments * len(point_landmarks) * n_coords]``: the
            per-segment means followed by the per-segment standard deviations.

        Raises:
            ValueError: if the clip has no frames.
        """
        x = np.take(x, self.point_landmarks, axis=1)    # [N, num_landmark, num_coord]
        n_frame, num_landmark, num_coord = x.shape
        if n_frame == 0:
            raise ValueError("cannot build features from a clip with no frames")

        # Frames to add so the clip splits evenly. This is 0 when n_frame is
        # already a multiple of `segments`; the previous formula added a full
        # extra `segments` frames in that case and resampled for nothing.
        extra = (-n_frame) % self.segments
        if extra:
            # Linear resampling along the frame axis only.
            # NOTE(review): NaN frames (missing landmarks) go through the
            # interpolation as-is; confirm that is the intended treatment.
            x = zoom(x, ((n_frame + extra) / n_frame, 1, 1), order=1)     # [N', num_landmark, num_coord]

        x = torch.tensor(x, dtype=torch.float32)
        frame_per_seg = x.shape[0] // self.segments
        # [segments, frame_per_seg, num_landmark, num_coord]
        x = x.view(self.segments, frame_per_seg, num_landmark, num_coord)

        # Statistics over the frames of each segment (axis 1); a landmark that
        # is NaN for a whole segment gives NaN and is then zeroed.
        x_mean = fill_nan_zero(torch_nan_mean(x))   # [segments, num_landmark, num_coord]
        x_std = fill_nan_zero(torch_nan_std(x))     # [segments, num_landmark, num_coord]

        feat = torch.cat([x_mean, x_std], dim=0)    # [2*segments, num_landmark, num_coord]
        return feat.view(1, -1)                     # [1, 2*segments * num_landmark * num_coord]


feat_converter = FeatureGen()

### Convert raw data and save

In [9]:
from tqdm import tqdm

def convert_row(row, right_handed=True):
    """Build the ``(features, label)`` pair for one row of the training table.

    Args:
        row: Object exposing ``path`` (parquet file relative to ``ROOT_PATH``)
            and ``label``.
        right_handed: Accepted but not used by this function.

    Returns:
        Tuple of the feature vector as a NumPy array and ``row.label``.
    """
    parquet_file = os.path.join(ROOT_PATH, row.path)
    landmarks = load_relevant_data_subset(parquet_file)
    features = feat_converter(landmarks)
    return features.cpu().numpy(), row.label

def convert_and_save_data():
    """Convert every clip listed in ``train.csv`` and save features and labels.

    Reads ``train.csv`` and ``sign_to_prediction_index_map.json`` from
    ``ROOT_PATH``, converts each row with ``convert_row`` and writes
    ``feature_data.npy`` (shape ``[n_rows, 2*SEGMENTS*LANDMARKS*3]``) and
    ``feature_labels.npy`` (shape ``[n_rows]``) into ``SAVE_PATH``.

    Raises:
        ValueError: if a sign in ``train.csv`` has no entry in the label map.
    """
    df = pd.read_csv(os.path.join(ROOT_PATH, 'train.csv'))
    # Context manager so the JSON file handle is closed deterministically.
    with open(os.path.join(ROOT_PATH, 'sign_to_prediction_index_map.json')) as f:
        label_map = json.load(f)
    df['label'] = df['sign'].map(label_map)

    # Series.map leaves NaN for unknown signs; fail loudly instead of saving
    # NaN labels.
    unmapped = df.loc[df['label'].isna(), 'sign'].unique()
    if len(unmapped) > 0:
        raise ValueError(f"signs missing from the label map: {list(unmapped)}")

    total = df.shape[0]

    npdata = np.zeros((total, 2*SEGMENTS*LANDMARKS*3))
    nplabels = np.zeros(total)

    # No manual `break` on the last index: the iterator ends by itself, and
    # breaking early stopped the progress bar one step short of `total`.
    for i, row in tqdm(enumerate(df.itertuples()), total=total):
        (x, y) = convert_row(row)
        npdata[i, :] = x
        nplabels[i] = y

    np.save(os.path.join(SAVE_PATH, 'feature_data.npy'), npdata)
    np.save(os.path.join(SAVE_PATH, 'feature_labels.npy'), nplabels)

In [10]:
# Run the full conversion: every row of train.csv is converted and the results
# are written to feature_data.npy and feature_labels.npy under SAVE_PATH.
convert_and_save_data()

100%|█████████▉| 94476/94477 [09:28<00:00, 166.10it/s]


In [11]:
# Read the saved arrays back to check their shapes and peek at the first row.
X, y = (
    np.load(os.path.join(SAVE_PATH, file_name))
    for file_name in ('feature_data.npy', 'feature_labels.npy')
)
print(X.shape, y.shape)

print(X[0].shape, X[0])

(94477, 3150) (94477,)
(3150,) [ 0.50394118  0.39283019 -0.01993244 ...  0.          0.
  0.        ]
