In [187]:
import os
import json


import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
from scipy.interpolate import CubicSpline

In [188]:
sample_video = {
    "positions": {
        "1.0": {
            "Nose": [0.4727230370044708, 0.3838244676589966, -0.27421602606773376],
            "Left_eye_inner": [0.4753738045692444, 0.37479984760284424, -0.26180776953697205],
            "Left_eye": [0.47714120149612427, 0.3745166063308716, -0.26183751225471497],
            "Left_eye_outer": [0.4789522588253021, 0.37425851821899414, -0.26184573769569397],
            "Right_eye_inner": [0.4696425199508667, 0.37428316473960876, -0.2632329761981964],
            "Right_eye": [0.4674718976020813, 0.37366846203804016, -0.26325973868370056],
            "Right_eye_outer": [0.4651995897293091, 0.37315359711647034, -0.26330235600471497],
            "Left_ear": [0.4796261489391327, 0.3754121959209442, -0.16696897149085999],
            "Right_ear": [0.4597683548927307, 0.3751758933067322, -0.1745608150959015],
            "Mouth_left": [0.475565642118454, 0.39149120450019836, -0.23681190609931946],
            "Mouth_right": [0.4675469994544983, 0.39173346757888794, -0.2391127645969391],
            "Left_shoulder": [0.4979342222213745, 0.42636486887931824, -0.0960470512509346],
            "Right_shoulder": [0.4418589472770691, 0.43039795756340027, -0.09553270041942596],
            "Left_elbow": [0.5008105039596558, 0.4966505765914917, -0.05243808776140213],
            "Right_elbow": [0.4394679069519043, 0.5003064870834351, -0.05494270101189613],
            "Left_wrist": [0.502241313457489, 0.5615555047988892, -0.13062617182731628],
            "Right_wrist": [0.4392949938774109, 0.5611810088157654, -0.13273857533931732],
            "Left_pinky": [0.5023744702339172, 0.5801471471786499, -0.15442657470703125],
            "Right_pinky": [0.44217008352279663, 0.5799118876457214, -0.1556280553340912],
            "Left_index": [0.4990525245666504, 0.5818195939064026, -0.17855951189994812],
            "Right_index": [0.44541043043136597, 0.5800011157989502, -0.18217860162258148],
            "Left_thumb": [0.49780577421188354, 0.5752022862434387, -0.14155635237693787],
            "Right_thumb": [0.44588392972946167, 0.5738919973373413, -0.14443477988243103],
            "Left_hip": [0.4885951280593872, 0.5648671984672546, -0.0021941151935607195],
            "Right_hip": [0.4544827342033386, 0.5639280676841736, 0.00222505209967494],
            "Left_knee": [0.48854485154151917, 0.6586241722106934, -0.02576499618589878],
            "Right_knee": [0.45985522866249084, 0.6649723649024963, -0.01617051102221012],
            "Left_ankle": [0.49369779229164124, 0.7492525577545166, 0.1270214319229126],
            "Right_ankle": [0.4526473581790924, 0.7565955519676208, 0.11720269173383713],
            "Left_heel": [0.49099811911582947, 0.7575002312660217, 0.13673612475395203],
            "Right_heel": [0.45248162746429443, 0.7645536065101624, 0.12494811415672302],
            "Left_foot_index": [0.4925276041030884, 0.7878919243812561, 0.04002412036061287],
            "Right_foot_index": [0.45452722907066345, 0.7990591526031494, 0.018001534044742584]
        },
        "2.0": {
            "Nose": [0.4726940393447876, 0.3831414580345154, -0.26175472140312195],
            "Left_eye_inner": [0.47510889172554016, 0.373349666595459, -0.24973495304584503],
            "Left_eye": [0.47685539722442627, 0.37310147285461426, -0.24976149201393127],
            "Left_eye_outer": [0.4786836504936218, 0.3729214072227478, -0.24976757168769836],
            "Right_eye_inner": [0.46944373846054077, 0.3729451894760132, -0.2508068382740021],
            "Right_eye": [0.4673190414905548, 0.3724323809146881, -0.25082799792289734],
            "Right_eye_outer": [0.46509063243865967, 0.37205004692077637, -0.25087013840675354],
            "Left_ear": [0.4796132743358612, 0.37473800778388977, -0.1597304791212082],
            "Right_ear": [0.45977213978767395, 0.37455999851226807, -0.16519752144813538],
            "Mouth_left": [0.4756295382976532, 0.3907076120376587, -0.22632761299610138],
            "Mouth_right": [0.46772199869155884, 0.3906969428062439, -0.22808308899402618],
            "Left_shoulder": [0.4979526400566101, 0.4263125956058502, -0.09424526989459991],
            "Right_shoulder": [0.4417797923088074, 0.43034741282463074, -0.08744104951620102],
            "Left_elbow": [0.5012014508247375, 0.4964407980442047, -0.04724247008562088],
            "Right_elbow": [0.4388962984085083, 0.5015712380409241, -0.04607902467250824],
            "Left_wrist": [0.5024884939193726, 0.5597358345985413, -0.11459018290042877],
            "Right_wrist": [0.4389660060405731, 0.5606597661972046, -0.1166703924536705],
            "Left_pinky": [0.5031002759933472, 0.5795527696609497, -0.13527444005012512],
            "Right_pinky": [0.4410523474216461, 0.5797632336616516, -0.13701032102108002],
            "Left_index": [0.5001664757728577, 0.5803791284561157, -0.15812329947948456],
            "Right_index": [0.4441104233264923, 0.5790987014770508, -0.1630110740661621],
            "Left_thumb": [0.49856647849082947, 0.5737610459327698, -0.12493623793125153],
            "Right_thumb": [0.4444619417190552, 0.5730152726173401, -0.12801975011825562],
            "Left_hip": [0.48892828822135925, 0.5621697902679443, -0.005039631854742765],
            "Right_hip": [0.45490607619285583, 0.5626218318939209, 0.005029990803450346],
            "Left_knee": [0.4885903000831604, 0.6585646271705627, -0.02346721664071083],
            "Right_knee": [0.46099233627319336, 0.6625020503997803, -0.0035396532621234655],
            "Left_ankle": [0.49394509196281433, 0.7517425417900085, 0.12642322480678558],
            "Right_ankle": [0.45273780822753906, 0.7559921145439148, 0.13847103714942932],
            "Left_heel": [0.4910394847393036, 0.7606337666511536, 0.13554228842258453],
            "Right_heel": [0.45263031125068665, 0.7640327215194702, 0.14737264811992645],
            "Left_foot_index": [0.49276602268218994, 0.7902218103408813, 0.035742588341236115],
            "Right_foot_index": [0.45474597811698914, 0.7973014116287231, 0.04534585028886795]
        },
    }
}

  <!-- for column in row.index:
            # Extract joint name and position (e.g., Nose_x)
            joint = '_'.join(column.split('_')[1:])
            if joint not in mean_data_series:
                mean_data_series[joint] = []
            mean_data_series[joint].append(row[column]) -->

Let say we have
a[1_a_x:5, 1_a_y:4, 2_a_x:3, 2_a_y:7]
The above code give:
result = {
a_x:[5,3],
a_y:[4,7]
}


In [189]:
def getFilesPath(directory):
    files = []
    for root, dirs, file in os.walk(directory):
        for f in file:
            files.append(os.path.join(root, f))
    return files


def loadFiles(files_path, activities, joints_mapping):
    activity_dfs = {}
    for activity in activities:
        dataframes = []
        for file_path in files_path:
            if file_path.endswith('.json') and activity in file_path:
                with open(file_path, 'r') as file:
                    file_data = json.load(file)
                    check_joints_in_frames(file_data, joints_mapping)
                    file_data_with_visibility = add_visibility_to_keypoints(
                        file_data)
                    file_df = flatternData(
                        file_data_with_visibility, joints_mapping)
                    file_df_resampled = resampleVideo(file_df, 100)
                    dataframes.append(file_df)
        if dataframes:
            activity_dfs[activity] = pd.concat(dataframes, ignore_index=True)
    return activity_dfs


def flatternData(data, joints_mapping):
    flattened_data = {}
    for timestamp, position in data['positions'].items():
        for i, joint in enumerate(joints_mapping):
            if joint in position:
                flattened_data[f'{timestamp}_{joint}_x'] = position[joint][0]
                flattened_data[f'{timestamp}_{joint}_y'] = position[joint][1]
                flattened_data[f'{timestamp}_{joint}_z'] = position[joint][2]
                flattened_data[f'{timestamp}_{joint}_v'] = position[joint][3]
    return pd.DataFrame([flattened_data])


def calculateMeanForAllVideos(df):
    mean_dfs = []
    for index, row in df.iterrows():
        mean_data_series = {}
        for column in row.index:
            # Extract joint name and position (e.g., Nose_x)
            joint = '_'.join(column.split('_')[1:])
            if joint not in mean_data_series:
                mean_data_series[joint] = []
            mean_data_series[joint].append(row[column])
        # Calculate the mean for each joint
        # print(next(iter(mean_data_series.items())))
        # mean_data = {joint: pd.Series(values).mean() for joint, values in mean_data_series.items()}
        mean_data = {}
        for joint, values in mean_data_series.items():
            mean_value = pd.Series(values).mean()
            mean_data[joint] = mean_value
        # Create a DataFrame for the current video
        mean_df = pd.DataFrame(mean_data, index=[index])
        mean_dfs.append(mean_df)
    # Concatenate all mean DataFrames into one
    final_mean_df = pd.concat(mean_dfs, ignore_index=True)
    return final_mean_df


def add_visibility_to_keypoints(data):
    for frame in data["positions"].values():
        for keypoint in frame.values():
            keypoint.append(1)
    return data


def check_joints_in_frames(file_data, mapping):
    for frame, joints in file_data["positions"].items():
        missing_joints = [joint for joint in mapping if joint not in joints]
        if missing_joints:
            print(f"Frame {frame} is missing joints: {missing_joints}")


def resampleVideo(file_df, num_samples):
    timestamps = file_df.index.values
    new_timestamps = np.linspace(
        timestamps.min(), timestamps.max(), num_samples)
    joints_mapping = [col.split('_')[1]
                      for col in file_df.columns if col.endswith('_x')]
    interpolated_df = cubic_interpolation(file_df.to_dict(
        orient='list'), joints_mapping, new_timestamps)
    return interpolated_df

In [190]:
openpose_joint_mapping = [
    "Nose",
    "Left_eye",
    "Right_eye",
    "Left_ear",
    "Right_ear",
    "Left_shoulder",
    "Right_shoulder",
    "Left_elbow",
    "Right_elbow",
    "Left_wrist",
    "Right_wrist",
    "Left_hip",
    "Right_hip",
    "Left_knee",
    "Right_knee",
    "Left_ankle",
    "Right_ankle"
]

In [191]:
# path to data
group1A_path = 'C:\\Users\\PhucNg\\Documents\\KeraalDataset\\Keraal only data\\Group1A\\blazepose'
group2A_path = 'C:\\Users\\PhucNg\\Documents\\KeraalDataset\\Keraal only data\\Group2A\\blazepose'
group3_path = 'C:\\Users\\PhucNg\\Documents\\KeraalDataset\\Keraal only data\\Group3\\blazepose'

# group1B_path = 'C:\\Users\\PhucNg\\Documents\\KeraalDataset\\Keraal only data\\Group1B\\blazepose'
group2B_path = 'C:\\Users\\PhucNg\\Documents\\KeraalDataset\\Keraal only data\\Group2B\\blazepose'

# activities
# activities = ['CTK', 'ELK', 'RTK']
activities = ['ELK']

# Get paths
group1A_files_path = getFilesPath(group1A_path)
group2A_files_path = getFilesPath(group2A_path)
group3_files_path = getFilesPath(group3_path)

# group1B_files_path = getFilesPath(group1B_path)
group2B_files_path = getFilesPath(group2B_path)

# Load data
group1A_df = loadFiles(group1A_files_path, activities, openpose_joint_mapping)
group2A_df = loadFiles(group2A_files_path, activities, openpose_joint_mapping)
group3_df = loadFiles(group3_files_path, activities, openpose_joint_mapping)

# group1B_df = loadFiles(group1B_files_path, activities,openpose_joint_mapping )
# group2B_df = loadFiles(group2B_files_path, activities,openpose_joint_mapping)

KeyboardInterrupt: 

In [145]:
group1A_df["ELK"]

Unnamed: 0,1.0_Nose_x,1.0_Nose_y,1.0_Nose_z,1.0_Left_eye_x,1.0_Left_eye_y,1.0_Left_eye_z,1.0_Right_eye_x,1.0_Right_eye_y,1.0_Right_eye_z,1.0_Left_ear_x,...,676.0_Left_knee_z,676.0_Right_knee_x,676.0_Right_knee_y,676.0_Right_knee_z,676.0_Left_ankle_x,676.0_Left_ankle_y,676.0_Left_ankle_z,676.0_Right_ankle_x,676.0_Right_ankle_y,676.0_Right_ankle_z
0,0.506654,0.434217,-0.246330,0.508625,0.425578,-0.236028,0.503769,0.426956,-0.237425,0.510466,...,,,,,,,,,,
1,0.445212,0.391918,-0.253624,0.447953,0.383226,-0.243294,0.441520,0.382984,-0.244160,0.450395,...,,,,,,,,,,
2,0.446983,0.390229,-0.257158,0.450190,0.380646,-0.246087,0.442169,0.380898,-0.247759,0.452591,...,,,,,,,,,,
3,0.507294,0.433659,-0.266936,0.508131,0.425259,-0.257267,0.505305,0.426023,-0.257850,0.509257,...,,,,,,,,,,
4,0.526960,0.435775,-0.299513,0.529731,0.426662,-0.290633,0.523837,0.427467,-0.289381,0.530872,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,0.632049,0.521865,-0.145503,0.635075,0.522493,-0.156092,0.630787,0.521131,-0.153186,0.636640,...,,,,,,,,,,
80,0.604466,0.268525,-0.120081,0.598362,0.268283,-0.111930,0.598195,0.271391,-0.134456,0.589088,...,,,,,,,,,,
81,0.538186,0.413949,-0.380925,0.543670,0.399880,-0.369681,0.531830,0.400812,-0.369270,0.551331,...,,,,,,,,,,
82,0.538934,0.410393,-0.350805,0.545337,0.397023,-0.337551,0.533414,0.397765,-0.338365,0.552243,...,,,,,,,,,,


In [146]:
group1A_mean_df = {activity: calculateMeanForAllVideos(
    df) for activity, df in group1A_df.items()}
group2A_mean_df = {activity: calculateMeanForAllVideos(
    df) for activity, df in group2A_df.items()}
group3_mean_df = {activity: calculateMeanForAllVideos(
    df) for activity, df in group3_df.items()}

# group1B_mean_df = {activity: calculateMeanForAllVideos(
#     df) for activity, df in group1B_df.items()}
# group2B_mean_df = {activity: calculateMeanForAllVideos(
#     df) for activity, df in group2B_df.items()}


# Add labels
for activity, df in group1A_mean_df.items():
    df['Backpain'] = 1  # Backpain = 1
for activity, df in group2A_mean_df.items():
    df['Backpain'] = 0  # Backpain = 0
for activity, df in group3_mean_df.items():
    df['Backpain'] = 0  # Backpain = 0


merged_df_by_activity = {
    activity: pd.concat([
        group1A_mean_df[activity],
        group2A_mean_df[activity],
        group3_mean_df[activity]
    ], ignore_index=True) for activity in activities
}

# Display the final DataFrame
merged_df_by_activity[activities[0]]

Unnamed: 0,Nose_x,Nose_y,Nose_z,Left_eye_x,Left_eye_y,Left_eye_z,Right_eye_x,Right_eye_y,Right_eye_z,Left_ear_x,...,Right_knee_x,Right_knee_y,Right_knee_z,Left_ankle_x,Left_ankle_y,Left_ankle_z,Right_ankle_x,Right_ankle_y,Right_ankle_z,Backpain
0,0.507558,0.438477,-0.064163,0.507751,0.430958,-0.064601,0.507541,0.431153,-0.068044,0.507624,...,0.509573,0.702385,-0.002875,0.505115,0.793862,0.076614,0.508431,0.795375,0.058590,1
1,0.446458,0.388771,-0.032510,0.445825,0.380860,-0.038054,0.446623,0.380528,-0.040009,0.445067,...,0.456617,0.659530,0.029751,0.449965,0.753782,0.075917,0.459185,0.752604,0.071833,1
2,0.445677,0.388276,-0.137325,0.447333,0.380033,-0.130640,0.443643,0.379628,-0.133172,0.448437,...,0.450259,0.659658,0.006257,0.457528,0.750221,0.119469,0.450616,0.750538,0.100803,1
3,0.512753,0.435617,-0.137139,0.513586,0.427295,-0.134903,0.511525,0.427878,-0.138014,0.513520,...,0.505454,0.699998,-0.003207,0.511160,0.794263,0.090010,0.503864,0.794507,0.075653,1
4,0.524127,0.435436,-0.125112,0.525716,0.426912,-0.120018,0.522686,0.427456,-0.120450,0.526903,...,0.513313,0.702609,-0.000578,0.532063,0.797685,0.068312,0.509208,0.796763,0.079890,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267,0.524884,0.500328,-0.187593,0.529534,0.491539,-0.175563,0.520279,0.491102,-0.176488,0.533404,...,0.504809,0.734201,-0.151379,0.546567,0.832228,-0.062504,0.506038,0.839900,-0.065126,0
268,0.525921,0.493830,-0.164125,0.530104,0.484908,-0.151081,0.521140,0.485325,-0.152603,0.532928,...,0.504425,0.730304,-0.150273,0.546304,0.830749,-0.071854,0.505304,0.839459,-0.069903,0
269,0.526288,0.492319,-0.186240,0.530554,0.483333,-0.172897,0.521464,0.483678,-0.174663,0.533428,...,0.511321,0.725308,-0.154554,0.534609,0.836592,-0.088454,0.514812,0.839144,-0.069473,0
270,0.521463,0.495128,-0.179425,0.526264,0.485628,-0.167103,0.517131,0.485549,-0.167726,0.530068,...,0.508348,0.730455,-0.148621,0.534803,0.838047,-0.098454,0.514094,0.841173,-0.068695,0


In [147]:
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Assuming merged_df_by_activity is already defined and contains the merged DataFrame for each activity
df_ctk = merged_df_by_activity['ELK']

# Prepare the data
X = df_ctk.drop(columns=['Backpain'])  # Features
y = df_ctk['Backpain']  # Labels

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Define the parameter grid for the KNeighborsClassifier
param_grid = {
    'n_neighbors': [3, 4, 5],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}

# Initialize the KNeighborsClassifier
knn_clf = KNeighborsClassifier()

# Initialize GridSearchCV
grid_search = GridSearchCV(
    estimator=knn_clf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=3)

# Perform grid search and cross-validation
grid_search.fit(X_train, y_train)

# Get the best parameters and best estimator
best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

# Print the best parameters and best score
print(f"Best parameters: {best_params}")
print(f"Best cross-validation score: {grid_search.best_score_}")

# Evaluate the model
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test set accuracy: {accuracy}")

Fitting 5 folds for each of 24 candidates, totalling 120 fits
Best parameters: {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'uniform'}
Best cross-validation score: 0.9861522198731502
Test set accuracy: 1.0


In [148]:
group2B_df = loadFiles(group2B_files_path, activities, openpose_joint_mapping)

group2B_mean_df = {activity: calculateMeanForAllVideos(
    df) for activity, df in group2B_df.items()}

for activity, df in group2B_mean_df.items():
    df['Backpain'] = 0  # Backpain = 0

In [149]:
# use the above model to predict the backpain for group2B
df_2B = group2B_mean_df['ELK']
X_2B = df_2B.drop(columns=['Backpain'])
y_2B = df_2B['Backpain']

y_pred_2B = grid_search.predict(X_2B)
accuracy_2B = accuracy_score(y_2B, y_pred_2B)

print(f"Group 2B test set accuracy: {accuracy_2B}")
# print(group2B_mean_df)

Group 2B test set accuracy: 0.13043478260869565


In [150]:
def calculateStatisticalFeatures(df):
    features = {}
    for column in df.columns:
        features[f'{column}_mean'] = df[column].mean()
        features[f'{column}_std'] = df[column].std()
        features[f'{column}_min'] = df[column].min()
        features[f'{column}_max'] = df[column].max()
    return features


def labelData(dataframes, label):
    labeled_data = []
    for df in dataframes:
        features = calculateStatisticalFeatures(df)
        features['label'] = label
        labeled_data.append(features)
    return labeled_data


def combineData(labeled_data):
    return pd.DataFrame(labeled_data)

In [151]:
group2A_labeled_data = labelData(group2A_df, label=0)

AttributeError: 'str' object has no attribute 'columns'