In [170]:
import os
import json


import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
from scipy.interpolate import CubicSpline

In [171]:
sample_video = {
    "positions": {
        "1.0": {
            "Nose": [0.42, 0.38, -0.27],
            "Left_eye_inner": [0.47, 0.37, -0.26],
        },
        "2.0": {
            "Nose":  [0.42, 0.38, -0.27],
            "Left_eye_inner":  [0.42, 0.38, -0.27],
        },
        "3.0": {
            "Nose":  [0.42, 0.38, -0.27],
            "Left_eye_inner":  [0.42, 0.38, -0.27],
        },
        "4.0": {
            "Nose":  [0.42, 0.38, -0.27],
            "Left_eye_inner":  [0.42, 0.38, -0.27],
        },
    }
}

sample_video["positions"]["1.0"]["Nose"]

[0.42, 0.38, -0.27]

  <!-- for column in row.index:
            # Extract joint name and position (e.g., Nose_x)
            joint = '_'.join(column.split('_')[1:])
            if joint not in mean_data_series:
                mean_data_series[joint] = []
            mean_data_series[joint].append(row[column]) -->

Let say we have
a[1_a_x:5, 1_a_y:4, 2_a_x:3, 2_a_y:7]
The above code give:
result = {
a_x:[5,3],
a_y:[4,7]
}


In [172]:
def getFilesPath(directory):
    files = []
    for root, dirs, file in os.walk(directory):
        for f in file:
            files.append(os.path.join(root, f))
    return files


def loadFiles(files_path, activities, joints_mapping):
    activity_dfs = []
    for activity in activities:
        for file_path in files_path:
            if file_path.endswith('.json') and activity in file_path:
                with open(file_path, 'r') as file:
                    file_data = json.load(file)
                    resample_file_data = resample_video(
                        file_data, joints_mapping, 100)
                    # file_data_with_visibility = add_visibility_to_keypoints(file_data)
                    file_df = flatternData(resample_file_data, joints_mapping)
                    print(file.name)
                    file_df.insert(0, 'group', file_path.split('-')[0])
                    file_df.insert(1, 'activity', activity)
                    activity_dfs.append(file_df)
    if activity_dfs:
        final_df = pd.concat(activity_dfs, ignore_index=True)
        return final_df
    else:
        return pd.DataFrame()


def flatternData(data, joints_mapping):
    flattened_data = {}
    for timestamp, position in data.items():
        for joint in joints_mapping:
            if joint in position:
                flattened_data[f'{timestamp}_{joint}_x'] = position[joint][0]
                flattened_data[f'{timestamp}_{joint}_y'] = position[joint][1]
                flattened_data[f'{timestamp}_{joint}_z'] = position[joint][2]
    return pd.DataFrame([flattened_data])


def calculateMeanForAllVideos(df):
    mean_dfs = []
    for index, row in df.iterrows():
        mean_data_series = {}
        for column in row.index:
            # Extract joint name and position (e.g., Nose_x)
            joint = '_'.join(column.split('_')[1:])
            if joint not in mean_data_series:
                mean_data_series[joint] = []
            mean_data_series[joint].append(row[column])
        # Calculate the mean for each joint
        # print(next(iter(mean_data_series.items())))
        # mean_data = {joint: pd.Series(values).mean() for joint, values in mean_data_series.items()}
        mean_data = {}
        for joint, values in mean_data_series.items():
            mean_value = pd.Series(values).mean()
            mean_data[joint] = mean_value
        # Create a DataFrame for the current video
        mean_df = pd.DataFrame(mean_data, index=[index])
        mean_dfs.append(mean_df)
    # Concatenate all mean DataFrames into one
    final_mean_df = pd.concat(mean_dfs, ignore_index=True)
    return final_mean_df


def resample_video(video_data, joint_mapping, target_frames=30):
    # Get the original frame numbers and convert to float
    original_frames = np.array([float(k)
                               for k in video_data['positions'].keys()])
    # check that video has at least 4 frames

    if len(original_frames) < 4:
        raise ValueError("Video must have at least 4 frames")
    # Create new evenly spaced frames
    new_frames = np.linspace(min(original_frames),
                             max(original_frames), target_frames)

    # Get all unique joints
    joints = list(next(iter(video_data['positions'].values())).keys())

    # Initialize the resampled data structure
    resampled_data = {str(float(i)): {} for i in range(1, target_frames + 1)}

    # Interpolate each joint's coordinates
    for joint in joint_mapping:
        # Extract x, y, z coordinates for the current joint across all frames
        x_coords = [video_data['positions']
                    [str(frame)][joint][0] for frame in original_frames]
        y_coords = [video_data['positions']
                    [str(frame)][joint][1] for frame in original_frames]
        z_coords = [video_data['positions']
                    [str(frame)][joint][2] for frame in original_frames]

        # Create cubic interpolation functions for each coordinate
        x_interp = interp1d(original_frames, x_coords, kind='cubic')
        y_interp = interp1d(original_frames, y_coords, kind='cubic')
        z_interp = interp1d(original_frames, z_coords, kind='cubic')

        # Apply interpolation to get new coordinates
        for i, frame in enumerate(new_frames, 1):  # Start counting from 1
            resampled_data[str(float(i))][joint] = [
                float(x_interp(frame)),
                float(y_interp(frame)),
                float(z_interp(frame))
            ]
    return resampled_data

In [173]:
openpose_joint_mapping = [
    "Nose",
    "Left_eye",
    "Right_eye",
    "Left_ear",
    "Right_ear",
    "Left_shoulder",
    "Right_shoulder",
    "Left_elbow",
    "Right_elbow",
    "Left_wrist",
    "Right_wrist",
    "Left_hip",
    "Right_hip",
    "Left_knee",
    "Right_knee",
    "Left_ankle",
    "Right_ankle"
]

In [174]:
# path to data
group1A_path = 'C:\\Users\\PhucNg\\Documents\\KeraalDataset\\Keraal only data\\Group1A\\blazepose'
group2A_path = 'C:\\Users\\PhucNg\\Documents\\KeraalDataset\\Keraal only data\\Group2A\\blazepose'
group3_path = 'C:\\Users\\PhucNg\\Documents\\KeraalDataset\\Keraal only data\\Group3\\blazepose'

# group1B_path = 'C:\\Users\\PhucNg\\Documents\\KeraalDataset\\Keraal only data\\Group1B\\blazepose'
group2B_path = 'C:\\Users\\PhucNg\\Documents\\KeraalDataset\\Keraal only data\\Group2B\\blazepose'

# activities
# activities = ['CTK', 'ELK', 'RTK']
activities = ['ELK']

# Get paths
group1A_files_path = getFilesPath(group1A_path)
group2A_files_path = getFilesPath(group2A_path)
group3_files_path = getFilesPath(group3_path)

# group1B_files_path = getFilesPath(group1B_path)
group2B_files_path = getFilesPath(group2B_path)

# Load data
# group1A_df = loadFiles(group1A_files_path, activities, openpose_joint_mapping)
group2A_df = loadFiles(group2A_files_path, activities, openpose_joint_mapping)
# group3_df = loadFiles(group3_files_path, activities, openpose_joint_mapping)

# group1B_df = loadFiles(group1B_files_path, activities,openpose_joint_mapping )
# group2B_df = loadFiles(group2B_files_path, activities,openpose_joint_mapping)

C:\Users\PhucNg\Documents\KeraalDataset\Keraal only data\Group2A\blazepose\G2A-BP-ELK-S1-Roscoff-005.json
C:\Users\PhucNg\Documents\KeraalDataset\Keraal only data\Group2A\blazepose\G2A-BP-ELK-S1-Roscoff-060.json
C:\Users\PhucNg\Documents\KeraalDataset\Keraal only data\Group2A\blazepose\G2A-BP-ELK-S1-Roscoff-071.json
C:\Users\PhucNg\Documents\KeraalDataset\Keraal only data\Group2A\blazepose\G2A-BP-ELK-S1-Roscoff-072.json
C:\Users\PhucNg\Documents\KeraalDataset\Keraal only data\Group2A\blazepose\G2A-BP-ELK-S1-Roscoff-077.json
C:\Users\PhucNg\Documents\KeraalDataset\Keraal only data\Group2A\blazepose\G2A-BP-ELK-S3-Roscoff-007.json
C:\Users\PhucNg\Documents\KeraalDataset\Keraal only data\Group2A\blazepose\G2A-BP-ELK-S3-Roscoff-021.json
C:\Users\PhucNg\Documents\KeraalDataset\Keraal only data\Group2A\blazepose\G2A-BP-ELK-S3-Roscoff-085.json


In [175]:
group2A_df

Unnamed: 0,group,activity,1.0_Nose_x,1.0_Nose_y,1.0_Nose_z,1.0_Left_eye_x,1.0_Left_eye_y,1.0_Left_eye_z,1.0_Right_eye_x,1.0_Right_eye_y,...,100.0_Left_knee_z,100.0_Right_knee_x,100.0_Right_knee_y,100.0_Right_knee_z,100.0_Left_ankle_x,100.0_Left_ankle_y,100.0_Left_ankle_z,100.0_Right_ankle_x,100.0_Right_ankle_y,100.0_Right_ankle_z
0,C:\Users\PhucNg\Documents\KeraalDataset\Keraal...,ELK,0.514778,0.419148,-0.429015,0.52256,0.405636,-0.411748,0.506505,0.405866,...,-0.062647,0.46069,0.750124,-0.083118,0.552098,0.88048,0.136885,0.466148,0.870986,0.11952
1,C:\Users\PhucNg\Documents\KeraalDataset\Keraal...,ELK,0.514341,0.431062,-0.424424,0.523245,0.415488,-0.414389,0.506915,0.416008,...,-0.05369,0.466077,0.751788,-0.067345,0.552154,0.887764,0.107439,0.466415,0.874737,0.09825
2,C:\Users\PhucNg\Documents\KeraalDataset\Keraal...,ELK,0.514805,0.43249,-0.383239,0.523405,0.418253,-0.372708,0.50749,0.417958,...,-0.034927,0.463389,0.754481,-0.038195,0.548922,0.887132,0.144857,0.465611,0.876084,0.146514
3,C:\Users\PhucNg\Documents\KeraalDataset\Keraal...,ELK,0.516031,0.423607,-0.402091,0.52436,0.410526,-0.389145,0.508459,0.40959,...,-0.065246,0.457223,0.749706,-0.091442,0.55165,0.888258,0.138269,0.463789,0.877048,0.114274
4,C:\Users\PhucNg\Documents\KeraalDataset\Keraal...,ELK,0.519944,0.411533,-0.351231,0.528436,0.398234,-0.336004,0.511754,0.398824,...,-0.036558,0.464567,0.755947,-0.021021,0.550256,0.889177,0.154937,0.465427,0.876405,0.171741
5,C:\Users\PhucNg\Documents\KeraalDataset\Keraal...,ELK,0.542199,0.452901,-0.38907,0.550062,0.441218,-0.376274,0.535107,0.438716,...,-0.070763,0.5286,0.762715,-0.193475,0.571739,0.874618,0.023662,0.497107,0.887096,-0.113742
6,C:\Users\PhucNg\Documents\KeraalDataset\Keraal...,ELK,0.545293,0.4587,-0.393069,0.551719,0.446281,-0.379759,0.53844,0.444158,...,-0.122346,0.50051,0.779611,-0.063665,0.574576,0.881748,0.035334,0.493822,0.891501,0.071367
7,C:\Users\PhucNg\Documents\KeraalDataset\Keraal...,ELK,0.536489,0.447506,-0.315217,0.545262,0.434548,-0.304267,0.530613,0.434941,...,-0.038811,0.512424,0.796181,-0.028391,0.571463,0.888245,0.135023,0.502794,0.896158,0.12215


In [176]:
group1A_mean_df = {activity: calculateMeanForAllVideos(
    df) for activity, df in group1A_df.items()}
group2A_mean_df = {activity: calculateMeanForAllVideos(
    df) for activity, df in group2A_df.items()}
group3_mean_df = {activity: calculateMeanForAllVideos(
    df) for activity, df in group3_df.items()}

# group1B_mean_df = {activity: calculateMeanForAllVideos(
#     df) for activity, df in group1B_df.items()}
# group2B_mean_df = {activity: calculateMeanForAllVideos(
#     df) for activity, df in group2B_df.items()}


# Add labels
for activity, df in group1A_mean_df.items():
    df['Backpain'] = 1  # Backpain = 1
for activity, df in group2A_mean_df.items():
    df['Backpain'] = 0  # Backpain = 0
for activity, df in group3_mean_df.items():
    df['Backpain'] = 0  # Backpain = 0


merged_df_by_activity = {
    activity: pd.concat([
        group1A_mean_df[activity],
        group2A_mean_df[activity],
        group3_mean_df[activity]
    ], ignore_index=True) for activity in activities
}

# Display the final DataFrame
merged_df_by_activity[activities[0]]

NameError: name 'group1A_df' is not defined

In [147]:
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Assuming merged_df_by_activity is already defined and contains the merged DataFrame for each activity
df_ctk = merged_df_by_activity['ELK']

# Prepare the data
X = df_ctk.drop(columns=['Backpain'])  # Features
y = df_ctk['Backpain']  # Labels

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Define the parameter grid for the KNeighborsClassifier
param_grid = {
    'n_neighbors': [3, 4, 5],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}

# Initialize the KNeighborsClassifier
knn_clf = KNeighborsClassifier()

# Initialize GridSearchCV
grid_search = GridSearchCV(
    estimator=knn_clf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=3)

# Perform grid search and cross-validation
grid_search.fit(X_train, y_train)

# Get the best parameters and best estimator
best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

# Print the best parameters and best score
print(f"Best parameters: {best_params}")
print(f"Best cross-validation score: {grid_search.best_score_}")

# Evaluate the model
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test set accuracy: {accuracy}")

Fitting 5 folds for each of 24 candidates, totalling 120 fits
Best parameters: {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'uniform'}
Best cross-validation score: 0.9861522198731502
Test set accuracy: 1.0


In [148]:
group2B_df = loadFiles(group2B_files_path, activities, openpose_joint_mapping)

group2B_mean_df = {activity: calculateMeanForAllVideos(
    df) for activity, df in group2B_df.items()}

for activity, df in group2B_mean_df.items():
    df['Backpain'] = 0  # Backpain = 0

In [149]:
# use the above model to predict the backpain for group2B
df_2B = group2B_mean_df['ELK']
X_2B = df_2B.drop(columns=['Backpain'])
y_2B = df_2B['Backpain']

y_pred_2B = grid_search.predict(X_2B)
accuracy_2B = accuracy_score(y_2B, y_pred_2B)

print(f"Group 2B test set accuracy: {accuracy_2B}")
# print(group2B_mean_df)

Group 2B test set accuracy: 0.13043478260869565


In [150]:
def calculateStatisticalFeatures(df):
    features = {}
    for column in df.columns:
        features[f'{column}_mean'] = df[column].mean()
        features[f'{column}_std'] = df[column].std()
        features[f'{column}_min'] = df[column].min()
        features[f'{column}_max'] = df[column].max()
    return features


def labelData(dataframes, label):
    labeled_data = []
    for df in dataframes:
        features = calculateStatisticalFeatures(df)
        features['label'] = label
        labeled_data.append(features)
    return labeled_data


def combineData(labeled_data):
    return pd.DataFrame(labeled_data)

In [151]:
group2A_labeled_data = labelData(group2A_df, label=0)

AttributeError: 'str' object has no attribute 'columns'