In [1]:
# Imports

# for running on wayland, ignore on other platforms #
import os                                           #
os.environ["XDG_SESSION_TYPE"] = "xcb"              #
#####################################################

import cv2
from ultralytics import YOLO
import pandas as pd
from IPython.display import display


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.dataset import random_split
import numpy as np


In [None]:
# dataframe generation and class attribute addition

pose_model = YOLO('yolov8s-pose.pt')

main_dir = '../backup/dataset/train/' #

position_df = pd.DataFrame()

for class_name in os.listdir(main_dir):
    class_path = os.path.join(main_dir, class_name)
    
    if os.path.isdir(class_path):
        for filename in os.listdir(class_path):
            if filename.endswith(('.jpg', '.jpeg', '.png')):
                image_path = os.path.join(class_path, filename)
                frame = cv2.imread(image_path)

                results = pose_model(frame, device=0)

                x_values = []
                y_values = []
                data = {}
                img_df = pd.DataFrame()

                for result in results:
                    kpts = result.keypoints
                    upperbody_kpts = 11

                    for person in range(len(kpts)):
                        for kp_index in range(upperbody_kpts):
                            keypoint = kpts.xy[person, kp_index]
                            x, y = int(keypoint[0].item()), int(keypoint[1].item())

                            x_values.append(x)
                            y_values.append(y)

                        for i in range(len(x_values)):
                            data[f'x_{i}'] = x_values[i]
                            data[f'y_{i}'] = y_values[i]

                        x_values = []
                        y_values = []

                        # Add a column for class (0 for "healthy", 1 for "risk")
                        data['class'] = 0 if class_name == 'healthy' else 1

                        temp_df = pd.DataFrame([data])
                        img_df = pd.concat([img_df, temp_df], ignore_index=True)
                        temp_df = {}

                # Concatenate per-image DataFrame to the main DataFrame
                position_df = pd.concat([position_df, img_df], ignore_index=True)

# Display the resulting DataFrame
display(position_df)


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


X = position_df.iloc[:, :-1]  # All columns except the last one
y = position_df.iloc[:, -1]   # The last column

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = RandomForestClassifier(n_estimators=100, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)


Accuracy: 0.7948717948717948
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.93      0.83        43
           1       0.88      0.63      0.73        35

    accuracy                           0.79        78
   macro avg       0.82      0.78      0.78        78
weighted avg       0.81      0.79      0.79        78

