### Constantes

In [1]:
IMAGE_SIZE = 640

labels = {'standing': 0, 'takedown': 1, 'open_guard': 2, 'half_guard': 3, 'closed_guard': 4, '5050_guard': 5, 'side_control': 6, 'mount': 7, 'back': 8, 'turtle': 9}
body_parts = ["nose", "left eye", "right eye", "left ear", "right ear", "left shoulder", "right shoulder", "left elbow", "right elbow", "left wrist", "right wrist", "left hip", "right hip", "left knee", "right knee", "left ankle", "right ankle"]
num_labels = len(labels)
num_keypoints = 17
num_players = 2

### Importando modelo coco


In [2]:
# STEP 1: Import the necessary modules.
from ultralytics import YOLO

# Load a model
pose_model = YOLO('../../coco_model/yolov8n-pose.pt') 

### Testando modelo

In [25]:
import cv2 as cv

def draw_image_with_keypoints(image_path, keypoints):
    image = cv.imread(image_path)

    image = cv.resize(image, (IMAGE_SIZE, IMAGE_SIZE))

    for keypoint_set in keypoints:
        for i in range(num_keypoints):
            x = int(keypoint_set[i][0])
            y = int(keypoint_set[i][1])
            cv.circle(image, (x, y), 5, (255, 0, 0), -1)
        
    cv.imshow('image', image)
    cv.waitKey(0)

### Carregando modelo pose BJJ

In [26]:
import tensorflow as tf

model = tf.keras.models.load_model("jiu_model.h5")

### Funções úteis

In [33]:
import matplotlib.pyplot as plt # type: ignore
import numpy as np

def get_keypoints(image_path):
    predictions = pose_model(image_path)[0]
    
    if len(predictions) == 0:
        return None
    
    keypoints = predictions[0].keypoints
    keypoints_tensor = keypoints.data
    keypoints = keypoints_tensor.numpy()
    
    if len(predictions) > 1:
        keypoints2 = predictions[1].keypoints
        keypoints_tensor2 = keypoints2.data
        keypoints2 = keypoints_tensor2.numpy()
    else:
        keypoints2 = np.array([[[0, 0, 0]] * num_keypoints])
    
    keypoints = np.concatenate((keypoints, keypoints2), axis=0)
    
    return keypoints

def pose_prediction(keypoints):
    keypoints = np.array(keypoints)
    
    keypoints = keypoints.reshape(1, num_keypoints * 3 * 2)
    
    print(keypoints)
    
    max_x = np.max(keypoints)
    
    prediction = model.predict(keypoints / max_x)
    
    max_index = np.argmax(prediction)
    
    return max_index   

### Classificação em imagem única

In [34]:
import os

image_path = '../../test_images/back01.jpeg'

temp_path = '../../tmp'

# resize the image
image = cv.imread(image_path)

image = cv.resize(image, (IMAGE_SIZE, IMAGE_SIZE))

cv.imwrite(temp_path + '/temp.jpg', image)

keypoints = get_keypoints(temp_path + '/temp.jpg')

draw_image_with_keypoints(temp_path + '/temp.jpg', keypoints)

label_index = pose_prediction(keypoints)

print("Predicted Label: ", list(labels.keys())[label_index])

os.remove(temp_path + '/temp.jpg')


image 1/1 c:\Users\guilh\Documents\bjj_ia\versions\v2\..\..\tmp\temp.jpg: 640x640 1 person, 114.0ms
Speed: 2.0ms preprocess, 114.0ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)
[[     163.12      341.46     0.98865      142.04      309.29     0.95776      139.22      346.42     0.95784      148.95      250.68     0.50958      134.95      341.64       0.585      273.37      217.02     0.99135      182.91      358.37     0.98414      475.08      245.45     0.93354      190.91      503.12
       0.8824      540.52      241.02     0.90885      263.35      452.33     0.88671      425.74       281.8     0.86828       380.1      352.48     0.83464      481.16      277.43     0.88236      480.53      353.42     0.87674      556.41      409.51     0.56369      558.28      437.53     0.58835           0
            0           0           0           0           0           0           0           0           0           0           0           0           0           0    

### Classificação em vídeo em tempo real

In [35]:
import cv2 # type: ignore

cap = cv2.VideoCapture("../../test_videos/video.mp4")

while cap.isOpened():
    
    ret, frame = cap.read()
    
    if not ret:
        break
    
    frame = cv2.resize(frame, (IMAGE_SIZE, IMAGE_SIZE))
    
    keypoints = get_keypoints(frame)
    
    if keypoints is None:
        continue
    
    label_index = pose_prediction(keypoints)
    
    print("Predicted Label: ", list(labels.keys())[label_index])
    
    for keypoint_set in keypoints:
        for i in range(num_keypoints):
            x = int(keypoint_set[i][0].item())
            y = int(keypoint_set[i][1].item())
            cv2.circle(frame, (x, y), 5, (0, 255, 0), -1)
    
    cv2.imshow("Frame", frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        cap.release()
        cv2.destroyAllWindows()
        break




0: 640x640 2 persons, 80.0ms
Speed: 2.0ms preprocess, 80.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)
[[     392.74      115.01      0.9692      401.23      105.43     0.96604      386.95       106.1      0.8069      420.72       108.2     0.92233           0           0     0.40592      432.22      165.28     0.99358      391.39      162.11     0.97768      426.53      246.45     0.97399       389.6      231.28
      0.83642      404.05      311.69     0.95027       369.7      282.88     0.81175      416.26      308.48     0.99709      388.87      305.21      0.9943         413       418.9      0.9959      388.03      411.63      0.9926      422.08      510.73     0.97198       394.8      500.48     0.96261      193.66
       63.804     0.95292      198.37      55.948     0.52329      188.89      53.865      0.9811           0           0     0.10507      164.96      50.084     0.97838      159.41      106.13     0.81837      142.79      101.94     0.99565     