### Constantes

In [1]:
# Side length, in pixels, that images and frames are resized to before use.
IMAGE_SIZE = 640

# Class names in index order; the position in this tuple is the class index.
_POSITION_NAMES = (
    'standing',
    'takedown',
    'open_guard',
    'half_guard',
    'closed_guard',
    '5050_guard',
    'side_control',
    'mount',
    'back',
    'turtle',
)
labels = {name: index for index, name in enumerate(_POSITION_NAMES)}

# Display name for each keypoint index (used when annotating images).
body_parts = [
    "nose",
    "left eye", "right eye",
    "left ear", "right ear",
    "left shoulder", "right shoulder",
    "left elbow", "right elbow",
    "left wrist", "right wrist",
    "left hip", "right hip",
    "left knee", "right knee",
    "left ankle", "right ankle",
]

num_labels = len(labels)
# One keypoint per named body part.
num_keypoints = len(body_parts)

### Importando modelo COCO


In [2]:
# STEP 1: Bring in the YOLO wrapper from ultralytics.
from ultralytics import YOLO

# STEP 2: Build the pose-estimation model from the weights file on disk
# (path is relative to the notebook's working directory).
pose_weights_path = '../../coco_model/yolov8n-pose.pt'
pose_model = YOLO(pose_weights_path)

### Testando modelo

In [3]:
import cv2 as cv

def draw_image_with_keypoints(image_path, keypoints):
    """Display an image with its keypoints drawn and labelled.

    The image is read from disk, resized to ``IMAGE_SIZE`` x ``IMAGE_SIZE``,
    annotated with one filled circle and one ``body_parts`` label per
    keypoint, and shown in an OpenCV window. The call blocks until a key is
    pressed, then closes the OpenCV windows.

    Args:
        image_path: Path of the image file to read.
        keypoints: Indexable with at least ``num_keypoints`` rows. Elements 0
            and 1 of each row are read (via ``.item()``) as the x and y pixel
            coordinates in the resized image.

    Raises:
        FileNotFoundError: If the image cannot be read from ``image_path``.
    """
    image = cv.imread(image_path)

    # cv.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an obscure error inside cv.resize.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    image = cv.resize(image, (IMAGE_SIZE, IMAGE_SIZE))

    marker_color = (0, 255, 0)

    for i in range(num_keypoints):
        x = int(keypoints[i][0].item())
        y = int(keypoints[i][1].item())
        cv.circle(image, (x, y), 5, marker_color, -1)
        cv.putText(image, body_parts[i], (x, y), cv.FONT_HERSHEY_SIMPLEX, 0.5, marker_color, 2)

    cv.imshow('image', image)
    cv.waitKey(0)
    # Close the window once the user has pressed a key so it does not linger
    # (unresponsive) while the rest of the notebook runs.
    cv.destroyAllWindows()

### Carregando modelo pose BJJ

In [4]:
import tensorflow as tf

# Restore the saved Keras classifier from its HDF5 file
# (path is relative to the notebook's working directory).
bjj_model_file = "jiu_model.h5"
model = tf.keras.models.load_model(bjj_model_file)





### Funções úteis

In [22]:
import matplotlib.pyplot as plt # type: ignore
import numpy as np

def get_keypoints(image_path):
    """Run the pose model and return the keypoints of the first detection.

    Args:
        image_path: Input handed straight to ``pose_model``. Despite the
            name, this notebook calls it both with a file path and with an
            in-memory video frame.

    Returns:
        A NumPy array with the keypoint data of the first detection, or
        ``None`` when the model detected nothing. Callers in this notebook
        index it as ``[0][i][0]`` / ``[0][i][1]`` and reshape it to
        ``(1, 51)`` — presumably shape ``(1, 17, 3)``; TODO confirm.
    """
    predictions = pose_model(image_path)[0]

    if len(predictions) == 0:
        return None

    # Only the first detection is used, even if several people were found.
    first_detection = predictions[0].keypoints

    # Move the tensor to host memory first: calling .numpy() directly fails
    # when inference ran on a GPU. On CPU, .cpu() is a no-op.
    return first_detection.data.cpu().numpy()

def pose_prediction(keypoints):
    """Classify a set of keypoints with the BJJ position model.

    The keypoints are flattened to a single row of 51 values, scaled by
    their maximum value, and passed to ``model.predict``. The raw prediction
    is printed.

    Args:
        keypoints: Array-like holding exactly 51 values in total
            (``num_keypoints`` is 17, so presumably 3 values per keypoint —
            TODO confirm).

    Returns:
        Index of the highest-scoring class in the prediction; callers use it
        to look up the name in ``labels``.

    Raises:
        ValueError: If ``keypoints`` cannot be reshaped to ``(1, 51)``.
    """
    features = np.array(keypoints).reshape(1, 51)

    # Scale by the largest value in the row. Guard against a zero maximum,
    # which would otherwise produce NaN/inf inputs to the model.
    max_x = np.max(features)
    if max_x != 0:
        features = features / max_x

    prediction = model.predict(features)

    print(prediction)

    max_index = np.argmax(prediction)

    return max_index

### Classificação em imagem única

In [23]:
import os

image_path = '../../test_images/standing.jpg'

temp_path = '../../tmp'
temp_image_path = os.path.join(temp_path, 'temp.jpg')

# Read the source image; cv.imread returns None (no exception) on failure.
image = cv.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Resize the image and store it as a temporary file, since
# draw_image_with_keypoints reads its image from disk.
image = cv.resize(image, (IMAGE_SIZE, IMAGE_SIZE))

os.makedirs(temp_path, exist_ok=True)
# cv.imwrite reports failure through its return value.
if not cv.imwrite(temp_image_path, image):
    raise OSError(f"Could not write temporary image: {temp_image_path}")

try:
    keypoints = get_keypoints(temp_image_path)

    # get_keypoints returns None when nothing was detected.
    if keypoints is None:
        print("No person detected in image: ", image_path)
    else:
        draw_image_with_keypoints(temp_image_path, keypoints[0])

        label_index = pose_prediction(keypoints)

        print("Predicted Label: ", list(labels.keys())[label_index])
finally:
    # Always remove the temporary file, even if a step above failed.
    os.remove(temp_image_path)




image 1/1 c:\Users\guilh\Documents\bjj_ia\versions\v1\..\..\tmp\temp.jpg: 640x640 2 persons, 73.0ms
Speed: 1.0ms preprocess, 73.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)
[[    0.97779    0.022202  3.9288e-08    8.26e-22  9.7703e-09  3.0819e-29  8.3024e-32  1.6243e-17  3.4289e-06  5.4712e-19]]
Predicted Label:  standing


### Classificação em vídeo em tempo real

In [24]:
import cv2 # type: ignore

video_path = "../../test_videos/video.mp4"

cap = cv2.VideoCapture(video_path)

# Class names in index order; built once instead of on every frame.
label_names = list(labels.keys())

if not cap.isOpened():
    print("Could not open video: ", video_path)

try:
    while cap.isOpened():

        ret, frame = cap.read()

        # No more frames (or a read error): stop.
        if not ret:
            break

        frame = cv2.resize(frame, (IMAGE_SIZE, IMAGE_SIZE))

        keypoints = get_keypoints(frame)

        # Annotate only when something was detected; the frame is still shown
        # below so playback does not freeze and 'q' keeps working.
        if keypoints is not None:
            label_index = pose_prediction(keypoints)

            print("Predicted Label: ", label_names[label_index])

            for i in range(num_keypoints):
                x = int(keypoints[0][i][0].item())
                y = int(keypoints[0][i][1].item())
                cv2.circle(frame, (x, y), 5, (0, 255, 0), -1)
                cv2.putText(frame, body_parts[i], (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        cv2.imshow("Frame", frame)

        # Press 'q' to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the window however the loop ended
    # (end of video, 'q', or an exception).
    cap.release()
    cv2.destroyAllWindows()




0: 640x640 2 persons, 133.0ms
Speed: 3.2ms preprocess, 133.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)
[[ 1.0259e-07  5.3879e-07     0.99969  3.6946e-36   4.223e-08  7.6288e-18           0  8.4133e-15  0.00031302   4.309e-12]]
Predicted Label:  open_guard

0: 640x640 2 persons, 76.0ms
Speed: 2.0ms preprocess, 76.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)
[[ 2.1152e-07  9.3435e-07     0.99963  7.6482e-36  4.6623e-08  1.1478e-17           0  9.8124e-15  0.00037352  8.0736e-12]]
Predicted Label:  open_guard

0: 640x640 2 persons, 67.5ms
Speed: 3.0ms preprocess, 67.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)
[[ 5.0163e-07   8.012e-07     0.99961  4.1114e-36  4.5722e-08  1.4286e-17           0  2.7841e-15  0.00039239  5.2733e-12]]
Predicted Label:  open_guard

0: 640x640 2 persons, 71.0ms
Speed: 2.0ms preprocess, 71.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)
[[ 3.0531e-06  4.7185e-06      0