<a href="https://colab.research.google.com/github/webcoder123/Deeplearning-Project-/blob/main/Complete__all_pose_and_object.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.74-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nv

In [2]:
import cv2
import numpy as np
from ultralytics import YOLO

# Load models
pose_model = YOLO("/content/yolo11n-pose.pt")  # Pose estimation model
object_model = YOLO("/content/best.pt")  # Object detection model

# Function to calculate angle between three points
def calculate_angle(a, b, c):
    if a is None or b is None or c is None:
        return None
    a, b, c = np.array(a), np.array(b), np.array(c)
    ba = a - b
    bc = c - b
    cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc) + 1e-6)
    angle = np.degrees(np.arccos(np.clip(cosine_angle, -1.0, 1.0)))
    return angle

# Define action classification based on keypoints
def classify_pose(keypoints):
    if keypoints is None or len(keypoints) < 9:
        return "Unknown"

    nose, left_shoulder, right_shoulder, left_hip, right_hip, left_knee, right_knee, left_ankle, right_ankle = keypoints[:9]
    keypoint_check = all(kp is not None for kp in [left_hip, left_knee, left_ankle, right_hip, right_knee, right_ankle])
    if not keypoint_check:
        return "Unknown"

    left_knee_angle = calculate_angle(left_hip, left_knee, left_ankle)
    right_knee_angle = calculate_angle(right_hip, right_knee, right_ankle)
    torso_angle = calculate_angle(left_shoulder, left_hip, left_knee)
    arm_angle = calculate_angle(left_shoulder, nose, right_shoulder)

    if None in [left_knee_angle, right_knee_angle, torso_angle, arm_angle]:
        return "Unknown"

    if torso_angle < 45:
        return "Bending"
    elif left_knee_angle < 100 or right_knee_angle < 100:
        return "Walking"
    elif left_knee_angle > 160 and right_knee_angle > 160:
        return "Standing"
    return "Person"

# Video input
input_video = "/content/Bending.mp4"
cap = cv2.VideoCapture(input_video)
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
fps = int(cap.get(cv2.CAP_PROP_FPS))

output_path = "merged_output.avi"
fourcc = cv2.VideoWriter_fourcc(*"XVID")
video_writer = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

# Process video
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Object detection
    object_results = object_model.predict(source=frame, conf=0.5)

    # Pose estimation
    pose_results = pose_model.predict(source=frame, conf=0.5)

    # Draw object detection results
    for result in object_results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 255), 2)
            label = f"Object: {int(box.conf[0] * 100)}%"
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

    # Draw pose estimation results
    for result in pose_results:
        for box, keypoints in zip(result.boxes, result.keypoints.xy):
            keypoints = [tuple(map(int, kp)) for kp in keypoints]
            action = classify_pose(keypoints)

            x1, y1, x2, y2 = map(int, box.xyxy[0])
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{action}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    video_writer.write(frame)

video_writer.release()
cap.release()
print(f"Processed video saved at: {output_path}")


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.

0: 672x384 (no detections), 303.6ms
Speed: 24.8ms preprocess, 303.6ms inference, 13.7ms postprocess per image at shape (1, 3, 672, 384)

0: 640x384 1 person, 121.9ms
Speed: 3.4ms preprocess, 121.9ms inference, 18.3ms postprocess per image at shape (1, 3, 640, 384)

0: 672x384 (no detections), 97.6ms
Speed: 5.6ms preprocess, 97.6ms inference, 0.4ms postprocess per image at shape (1, 3, 672, 384)

0: 640x384 1 person, 106.8ms
Speed: 3.5ms preprocess, 106.8ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 384)

0: 672x384 (no detections), 108.7ms
Speed: 3.7ms preprocess, 108.7ms inference, 0.6ms postprocess per image at shape (1, 3, 672, 384)

0: 640x384 1 person, 106.1

In [3]:
pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.74-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nv

In [4]:
import cv2
import numpy as np
from ultralytics import YOLO

# Load models
pose_model = YOLO("/content/yolo11n-pose.pt")  # Pose estimation model
object_model = YOLO("/content/best.pt")  # Object detection model

# Function to calculate angle between three points
def calculate_angle(a, b, c):
    if a is None or b is None or c is None:
        return None
    a, b, c = np.array(a), np.array(b), np.array(c)
    ba = a - b
    bc = c - b
    cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc) + 1e-6)
    angle = np.degrees(np.arccos(np.clip(cosine_angle, -1.0, 1.0)))
    return angle

# Define action classification based on keypoints
def classify_pose(keypoints):
    if keypoints is None or len(keypoints) < 9:
        return "Unknown"

    nose, left_shoulder, right_shoulder, left_hip, right_hip, left_knee, right_knee, left_ankle, right_ankle = keypoints[:9]
    keypoint_check = all(kp is not None for kp in [left_hip, left_knee, left_ankle, right_hip, right_knee, right_ankle])
    if not keypoint_check:
        return "Unknown"

    left_knee_angle = calculate_angle(left_hip, left_knee, left_ankle)
    right_knee_angle = calculate_angle(right_hip, right_knee, right_ankle)
    torso_angle = calculate_angle(left_shoulder, left_hip, left_knee)
    arm_angle = calculate_angle(left_shoulder, nose, right_shoulder)

    if None in [left_knee_angle, right_knee_angle, torso_angle, arm_angle]:
        return "Unknown"

    stride_length = abs(left_ankle[0] - right_ankle[0])
    knee_difference = abs(left_knee[1] - right_knee[1])
    left_wrist = keypoints[10] if len(keypoints) > 10 else None
    right_wrist = keypoints[11] if len(keypoints) > 11 else None

    if torso_angle < 45:
        return "Bending"
    elif left_knee_angle < 100 or right_knee_angle < 100:
        if stride_length > 50 and knee_difference > 30:
            return "Running"
        return "Walking"
    elif left_knee_angle > 160 and right_knee_angle > 160:
        return "Standing"
    elif nose[1] > left_hip[1] and nose[1] > right_hip[1]:
        return "Lying on Floor"
    elif arm_angle > 120:
        return "Arm Raising"
    elif left_hip[1] < left_knee[1] and right_hip[1] < right_knee[1]:
        return "Jumping"
    elif left_shoulder[0] != right_shoulder[0]:
        return "Leaning"
    elif left_ankle[1] < left_knee[1] and right_ankle[1] < right_knee[1]:
        return "Climbing"
    elif left_wrist and right_wrist and (left_wrist[0] - left_hip[0] < 30 or right_wrist[0] - right_hip[0] < 30):
        return "Touching"
    return "Person"

# Video input
input_video = "/content/Vedio_01.mp4"
cap = cv2.VideoCapture(input_video)
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
fps = int(cap.get(cv2.CAP_PROP_FPS))

output_path = "merged_output_1.avi"
fourcc = cv2.VideoWriter_fourcc(*"XVID")
video_writer = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

# Process video
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Object detection
    object_results = object_model.predict(source=frame, conf=0.5)

    # Pose estimation
    pose_results = pose_model.predict(source=frame, conf=0.5)

   # Draw object detection results
    for result in object_results:
      for box in result.boxes:  # Iterate through boxes within the result
          x1, y1, x2, y2 = map(int, box.xyxy[0])
          label = object_model.names[int(box.cls[0])]  # Access cls from the box
          cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 255), 2)
          cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

    # Draw pose estimation results
    for result in pose_results:
        for box, keypoints in zip(result.boxes, result.keypoints.xy):
            keypoints = [tuple(map(int, kp)) for kp in keypoints]
            action = classify_pose(keypoints)

            x1, y1, x2, y2 = map(int, box.xyxy[0])
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{action}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    video_writer.write(frame)

video_writer.release()
cap.release()
print(f"Processed video saved at: {output_path}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Speed: 6.6ms preprocess, 171.1ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x672 1 No Hair cap, 1 Short lab coat, 149.7ms
Speed: 6.5ms preprocess, 149.7ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 672)

0: 384x640 1 person, 181.8ms
Speed: 4.5ms preprocess, 181.8ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x672 1 Short lab coat, 154.4ms
Speed: 4.3ms preprocess, 154.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 672)

0: 384x640 1 person, 160.1ms
Speed: 4.3ms preprocess, 160.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x672 1 Short lab coat, 150.5ms
Speed: 4.2ms preprocess, 150.5ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 672)

0: 384x640 1 person, 161.1ms
Speed: 3.9ms preprocess, 161.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x672 1 Short lab coat, 