In [1]:
import torch

In [2]:
import ultralytics

View settings with 'yolo settings' or at 'C:\Users\Viren\AppData\Roaming\Ultralytics\settings.yaml'
Update settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [None]:
import os
from ultralytics import YOLO
import supervision as sv
import numpy as np

# Define the paths (relative, so they resolve against the notebook's working directory)
VIDEO_PATH = "videos/D01_20240522173959.mp4"  # source clip passed to sv.process_video
MODEL_PATH = "8s/detect/train3/weights/best.pt"  # weights file handed to YOLO() below
RESULT_VIDEO_PATH = "results/result.mp4"  # target file passed to sv.process_video

# Load the model once; process_frame reads this module-level `model` for every frame
model = YOLO(MODEL_PATH)

# Process each frame
def process_frame(frame: np.ndarray, _) -> np.ndarray:
    """Run the detector on one frame and return the frame with boxes and labels drawn.

    Args:
        frame: Image array for the current video frame.
        _: Second positional argument supplied by the caller (unused here).

    Returns:
        The frame as returned by the box annotator.
    """
    results = model(frame, imgsz=1280)[0]

    # Ultralytics keeps detections on `Results.boxes` (xyxy / conf / cls tensors);
    # a `Results` object has no `.xyxy` attribute, so the previous indexing raised AttributeError.
    boxes = results.boxes
    xyxy = boxes.xyxy.cpu().numpy()
    confidences = boxes.conf.cpu().numpy()
    class_ids = boxes.cls.cpu().numpy().astype(int)

    detections = sv.Detections(xyxy=xyxy, confidence=confidences, class_id=class_ids)
    box_annotator = sv.BoxAnnotator(thickness=4, text_thickness=2, text_scale=1.5)
    # One "<class name> <confidence>" caption per detection, in detection order.
    labels = [
        f"{model.names[class_id]} {confidence:.2f}"
        for class_id, confidence in zip(class_ids, confidences)
    ]
    return box_annotator.annotate(scene=frame, detections=detections, labels=labels)

# Process the video
# Make sure the output folder exists first: the video writer is not expected to create
# missing directories, in which case no result file would be produced.
os.makedirs(os.path.dirname(RESULT_VIDEO_PATH) or ".", exist_ok=True)
sv.process_video(source_path=VIDEO_PATH, target_path=RESULT_VIDEO_PATH, callback=process_frame)

In [30]:
from ultralytics import YOLO

# Rebind the notebook-wide `model` to a locally downloaded checkpoint.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
model = YOLO(model=r"c:\Users\Viren\Downloads\best.pt")

In [31]:
# Print model architecture: the nested, layer-by-layer repr of the network held in `model.model`
print(model.model)  

SegmentationModel(
  (model): Sequential(
    (0): Conv(
      (conv): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (1): Conv(
      (conv): Conv2d(48, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (2): C2f(
      (cv1): Conv(
        (conv): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (cv2): Conv(
        (conv): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )


In [32]:
# Print the model summary. Called with default arguments this emits a single line
# (layers, parameters, gradients, GFLOPs) and returns the same four figures as a tuple.
# NOTE(review): a per-layer breakdown presumably needs `model.info(detailed=True)` — confirm in the Ultralytics docs.
model.info()

YOLOv8m-seg summary: 331 layers, 27,242,543 parameters, 0 gradients, 110.4 GFLOPs


(331, 27242543, 0, 110.4044032)

In [33]:
# Print the `overrides` dict stored on the loaded model
# (for this checkpoint: task, data, imgsz, single_cls and the model path)
print(model.overrides)

{'task': 'segment', 'data': 'H:/no-finding-2/data.yaml', 'imgsz': 512, 'single_cls': False, 'model': 'c:\\Users\\Viren\\Downloads\\best.pt'}


In [34]:
# Print the class-index -> class-name mapping exposed by the model (`model.names`)
print(model.names)

{0: 'Heart', 1: 'Left_Lung', 2: 'Right_Lung', 3: 'Spine', 4: 'Weasand'}


In [23]:
# Run inference on a single image; the call returns an indexable collection of results (one image here).
# NOTE(review): absolute, machine-specific path — this cell will not run on another machine as-is.
result = model(r"d:\Projects\RODIC HMIS\Abhimanyu Incubation Phase\X Rays\chest-xray-5\train\images\CHNCXR_0037_0_png.rf.25cf15f5268d187efe853cbe5fce3202.jpg")
# Display the first result
result[0].show()


image 1/1 d:\Projects\RODIC HMIS\Abhimanyu Incubation Phase\X Rays\chest-xray-5\train\images\CHNCXR_0037_0_png.rf.25cf15f5268d187efe853cbe5fce3202.jpg: 512x512 1 Heart, 1 Left_Lung, 1 Right_Lung, 1 Spine, 1 Weasand, 496.9ms
Speed: 3.1ms preprocess, 496.9ms inference, 6.2ms postprocess per image at shape (1, 3, 512, 512)


In [15]:
import io
import os

import torch

# Check model device (CPU/GPU)
device = next(model.model.parameters()).device
print("Model is on:", device)

# Get model size in MB.
# The state dict is serialized into an in-memory buffer instead of a scratch "temp.pth"
# in the working directory: nothing is left behind if saving fails, and an existing
# file with that name can no longer be overwritten and deleted by accident.
with io.BytesIO() as state_buffer:
    torch.save(model.model.state_dict(), state_buffer)
    # Seeking to the end returns the total number of bytes written.
    size_bytes = state_buffer.seek(0, io.SEEK_END)
print("Model size (MB):", size_bytes / (1024 * 1024))


Model is on: cpu
Model size (MB): 104.36057376861572


In [11]:
import cv2

video_path = "Video/v1Collapse of Overhead Crane.mp4"  # Replace with your video file or set to 0 for webcam
cap = cv2.VideoCapture(video_path)
# Fail loudly on a bad path instead of silently skipping the loop below.
if not cap.isOpened():
    raise IOError(f"Could not open video source: {video_path!r}")

# Get video properties (named constants instead of the raw property indices 3 and 4)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

# Define video writer to save output (optional)
# out = cv2.VideoWriter("output.mp4", cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

cv2.namedWindow("Inference", cv2.WINDOW_NORMAL)

# try/finally guarantees the capture and the window are released even when
# inference raises or the kernel is interrupted mid-video.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Perform inference
        results = model(frame)

        # Draw results on the frame
        for result in results:
            for box in result.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])  # Get bounding box
                conf = box.conf[0].item()  # Confidence score
                cls = int(box.cls[0])  # Class index
                label = f"{model.names[cls]}: {conf:.2f}"

                # Draw bounding box and label
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Show frame
        cv2.imshow("Inference", frame)

        # Write to output video
        # out.write(frame)

        # Press 'q' to exit
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release resources
    cap.release()
    # out.release()
    cv2.destroyAllWindows()



0: 384x640 1 person, 1 car, 1 truck, 2 handbags, 1 suitcase, 252.1ms
Speed: 5.8ms preprocess, 252.1ms inference, 15.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 cars, 1 truck, 1 handbag, 1 suitcase, 145.4ms
Speed: 0.0ms preprocess, 145.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 cars, 1 truck, 1 handbag, 1 suitcase, 99.8ms
Speed: 0.0ms preprocess, 99.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 cars, 1 truck, 1 suitcase, 98.2ms
Speed: 2.0ms preprocess, 98.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 cars, 2 trucks, 1 handbag, 1 suitcase, 98.7ms
Speed: 0.0ms preprocess, 98.7ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 cars, 1 truck, 2 handbags, 1 suitcase, 99.5ms
Speed: 0.0ms preprocess, 99.5ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x

In [None]:
import cv2

video_path = "Video/v1Collapse of Overhead Crane.mp4"
cap = cv2.VideoCapture(video_path)
# Fail loudly on a bad path instead of silently skipping the loop below.
if not cap.isOpened():
    raise IOError(f"Could not open video source: {video_path!r}")

# Get video properties (named constants instead of the raw property indices 3 and 4)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

cv2.namedWindow("Inference", cv2.WINDOW_NORMAL)

# try/finally guarantees the capture and the window are released even when
# inference raises or the kernel is interrupted mid-video.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Perform inference (disable verbose output)
        results = model(frame, verbose=False)

        # Draw results on the frame
        for result in results:
            for box in result.boxes:
                cls = int(box.cls[0])  # Class index

                # Only draw detections whose class ID is 0
                # (the earlier comment said "== 1", which contradicted the check below)
                if cls == 0:
                    x1, y1, x2, y2 = map(int, box.xyxy[0])  # Get bounding box
                    conf = box.conf[0].item()  # Confidence score
                    label = f"{model.names[cls]}: {conf:.2f}"

                    # Draw bounding box and label
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Show frame
        cv2.imshow("Inference", frame)

        # Write to output video
        # out.write(frame)

        # Press 'q' to exit
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release resources
    cap.release()
    # out.release()
    cv2.destroyAllWindows()

In [14]:
import cv2

def process_video(model, video_path):
    """Play a video in an "Inference" window with class-0 detections drawn on each frame.

    Args:
        model: Callable detector; called as ``model(frame, verbose=False)`` and
            expected to expose ``names`` plus results with a ``boxes`` iterable.
        video_path: Source handed to ``cv2.VideoCapture``.

    Raises:
        IOError: If the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    # A bad path used to produce a silent no-op; report it instead.
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video source: {video_path!r}")

    cv2.namedWindow("Inference", cv2.WINDOW_NORMAL)

    # try/finally so the capture and window are released even if inference
    # raises or the kernel is interrupted.
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Perform inference (disable verbose output)
            results = model(frame, verbose=False)

            # Draw results on the frame
            for result in results:
                for box in result.boxes:
                    cls = int(box.cls[0])  # Class index

                    # Only draw detections whose class ID is 0
                    if cls != 0:
                        continue

                    x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box corners
                    conf = box.conf[0].item()  # Confidence score
                    label = f"{model.names[cls]}: {conf:.2f}"

                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # Show frame
            cv2.imshow("Inference", frame)

            # Press 'q' to exit
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release resources
        cap.release()
        cv2.destroyAllWindows()

# Example usage:
# process_video(your_model, "path/to/video.mp4")


In [None]:
from ultralytics import YOLO
# Rebind the notebook-wide `model` to the "yolov8s.pt" checkpoint
model = YOLO(model="yolov8s.pt")

In [17]:
# Play clip v10 through whichever `model` / `process_video` are currently bound in the kernel
process_video(model=model, video_path="Video/v10.mp4")

In [21]:
from ultralytics import YOLO
# Rebind the notebook-wide `model` to the "yolov8s_person_jib_best.pt" checkpoint
model = YOLO(model="yolov8s_person_jib_best.pt")

In [15]:
# Replay clip v10 with the `model` bound by the preceding cell
process_video(model=model, video_path="Video/v10.mp4")

In [12]:
import cv2

def process_video(model, video_path, conf_threshold=0.20, target_classes=(0,)):
    """Play every other frame of a video with the selected detections drawn.

    Args:
        model: Callable detector; called as ``model(frame, verbose=False)`` and
            expected to expose ``names`` plus results with a ``boxes`` iterable.
        video_path: Source handed to ``cv2.VideoCapture``.
        conf_threshold: Minimum confidence for a box to be drawn (default 0.20,
            the previously hard-coded value).
        target_classes: Class IDs to draw (default ``(0,)``, the previously
            hard-coded filter). Class 0 is drawn in red, any other class in green.

    Raises:
        IOError: If the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    # A bad path used to produce a silent no-op; report it instead.
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video source: {video_path!r}")

    cv2.namedWindow("Inference", cv2.WINDOW_NORMAL)
    frame_count = 0

    # try/finally so the capture and window are released even if inference
    # raises or the kernel is interrupted.
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Skip every second frame (even counts are neither inferred nor shown)
            frame_count += 1
            if frame_count % 2 == 0:
                continue

            # Perform inference (disable verbose output)
            results = model(frame, verbose=False)

            # Draw results on the frame
            for result in results:
                for box in result.boxes:
                    cls = int(box.cls[0])  # Class index
                    conf = box.conf[0].item()  # Confidence score

                    # Keep only the requested classes at or above the threshold
                    if cls in target_classes and conf >= conf_threshold:
                        # BGR colours: red for class 0, green for any other class
                        color = (0, 0, 255) if cls == 0 else (0, 255, 0)
                        x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box corners
                        label = f"{model.names[cls]}: {conf:.2f}"

                        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Show frame
            cv2.imshow("Inference", frame)

            # Press 'q' to exit
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release resources
        cap.release()
        cv2.destroyAllWindows()

# Example usage:
# process_video(your_model, "path/to/video.mp4")

In [13]:
from ultralytics import YOLO
# Rebind the notebook-wide `model` to the "yolov8s_person_jib_best.pt" checkpoint
model = YOLO(model="yolov8s_person_jib_best.pt")

In [20]:
# Play clip v11 through the currently bound `model` / `process_video`
process_video(model=model, video_path="Video/v11.mp4")

In [15]:
# Play clip v16 through the currently bound `model` / `process_video`
process_video(model=model, video_path="Video/v16.mp4")

In [4]:
from ultralytics import YOLO
# Rebind the notebook-wide `model` to the "yolov8s.pt" checkpoint
model = YOLO(model="yolov8s.pt")

In [None]:
from ultralytics import YOLO
# Load the "yolov8s.pt" checkpoint, then play clip v16 with it
model = YOLO(model="yolov8s.pt")
process_video(model=model, video_path="Video/v16.mp4")

In [None]:
import cv2

def process_video(model, video_path):
    """Play every other frame of a video, running the tracker and drawing class-0 boxes.

    Args:
        model: Object exposing ``track(frame, ...)`` and ``names``; results are
            expected to carry a ``boxes`` iterable.
        video_path: Source handed to ``cv2.VideoCapture``.

    Raises:
        IOError: If the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    # A bad path used to produce a silent no-op; report it instead.
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video source: {video_path!r}")

    cv2.namedWindow("Inference", cv2.WINDOW_NORMAL)
    frame_count = 0

    # try/finally so the capture and window are released even if tracking
    # raises or the kernel is interrupted.
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Skip every second frame (even counts are neither tracked nor shown)
            frame_count += 1
            if frame_count % 2 == 0:
                continue

            # Frames are fed one at a time, so persist=True is needed to tell
            # Ultralytics that each call continues the previous frame; without it
            # tracks are not carried over between calls.
            results = model.track(
                frame,
                show=False,
                tracker="bytetrack.yaml",
                persist=True,
                verbose=False,
            )

            # Draw results on the frame
            for result in results:
                for box in result.boxes:
                    cls = int(box.cls[0])  # Class index
                    conf = box.conf[0].item()  # Confidence score

                    # BGR colours: red for class 0, green otherwise
                    color = (0, 0, 255) if cls == 0 else (0, 255, 0)

                    # Only draw class-0 detections with confidence >= 0.20
                    if conf >= 0.20 and cls == 0:
                        x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box corners
                        label = f"{model.names[cls]}: {conf:.2f}"

                        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Show frame
            cv2.imshow("Inference", frame)

            # Press 'q' to exit
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release resources
        cap.release()
        cv2.destroyAllWindows()

# Example usage:
# process_video(your_model, "path/to/video.mp4")

In [38]:
from ultralytics import YOLO
# Load the "yolov8s.pt" checkpoint, then play clip v16 with it
model = YOLO(model="yolov8s.pt")
process_video(model=model, video_path="Video/v16.mp4")


0: 384x640 2 persons, 302.4ms
Speed: 9.9ms preprocess, 302.4ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 252.3ms
Speed: 0.0ms preprocess, 252.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 332.2ms
Speed: 0.0ms preprocess, 332.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 295.7ms
Speed: 1.9ms preprocess, 295.7ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 183.6ms
Speed: 13.5ms preprocess, 183.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 151.6ms
Speed: 10.2ms preprocess, 151.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 220.7ms
Speed: 0.0ms preprocess, 220.7ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 193.7ms
Speed: 0.0ms preprocess, 193.7ms inference, 0.0ms postprocess per

KeyboardInterrupt: 

In [None]:
import cv2

def process_video(model, video_path):
    """Play every other frame of a video, running the tracker and drawing class-0 boxes.

    Args:
        model: Object exposing ``track(frame, ...)`` and ``names``; results are
            expected to carry a ``boxes`` iterable.
        video_path: Source handed to ``cv2.VideoCapture``.

    Raises:
        IOError: If the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    # A bad path used to produce a silent no-op; report it instead.
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video source: {video_path!r}")

    # Get video properties
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Size the window to the video once, up front, instead of on every frame
    cv2.namedWindow("Inference", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("Inference", frame_width, frame_height)

    # try/finally so the capture and window are released even if tracking
    # raises or the kernel is interrupted.
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Skip every second frame: grab() advances past the next frame without
            # retrieving it, replacing the per-iteration CAP_PROP_POS_FRAMES seek.
            cap.grab()

            # Frames are fed one at a time, so persist=True is needed to tell
            # Ultralytics that each call continues the previous frame.
            results = model.track(
                frame,
                show=False,
                tracker="bytetrack.yaml",
                persist=True,
                verbose=False,
            )

            # Draw results on the frame (an empty/None result set draws nothing)
            for result in results or ():
                for box in result.boxes:
                    cls = int(box.cls[0])  # Class index
                    conf = box.conf[0].item()  # Confidence score

                    # BGR colours: red for class 0, green otherwise
                    color = (0, 0, 255) if cls == 0 else (0, 255, 0)

                    # Only draw class-0 detections with confidence >= 0.20
                    if conf >= 0.20 and cls == 0:
                        x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box corners
                        label = f"{model.names[cls]}: {conf:.2f}"

                        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            cv2.imshow("Inference", frame)

            # Press 'q' to exit
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release resources
        cap.release()
        cv2.destroyAllWindows()


In [40]:
from ultralytics import YOLO
# Load the "yolov8s.pt" checkpoint, then play clip v16 with it
model = YOLO(model="yolov8s.pt")
process_video(model=model, video_path="Video/v16.mp4")

In [None]:
import cv2

def process_video(model, video_path):
    """Track every other frame of a video, letting ``model.track(show=True)`` do the display.

    The boxes that used to be drawn on ``frame`` after tracking were never shown
    (the ``imshow`` call had been commented out), so that dead drawing code is
    removed; rendering is left to the ``show=True`` option.

    Args:
        model: Object exposing ``track(frame, ...)``.
        video_path: Source handed to ``cv2.VideoCapture``.

    Raises:
        IOError: If the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    # A bad path used to produce a silent no-op; report it instead.
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video source: {video_path!r}")

    # The capture used to be left open (release was commented out). try/finally
    # guarantees it is released, and any display windows closed, even when the
    # kernel is interrupted, which is the only way to stop this loop early.
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Skip every second frame: grab() advances past the next frame without
            # retrieving it, replacing the per-iteration CAP_PROP_POS_FRAMES seek.
            cap.grab()

            # Frames are fed one at a time, so persist=True is needed to tell
            # Ultralytics that each call continues the previous frame.
            model.track(
                frame,
                show=True,
                tracker="bytetrack.yaml",
                persist=True,
                verbose=False,
            )
    finally:
        # Release resources
        cap.release()
        cv2.destroyAllWindows()


: 

In [52]:
from ultralytics import YOLO
# Load the "yolov8s.pt" checkpoint, then play clip v16 with it
model = YOLO(model="yolov8s.pt")
process_video(model=model, video_path="Video/v16.mp4")