# This notebook loads a small sample of litter survey data and tries to train a YOLO model on it

In [31]:
# Install Ultralytics into the kernel's environment, then print its environment summary.
# NOTE(review): the install is unpinned; the recorded output below was produced with Ultralytics 8.3.57.
%pip install ultralytics
import ultralytics
ultralytics.checks()

Ultralytics 8.3.57 🚀 Python-3.10.12 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3080 Ti, 12287MiB)
Setup complete ✅ (8 CPUs, 7.7 GB RAM, 132.4/1006.9 GB disk)


In [7]:
# List the contents of /app to confirm the dataset folders are present.
!ls /app

CITATION.cff	 docker		     litter_survey_dataset  train_dataset
CONTRIBUTING.md  dockerfile	     mkdocs.yml		    ultralytics
LICENSE		 docs		     pyproject.toml
README.md	 examples	     requirements.txt
README.zh-CN.md  lin_toturial.ipynb  tests


In [8]:
# List the example projects and notebooks under /app/examples.
!ls /app/examples  

README.md		       YOLOv8-OpenVINO-CPP-Inference
RTDETR-ONNXRuntime-Python      YOLOv8-Region-Counter
YOLO-Series-ONNXRuntime-Rust   YOLOv8-SAHI-Inference-Video
YOLOv8-Action-Recognition      YOLOv8-Segmentation-ONNXRuntime-Python
YOLOv8-CPP-Inference	       YOLOv8-TFLite-Python
YOLOv8-LibTorch-CPP-Inference  heatmaps.ipynb
YOLOv8-ONNXRuntime	       hub.ipynb
YOLOv8-ONNXRuntime-CPP	       object_counting.ipynb
YOLOv8-ONNXRuntime-Rust        object_tracking.ipynb
YOLOv8-OpenCV-ONNX-Python      tutorial.ipynb


### Check if you have GPU enabled

In [30]:
# Ask PyTorch, in a separate python3 process, whether CUDA is available.
!python3 -c "import torch; print(torch.cuda.is_available())"

True


### Check if your Docker container has all the necessary libraries

In [10]:
import torch

In [11]:
import ultralytics

In [12]:
import cv2

In [13]:
# Confirmation message for the torch / ultralytics / cv2 import checks.
print('Importing worked! You are all set!')

Importing worked! You are all set!


# Now load an image and run a prediction

In [14]:
import cv2

def extract_frames(video_path, frame_interval=30):
    """Yield every ``frame_interval``-th frame of a video, starting with the first.

    Args:
        video_path: Path (or other source) passed to ``cv2.VideoCapture``.
        frame_interval: Number of frames between yielded frames. The default
            of 30 gives roughly one frame per second for a 30 fps video.

    Yields:
        Frames exactly as returned by ``cap.read()``.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if frame_interval < 1:
        raise ValueError(f"frame_interval must be >= 1, got {frame_interval}")

    cap = cv2.VideoCapture(video_path)
    try:
        count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: stop iterating.
                break
            if count % frame_interval == 0:
                yield frame
            count += 1
    finally:
        # Runs even when the consumer stops early (break / generator.close()),
        # so the capture handle is never leaked.
        cap.release()

In [15]:
# Sample survey photo used for prediction; the path is relative to the kernel's working directory.
image_path = 'litter_survey_dataset/2024-07-21/RayBan/photo-21700_singular_display_fullPicture.jpeg'

In [16]:
from ultralytics import YOLO
from ultralytics.solutions import object_counter
import cv2
import matplotlib.pyplot as plt


def run_inference(image_path, model_path="yolo11n.pt", line_thickness=12, font_scale=6):
    """Run YOLO detection on a single image and display the annotated result.

    Prints the image shape and, for every detected box, its pixel coordinates,
    the model's class map, the predicted class id and the confidence. The
    boxes and labels are drawn on an RGB copy of the image, which is then
    shown with Matplotlib.

    Args:
        image_path: Path of the image to read with ``cv2.imread``.
        model_path: Weights passed to ``YOLO(...)``.
        line_thickness: Thickness in pixels for both the boxes and the label text.
        font_scale: ``cv2.putText`` font scale. The defaults are large because
            the survey photos are high resolution (e.g. 4032x3024).

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    # cv2.imread reports failure by returning None instead of raising.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB for Matplotlib

    print(image.shape)
    model = YOLO(model_path)
    # Run inference on the original (BGR) image
    results = model(image)

    red = (255, 0, 0)  # Red, because image_rgb is in RGB channel order

    # Loop through the results and draw bounding boxes
    for result in results:
        for box in result.boxes:
            # Bounding box corners in pixel coordinates
            x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
            print(f"{x1} {y1} {x2} {y2}")

            cv2.rectangle(image_rgb, (x1, y1), (x2, y2), red, line_thickness)

            # Anchor the label at the left edge of the box, vertically centred
            label_position = (x1, round((y1 + y2) / 2))

            # Class label and confidence score
            class_id = box.cls[0].cpu().numpy()
            label = result.names[int(class_id)]
            print(f"available class id and label: {result.names}\npredicted class id:")
            print(class_id)

            confidence = float(box.conf[0].cpu().numpy())
            print(f"confidence {confidence}")

            cv2.putText(image_rgb, f"{label}: {confidence:.2f}", label_position,
                        cv2.FONT_HERSHEY_SIMPLEX, font_scale, red, line_thickness, cv2.LINE_AA)

    # Display the image with bounding boxes using Matplotlib
    plt.figure(figsize=(10, 10))
    plt.imshow(image_rgb)
    plt.axis('off')
    plt.show()



# Run the inference on the sample image and show the annotated result
run_inference(image_path)

(4032, 3024, 3)

0: 640x480 1 umbrella, 105.2ms
Speed: 4.8ms preprocess, 105.2ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)
1131 2 3016 364
available class id and label: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'ca

<Figure size 1000x1000 with 1 Axes>

### The same can be done via the CLI

In [17]:
# Same weights and image as the Python example, run through the `yolo` command-line tool.
!yolo predict model=yolo11n.pt source='litter_survey_dataset/2024-07-21/RayBan/photo-21700_singular_display_fullPicture.jpeg'

Ultralytics 8.3.57 🚀 Python-3.10.12 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3080 Ti, 12287MiB)
YOLO11n summary (fused): 238 layers, 2,616,248 parameters, 0 gradients, 6.5 GFLOPs

image 1/1 /ultralytics/litter_survey_dataset/2024-07-21/RayBan/photo-21700_singular_display_fullPicture.jpeg: 640x480 1 umbrella, 113.3ms
Speed: 7.6ms preprocess, 113.3ms inference, 6.6ms postprocess per image at shape (1, 3, 640, 480)
Results saved to [1m/ultralytics/runs/detect/predict[0m
💡 Learn more at https://docs.ultralytics.com/modes/predict


# Model Training 
### Refer to https://docs.ultralytics.com/modes/train/#tensorboard

In [2]:
from ultralytics import YOLO

# Load a model
model = YOLO("yolo11n.pt")  # load a pretrained model (recommended for training)

# Train the model
# Short 3-epoch run on coco8 (4 train / 4 val images per the log below) at 640 px,
# used here to check that the training pipeline works end to end.
results = model.train(data="coco8.yaml", epochs=3, imgsz=640)

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt'...


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5.35M/5.35M [00:00<00:00, 22.2MB/s]


Ultralytics 8.3.57 🚀 Python-3.10.12 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3080 Ti, 12287MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=coco8.yaml, epochs=3, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=Tr

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 433k/433k [00:00<00:00, 8.08MB/s]
Unzipping /datasets/coco8.zip to /datasets/coco8...: 100%|██████████| 25/25 [00:00<00:00, 4218.09file/s]

Dataset download success ✅ (1.0s), saved to [1m/datasets[0m






Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 755k/755k [00:00<00:00, 12.8MB/s]


                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      6640  ultralytics.nn.modules.block.C3k2            [32, 64, 1, False, 0.25]      
  3                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]                
  4                  -1  1     26080  ultralytics.nn.modules.block.C3k2            [64, 128, 1, False, 0.25]     
  5                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
  6                  -1  1     87040  ultralytics.nn.modules.block.C3k2            [128, 128, 1, True]           
  7                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128




YOLO11n summary: 319 layers, 2,624,080 parameters, 2,624,064 gradients, 6.6 GFLOPs

Transferred 499/499 items from pretrained weights
Freezing layer 'model.23.dfl.conv.weight'
[34m[1mAMP: [0mrunning Automatic Mixed Precision (AMP) checks...
[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /datasets/coco8/labels/train... 4 images, 0 backgrounds, 0 corrupt: 100%|██████████| 4/4 [00:00<00:00, 2752.17it/s][0m

[34m[1mtrain: [0mNew cache created: /datasets/coco8/labels/train.cache



[34m[1mval: [0mScanning /datasets/coco8/labels/val... 4 images, 0 backgrounds, 0 corrupt: 100%|██████████| 4/4 [00:00<00:00, 723.78it/s][0m

[34m[1mval: [0mNew cache created: /datasets/coco8/labels/val.cache





Plotting labels to /ultralytics/runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1m/ultralytics/runs/detect/train[0m
Starting training for 3 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/3     0.717G      1.092       2.75      1.488         21        640: 100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.13s/it]

                   all          4         17      0.568       0.85      0.878      0.634






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/3     0.849G      1.186      2.796      1.488         36        640: 100%|██████████| 1/1 [00:00<00:00,  7.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.35it/s]

                   all          4         17      0.557       0.85      0.886      0.635






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/3     0.784G      1.096      2.508      1.219         20        640: 100%|██████████| 1/1 [00:00<00:00,  6.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.69it/s]

                   all          4         17      0.559       0.85      0.851      0.615






3 epochs completed in 0.001 hours.
Optimizer stripped from /ultralytics/runs/detect/train/weights/last.pt, 5.5MB
Optimizer stripped from /ultralytics/runs/detect/train/weights/best.pt, 5.5MB

Validating /ultralytics/runs/detect/train/weights/best.pt...
Ultralytics 8.3.57 🚀 Python-3.10.12 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3080 Ti, 12287MiB)
YOLO11n summary (fused): 238 layers, 2,616,248 parameters, 0 gradients, 6.5 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.51it/s]


                   all          4         17      0.558       0.85      0.886      0.635
                person          3         10      0.548        0.6      0.592      0.265
                   dog          1          1      0.541          1      0.995      0.697
                 horse          1          2        0.5          1      0.995      0.674
              elephant          1          2      0.358        0.5      0.745      0.281
              umbrella          1          1      0.563          1      0.995      0.995
          potted plant          1          1       0.84          1      0.995      0.895
Speed: 0.8ms preprocess, 12.2ms inference, 0.0ms loss, 2.2ms postprocess per image
Results saved to [1m/ultralytics/runs/detect/train[0m


In [3]:
# Confirmation message for the coco8 training run.
print('Training success!')

Training success!


### Now try to train using litter survey data

In [27]:
# First prepare the dataset: convert the annotation JSON files into YOLO label files

import os
import json

# Fallback image size in pixels, used when an annotation file does not record its own size.
img_width = 3024
img_height = 4032

label_dir = 'train_dataset/labels'


def _shape_to_yolo_row(points, image_width, image_height, class_id):
    """Format one annotated shape as a YOLO row: ``class cx cy w h`` (normalised).

    The box is the axis-aligned bounding box of ``points``. For a rectangle
    given by two corners this is the rectangle itself, in either corner order.
    """
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]
    center_x = (min(xs) + max(xs)) / 2 / image_width
    center_y = (min(ys) + max(ys)) / 2 / image_height
    width = (max(xs) - min(xs)) / image_width
    height = (max(ys) - min(ys)) / image_height
    return f'{class_id} {center_x} {center_y} {width} {height}'


def convert_labels_to_yolo(label_dir, splits=('train', 'val'), target_label='Grabber',
                           class_id=1, default_size=(img_width, img_height)):
    """Write a YOLO ``.txt`` label file next to every annotation ``.json`` file.

    Args:
        label_dir: Directory containing one sub-directory per split.
        splits: Names of the split sub-directories to process.
        target_label: Only shapes whose ``label`` equals this value are exported.
        class_id: Class index written in front of every box.
            NOTE(review): YOLO class ids are zero-based and must match the
            ``names`` mapping in the dataset yaml (not visible here) - confirm
            that 1 is the intended index for this label.
        default_size: ``(width, height)`` in pixels, used when the JSON has no
            ``imageWidth`` / ``imageHeight`` entries.

    Returns:
        List of the label file paths that were written. JSON files without a
        matching shape produce no label file.
    """
    default_width, default_height = default_size
    written = []
    for label_type in splits:
        split_dir = os.path.join(label_dir, label_type)
        for label_json in os.listdir(split_dir):
            if not label_json.endswith('.json'):
                continue
            print(label_json)
            with open(os.path.join(split_dir, label_json), 'r') as json_file:
                json_data = json.load(json_file)

            # Presumably LabelMe-style JSON, which stores the image size next to the
            # shapes; fall back to the configured size when the keys are absent.
            image_width = json_data.get('imageWidth') or default_width
            image_height = json_data.get('imageHeight') or default_height

            rows = [
                _shape_to_yolo_row(shape['points'], image_width, image_height, class_id)
                for shape in json_data['shapes']
                if shape['label'] == target_label
            ]
            if not rows:
                continue

            # Write all boxes of one image in a single pass, one row per box;
            # reopening the file per box would keep only the last one.
            label_txt = os.path.splitext(label_json)[0] + '.txt'
            txt_path = os.path.join(split_dir, label_txt)
            with open(txt_path, 'w') as txt_file:
                txt_file.write('\n'.join(rows) + '\n')
            written.append(txt_path)
    return written


# In a notebook kernel __name__ is '__main__', so running this cell performs the
# conversion; the guard only keeps the helpers importable without touching the dataset.
if __name__ == '__main__':
    convert_labels_to_yolo(label_dir)



photo-19704_singular_display_fullPicture.json
photo-19639_singular_display_fullPicture.json
mcp_photo-19652_singular_display_fullPicture.json
photo-1007_singular_display_fullPicture.json
photo-3371_singular_display_fullPicture.json
mcp_photo-19511_singular_display_fullPicture.json


In [34]:
from ultralytics import YOLO

# Load the pretrained yolo11n checkpoint into a new model object (rebinds `model`).
model = YOLO('yolo11n.pt')
# Dataset definition file for the litter survey training data.
data_yaml = '/app/train_dataset/train_dataset.yaml'
print(f"Loading dataset: {data_yaml}")

Loading dataset: /app/train_dataset/train_dataset.yaml


In [36]:

# TODO: training on the litter survey dataset is currently disabled.
# model.train(data=data_yaml, epochs=3, imgsz=max(3024, 4032))