# Split videos into frames

In [1]:
import cv2
import os

def split_video_to_frames(video_path, output_folder):
    """
    Split a video file into frames and save them as JPEG images.

    Each frame is written to ``output_folder`` as
    ``<video name>_<zero-padded frame index>.jpeg``, where the video name is
    the base name of ``video_path`` without its final extension and the frame
    index starts at 0.

    Args:
        video_path (str): Path to the input video file.
        output_folder (str): Path to the folder where frames will be saved.
            It is created if it does not exist.

    Returns:
        None. The outcome is reported with ``print``.
    """
    # Base name without extension, used as the file-name prefix of every frame
    video_name = os.path.splitext(os.path.basename(video_path))[0]

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    frame_count = 0    # frames read so far; also the index used in the file name
    saved_count = 0    # frames that were actually written to disk
    try:
        if not cap.isOpened():
            print(f"Error: Could not open video {video_path}")
            return

        # Zero-pad to at least 7 digits, or wider if the reported frame count needs it
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        padding_width = max(len(str(total_frames)), 7)

        while True:
            ret, frame = cap.read()
            if not ret:
                break  # No more frames could be read

            frame_file = os.path.join(output_folder, f"{video_name}_{frame_count:0{padding_width}d}.jpeg")

            # cv2.imwrite signals failure through its return value, so check it
            # instead of counting the frame as saved unconditionally.
            if cv2.imwrite(frame_file, frame):
                saved_count += 1
            frame_count += 1
    finally:
        # Always free the capture handle, including on errors and early return
        cap.release()

    print(f"Extracted {saved_count} frames to {output_folder}")
    if saved_count != frame_count:
        print(f"Warning: {frame_count - saved_count} frames could not be written to {output_folder}")

In [5]:
import os 
# List every entry in the sample-video folder and print it to see what is available.
videos = os.listdir('/app/litter_survey_dataset/SampleVideoToAnnotate')
print(videos)

['video-28057_singular_display.mov', 'video-28097_singular_display.mov', 'VID_20250119_103359_00_084.mp4', 'VID_20250119_104635_00_093.mp4', 'VID_20250119_104758_00_094.mp4', 'VID_20250119_104857_00_095.mp4', 'VID_20250119_105116_00_096.mp4', 'VID_20250119_105144_00_097.mp4', 'VID_20250119_105232_00_098.mp4', 'VID_20250119_105257_00_099.mp4', 'VID_20250119_105326_00_100.mp4', 'VID_20250119_105340_00_101.mp4', 'VID_20250119_105428_00_102.mp4', 'VID_20250119_105522_00_103.mp4', 'VID_20250119_105706_00_104.mp4', 'VID_20250119_105935_00_105.mp4']


In [None]:
# Extract the frames of every .mp4/.mov file in the sample folder into a sibling
# folder named after the video (file name without its extension).
video_output_folder_template = '/app/litter_survey_dataset/SampleVideoToAnnotate/{video_name}'
video_paths = os.listdir('/app/litter_survey_dataset/SampleVideoToAnnotate/')
for video_path in video_paths:
    if video_path.endswith(('.mp4', '.mov')):
        # os.path.splitext removes only the final extension, so a name such as
        # "clip.v2.mp4" maps to folder "clip.v2" -- the same stem that
        # split_video_to_frames uses as the frame file-name prefix.
        output_folder = video_output_folder_template.format(video_name=os.path.splitext(video_path)[0])
        # The same template also yields the full path of the video file itself.
        split_video_to_frames(video_output_folder_template.format(video_name=video_path), output_folder=output_folder)

In [None]:
# Total number of extracted frames: for every video file in the sample folder,
# count the .jpeg files in the folder that carries the video's name (without
# extension). NOTE(review): the original statement was left unfinished; this
# completion assumes the goal was to total the per-video frame counts -- confirm.
frames_root_dir = '/app/litter_survey_dataset/SampleVideoToAnnotate'
sum_all_frames = 0
for entry_name in os.listdir(frames_root_dir):
    video_stem, video_ext = os.path.splitext(entry_name)
    frames_dir = os.path.join(frames_root_dir, video_stem)
    if video_ext in ('.mp4', '.mov') and os.path.isdir(frames_dir):
        sum_all_frames += sum(1 for frame_name in os.listdir(frames_dir) if frame_name.endswith('.jpeg'))
sum_all_frames
Extracted 500 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/video-28057_singular_display
Extracted 1053 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/video-28097_singular_display
Extracted 1822 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/VID_20250119_103359_00_084
Extracted 724 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/VID_20250119_104635_00_093
Extracted 1336 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/VID_20250119_104758_00_094
Extracted 177 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/VID_20250119_104857_00_095
Extracted 122 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/VID_20250119_105116_00_096
Extracted 107 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/VID_20250119_105144_00_097
Extracted 132 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/VID_20250119_105232_00_098
Extracted 120 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/VID_20250119_105257_00_099
Extracted 131 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/VID_20250119_105326_00_100
Extracted 318 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/VID_20250119_105340_00_101
Extracted 74 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/VID_20250119_105428_00_102
Extracted 153 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/VID_20250119_105522_00_103
Extracted 289 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/VID_20250119_105706_00_104
Extracted 167 frames to /app/litter_survey_dataset/SampleVideoToAnnotate/VID_20250119_105935_00_105

# Load model and run inference on the data 

In [11]:
import base64
from PIL import Image
VERBOSE = False  # Notebook-wide switch: set to True to print every log() message

def log(msg: object, verbose: bool = False):
    """
    Print ``msg`` when debug output is enabled.

    Args:
        msg: Value to print. Any printable object is accepted (callers in this
            notebook pass strings, tuples and array values).
        verbose (bool): Force printing of this message even when the global
            ``VERBOSE`` switch is off.

    Returns:
        None
    """
    # Either the global switch or the per-call flag enables output; previously
    # the per-call flag was accepted but had no effect.
    if VERBOSE or verbose:
        print(msg)

In [12]:
from ultralytics import YOLO
from ultralytics.solutions import object_counter
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
 
def run_inference(image_path, model, font_scale=6, thickness=12, confidence_threshold:float = 0.35, show_plot:bool = True):
    """
    Run the detector on one image and report the best grabber/litter confidence.

    Args:
        image_path (str): Path of the image, read with ``cv2.imread``.
        model: Detector called as ``model(image)``. It must return an iterable
            of results, each exposing ``boxes`` (with ``xyxy``, ``conf`` and
            ``cls``) and a ``names`` mapping from class id to label.
        font_scale: Text scale of the drawn labels (only used when plotting).
        thickness: Line thickness of boxes and label text (only used when plotting).
        confidence_threshold (float): Detections whose confidence is not
            strictly greater than this value are ignored.
        show_plot (bool): When True, draw the kept detections on the image and
            show it in a 10x10 Matplotlib figure.

    Returns:
        tuple: ``(contain_grabber_confidence, contain_litter_confidence)`` --
        the highest confidence among kept detections with class id 0 and
        class id 1 respectively, or 0 when there is none.

    Raises:
        OSError: If the image cannot be read (``cv2.imread`` returned ``None``).
    """
    # Read the input image; cv2.imread returns None instead of raising on failure
    image = cv2.imread(image_path)
    if image is None:
        raise OSError(f"Could not read image {image_path}")

    log(image.shape)

    # The RGB copy is only needed for drawing/plotting with Matplotlib
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) if show_plot else None

    # Run inference on the image
    results = model(image)
    contain_grabber_confidence = 0
    contain_litter_confidence = 0

    for result in results:
        for box in result.boxes:
            # Bounding box corners as integer pixel coordinates
            x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
            log(f"{x1} {y1} {x2} {y2}")
            confidence = float(box.conf[0].cpu().numpy())
            log(f"confidence {confidence}")

            if confidence > confidence_threshold:
                class_id_predicted = int(box.cls[0].cpu().numpy())
                label = result.names[class_id_predicted]
                log(f"available class id and label: {result.names}\npredicted class id:")
                log(class_id_predicted)

                # NOTE(review): assumes class id 0 is the grabber and class id 1
                # is litter in the model's label map -- confirm against result.names.
                if class_id_predicted == 0:
                    contain_grabber_confidence = max(confidence, contain_grabber_confidence)
                elif class_id_predicted == 1:
                    contain_litter_confidence = max(confidence, contain_litter_confidence)

                if show_plot:
                    # image_rgb is in RGB channel order, so red is (255, 0, 0)
                    cv2.rectangle(image_rgb, (x1, y1), (x2, y2), (255, 0, 0), thickness)
                    # Label is anchored at the left edge, vertically centred on the box
                    label_position_x = x1
                    label_position_y = round((y1 + y2) / 2)
                    cv2.putText(image_rgb, f"{label}: {confidence:.2f}", (label_position_x, label_position_y), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 255, 0), thickness, cv2.LINE_AA)

    if show_plot:
        # Display the image with bounding boxes using Matplotlib
        plt.figure(figsize=(10,10))
        plt.imshow(image_rgb)

    return (contain_grabber_confidence, contain_litter_confidence)

In [17]:
from ultralytics import YOLO
%matplotlib inline

# Load the trained detector weights (replace with the path to your trained model)
model = YOLO('runs/detect/train17/weights/best.pt')

# Image used for a single-image check; an alternative sample is kept for reference:
# image_path = 'train_dataset/images/train/A664F919-BBD0-4DB1-A5EB-011EC61D6B28_1_105_c_resized.jpeg'
image_path = 'litter_survey_dataset/2024-07-21/RayBan/photo-1007_singular_display_fullPicture.jpeg'

# Run the detector once without plotting and keep the two best confidences
contain_grabber_confidence, contain_litter_confidence = run_inference(
    image_path=image_path,
    model=model,
    font_scale=3,
    thickness=9,
    confidence_threshold=0.4,
    show_plot=False,
)


0: 640x480 1 Grabber, 1 Litter, 17.2ms
Speed: 15.2ms preprocess, 17.2ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 480)


In [15]:
import os
import shutil

def copy_file_with_renamed_folder(source_file, new_folder):
    """
    Copy a file into another folder, keeping its file name.

    The destination folder is created when missing, and a confirmation line
    is printed after the copy.

    Args:
        source_file (str): Path to the file to copy.
        new_folder (str): Folder that receives the copy.

    Returns:
        None
    """
    # The destination keeps the source's file name, placed under new_folder
    destination_file = os.path.join(new_folder, os.path.basename(source_file))

    # Make sure the target folder exists before copying into it
    os.makedirs(new_folder, exist_ok=True)
    shutil.copy(source_file, destination_file)

    print(f"Copied {source_file} to {destination_file}")

In [None]:
# Run the detector on every extracted frame and copy the frames that show both a
# grabber and litter into a sibling "<frame folder>_contain_litter" folder.
VERBOSE = False
images_contain_litter = []
grabber_confidence_threshold=0.45
litter_confidence_threshold=0.25

images_in_video_dir = '/app/litter_survey_dataset/SampleVideoToAnnotate'
video_paths = os.listdir(images_in_video_dir)
for video_path in video_paths:
    # Skip the video files themselves and the folders produced by this step
    if video_path.endswith(('.mp4', '.mov', '_contain_litter')):
        continue
    # Skip the sub-sampling output folder and any stray non-directory entry, so
    # re-running the cell does not re-process its own results or fail in listdir
    frames_dir = os.path.join(images_in_video_dir, video_path)
    if video_path == 'final_picked' or not os.path.isdir(frames_dir):
        continue

    print(video_path)
    images_in_videos = os.listdir(frames_dir)
    for image_in_video in images_in_videos:
        if image_in_video.endswith('.jpeg'):
            image_path = f'{images_in_video_dir}/{video_path}/{image_in_video}'
            # Detect at the lower of the two thresholds so neither class is
            # filtered out before the per-class comparison below
            grabber_confidence, litter_confidence = run_inference(
                image_path=image_path,
                model=model,
                confidence_threshold=min(grabber_confidence_threshold, litter_confidence_threshold),
                show_plot=False,
            )
            if grabber_confidence > grabber_confidence_threshold and litter_confidence > litter_confidence_threshold:
                images_contain_litter.append(image_path)
                copy_file_with_renamed_folder(image_path, f'{images_in_video_dir}/{video_path}_contain_litter')
            

video-28057_singular_display

0: 640x512 1 Grabber, 1 Litter, 51.3ms
Speed: 4.3ms preprocess, 51.3ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 512)

0: 640x512 1 Grabber, 1 Litter, 18.5ms
Speed: 3.6ms preprocess, 18.5ms inference, 11.1ms postprocess per image at shape (1, 3, 640, 512)

0: 640x512 1 Grabber, 1 Litter, 15.3ms
Speed: 3.6ms preprocess, 15.3ms inference, 10.7ms postprocess per image at shape (1, 3, 640, 512)

0: 640x512 1 Litter, 13.2ms
Speed: 4.5ms preprocess, 13.2ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 512)

0: 640x512 1 Grabber, 1 Litter, 39.4ms
Speed: 4.3ms preprocess, 39.4ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 512)

0: 640x512 3 Grabbers, 1 Litter, 24.3ms
Speed: 3.1ms preprocess, 24.3ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 512)

0: 640x512 1 Grabber, 1 Litter, 13.0ms
Speed: 3.3ms preprocess, 13.0ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 512)
Copied /app/litter_s

In [30]:
# Display the directory listing currently held in `video_paths` (assigned from os.listdir in an earlier cell).
video_paths

['video-28057_singular_display',
 'video-28057_singular_display.mov',
 'video-28097_singular_display',
 'video-28097_singular_display.mov',
 'VID_20250119_103359_00_084',
 'VID_20250119_103359_00_084.mp4',
 'VID_20250119_104635_00_093',
 'VID_20250119_104635_00_093.mp4',
 'VID_20250119_104758_00_094',
 'VID_20250119_104758_00_094.mp4',
 'VID_20250119_104857_00_095',
 'VID_20250119_104857_00_095.mp4',
 'VID_20250119_105116_00_096',
 'VID_20250119_105116_00_096.mp4',
 'VID_20250119_105144_00_097',
 'VID_20250119_105144_00_097.mp4',
 'VID_20250119_105232_00_098',
 'VID_20250119_105232_00_098.mp4',
 'VID_20250119_105257_00_099',
 'VID_20250119_105257_00_099.mp4',
 'VID_20250119_105326_00_100',
 'VID_20250119_105326_00_100.mp4',
 'VID_20250119_105340_00_101',
 'VID_20250119_105340_00_101.mp4',
 'VID_20250119_105428_00_102',
 'VID_20250119_105428_00_102.mp4',
 'VID_20250119_105522_00_103',
 'VID_20250119_105522_00_103.mp4',
 'VID_20250119_105706_00_104',
 'VID_20250119_105706_00_104.mp4',
 '

# Keep only one image out of every {keep_cadence} images

In [42]:
# From every "*_contain_litter" folder, copy one frame out of every `keep_cadence`
# into the shared "final_picked" folder.
keep_cadence = 6
video_paths = os.listdir(images_in_video_dir)
for video_path in video_paths:
    if video_path.endswith('_contain_litter'):
        # os.listdir returns entries in arbitrary order; sorting the zero-padded
        # frame names restores temporal order so the picked frames are evenly
        # spaced. Only .jpeg frames are considered.
        images_in_videos_contain_litter = sorted(
            image_name
            for image_name in os.listdir(f'{images_in_video_dir}/{video_path}')
            if image_name.endswith('.jpeg')
        )
        picked_images = images_in_videos_contain_litter[::keep_cadence]
        for image_contain_litter in picked_images:
            image_path = f'{images_in_video_dir}/{video_path}/{image_contain_litter}'
            copy_file_with_renamed_folder(image_path, f'{images_in_video_dir}/final_picked')
        kept = len(picked_images)
        img_count = len(images_in_videos_contain_litter)
        # (number of frames copied, number of candidate frames in this folder)
        print((kept, img_count))

Copied /app/litter_survey_dataset/SampleVideoToAnnotate/video-28057_singular_display_contain_litter/video-28057_singular_display_0000006.jpeg to /app/litter_survey_dataset/SampleVideoToAnnotate/final_picked/video-28057_singular_display_0000006.jpeg
Copied /app/litter_survey_dataset/SampleVideoToAnnotate/video-28057_singular_display_contain_litter/video-28057_singular_display_0000090.jpeg to /app/litter_survey_dataset/SampleVideoToAnnotate/final_picked/video-28057_singular_display_0000090.jpeg
Copied /app/litter_survey_dataset/SampleVideoToAnnotate/video-28057_singular_display_contain_litter/video-28057_singular_display_0000211.jpeg to /app/litter_survey_dataset/SampleVideoToAnnotate/final_picked/video-28057_singular_display_0000211.jpeg
Copied /app/litter_survey_dataset/SampleVideoToAnnotate/video-28057_singular_display_contain_litter/video-28057_singular_display_0000324.jpeg to /app/litter_survey_dataset/SampleVideoToAnnotate/final_picked/video-28057_singular_display_0000324.jpeg
Copi