In [None]:
!pip install tensorflow opencv-python pillow ultralytics

In [None]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input, decode_predictions

# Load pre-trained model
model = tf.keras.applications.MobileNetV2(weights='imagenet')

# Open webcam
cap = cv2.VideoCapture(0)  # 0 indicates the default camera

def preprocess_frame(frame):
    img = cv2.resize(frame, (224, 224))
    img = preprocess_input(img)
    img = np.expand_dims(img, axis=0)
    return img

def classify_frame(frame):
    img = preprocess_frame(frame)
    predictions = model.predict(img)
    decoded_predictions = decode_predictions(predictions, top=1)[0][0]
    return decoded_predictions

while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    classification = classify_frame(frame)
    label = f"{classification[1]}: {classification[2]*100:.2f}%"
    
    if classification[2] >= 0.7:
        cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    #cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imshow('Live Video Feed', frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


In [None]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input, decode_predictions

# Load pre-trained model
model = tf.keras.applications.MobileNetV2(weights='imagenet')

# Open webcam
cap = cv2.VideoCapture(0)  # 0 indicates the default camera

def preprocess_frame(frame):
    img = cv2.resize(frame, (224, 224))
    img = preprocess_input(img)
    img = np.expand_dims(img, axis=0)
    return img

def classify_frame(frame):
    img = preprocess_frame(frame)
    predictions = model.predict(img)
    decoded_predictions = decode_predictions(predictions, top=1)[0][0]
    return decoded_predictions

def split_frame(frame, grid_size=2):
    h, w, _ = frame.shape
    return [frame[y:y + h//grid_size, x:x + w//grid_size] for y in range(0, h, h//grid_size) for x in range(0, w, w//grid_size)]

while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    sections = split_frame(frame, grid_size=2)
    for i, section in enumerate(sections):
        classification = classify_frame(section)
        confidence = classification[2] * 100
        
        if confidence >= 70:
            label = f"{classification[1]}: {confidence:.2f}%"
            start_x = (i % 2) * (frame.shape[1] // 2)
            start_y = (i // 2) * (frame.shape[0] // 2)
            end_x = start_x + (frame.shape[1] // 2)
            end_y = start_y + (frame.shape[0] // 2)

            # Draw rectangle and label
            cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
            cv2.putText(frame, label, (start_x, start_y + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2, cv2.LINE_AA)
    
    cv2.imshow('Live Video Feed', frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


In [13]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input, decode_predictions

# Global Variables
GRID_SIZE = 4
CONFIDENCE_THRESHOLD = 70
MODEL = tf.keras.applications.MobileNetV2(weights='imagenet')
CAPTURE_SOURCE = 0  # Webcam ID

# Open webcam
cap = cv2.VideoCapture(CAPTURE_SOURCE)

def preprocess_frame(frame):
    img = cv2.resize(frame, (224, 224))
    img = preprocess_input(img)
    img = np.expand_dims(img, axis=0)
    return img

def classify_frame(frame):
    img = preprocess_frame(frame)
    predictions = MODEL.predict(img)
    decoded_predictions = decode_predictions(predictions, top=1)[0][0]
    return decoded_predictions

def split_frame(frame, grid_size=GRID_SIZE):
    h, w, _ = frame.shape
    return [frame[y:y + h//grid_size, x:x + w//grid_size] for y in range(0, h, h//grid_size) for x in range(0, w, w//grid_size)]

while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    sections = split_frame(frame, grid_size=GRID_SIZE)
    section_height = frame.shape[0] // GRID_SIZE
    section_width = frame.shape[1] // GRID_SIZE
    for i, section in enumerate(sections):
        classification = classify_frame(section)
        confidence = classification[2] * 100
        
        if confidence >= CONFIDENCE_THRESHOLD:
            label = f"{classification[1]}: {confidence:.2f}%"
            start_x = (i % GRID_SIZE) * section_width
            start_y = (i // GRID_SIZE) * section_height
            end_x = start_x + section_width
            end_y = start_y + section_height

          # Draw rectangle and label
            cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
            cv2.putText(frame, label, (start_x, start_y + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
    
    cv2.imshow('Live Video Feed', frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 830ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

In [None]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input, decode_predictions

# Global Variables
GRID_SIZE = 4
CONFIDENCE_THRESHOLD = 70
MODEL = tf.keras.applications.MobileNetV2(weights='imagenet')
CAPTURE_SOURCE = 0  # Webcam ID
OUTPUT_FILE = 'output.avi'

# Open webcam
cap = cv2.VideoCapture(CAPTURE_SOURCE)

# Get the width and height of the frame
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
out = cv2.VideoWriter(OUTPUT_FILE, cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width, frame_height))

def preprocess_frame(frame):
    img = cv2.resize(frame, (224, 224))
    img = preprocess_input(img)
    img = np.expand_dims(img, axis=0)
    return img

def classify_frame(frame):
    img = preprocess_frame(frame)
    predictions = MODEL.predict(img)
    decoded_predictions = decode_predictions(predictions, top=1)[0][0]
    return decoded_predictions

def split_frame(frame, grid_size=GRID_SIZE):
    h, w, _ = frame.shape
    return [frame[y:y + h//grid_size, x:x + w//grid_size] for y in range(0, h, h//grid_size) for x in range(0, w, w//grid_size)]

while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    sections = split_frame(frame, grid_size=GRID_SIZE)
    section_height = frame.shape[0] // GRID_SIZE
    section_width = frame.shape[1] // GRID_SIZE
    for i, section in enumerate(sections):
        classification = classify_frame(section)
        confidence = classification[2] * 100
        
        if confidence >= CONFIDENCE_THRESHOLD:
            label = f"{classification[1]}: {confidence:.2f}%"
            start_x = (i % GRID_SIZE) * section_width
            start_y = (i // GRID_SIZE) * section_height
            end_x = start_x + section_width
            end_y = start_y + section_width

            # Draw rectangle and label
            cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
            cv2.putText(frame, label, (start_x, start_y + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
    
    # Write the frame to the output file
    out.write(frame)
    
    cv2.imshow('Live Video Feed', frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release everything if job is finished
cap.release()
out.release()
cv2.destroyAllWindows()



In [None]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input, decode_predictions
from datetime import datetime

# Global Variables
GRID_SIZE = 4
CONFIDENCE_THRESHOLD = 70
MODEL = tf.keras.applications.MobileNetV2(weights='imagenet')
CAPTURE_SOURCE = 0  # Webcam ID
OUTPUT_FILE = 'output.avi'

# Open webcam
cap = cv2.VideoCapture(CAPTURE_SOURCE)

# Get the width and height of the frame
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
out = cv2.VideoWriter(OUTPUT_FILE, cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width, frame_height))

def preprocess_frame(frame):
    img = cv2.resize(frame, (224, 224))
    img = preprocess_input(img)
    img = np.expand_dims(img, axis=0)
    return img

def classify_frame(frame):
    img = preprocess_frame(frame)
    predictions = MODEL.predict(img)
    decoded_predictions = decode_predictions(predictions, top=1)[0][0]
    return decoded_predictions

def split_frame(frame, grid_size=GRID_SIZE):
    h, w, _ = frame.shape
    return [frame[y:y + h//grid_size, x:x + w//grid_size] for y in range(0, h, h//grid_size) for x in range(0, w, w//grid_size)]

while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    sections = split_frame(frame, grid_size=GRID_SIZE)
    section_height = frame.shape[0] // GRID_SIZE
    section_width = frame.shape[1] // GRID_SIZE
    for i, section in enumerate(sections):
        classification = classify_frame(section)
        confidence = classification[2] * 100
        
        if confidence >= CONFIDENCE_THRESHOLD:
            label = f"{classification[1]}: {confidence:.2f}%"
            start_x = (i % GRID_SIZE) * section_width
            start_y = (i // GRID_SIZE) * section_height
            end_x = start_x + section_width
            end_y = start_y + section_width

            # Draw rectangle and label
            cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
            cv2.putText(frame, label, (start_x, start_y + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
    
    # Get current date and time
    date_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    
    # Overlay date and time on the frame
    cv2.putText(frame, date_time, (10, frame_height - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
    
    # Write the frame to the output file
    out.write(frame)
    
    cv2.imshow('Live Video Feed', frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release everything if job is finished
cap.release()
out.release()
cv2.destroyAllWindows()


In [None]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input, decode_predictions
from datetime import datetime
import threading

# Global Variables
GRID_SIZE = 4
CONFIDENCE_THRESHOLD = 70
MODEL = tf.keras.applications.MobileNetV2(weights='imagenet')
CAPTURE_SOURCE = 0  # Webcam ID
OUTPUT_FILE = 'output.avi'

# Open webcam
cap = cv2.VideoCapture(CAPTURE_SOURCE)

# Get the width and height of the frame
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
out = cv2.VideoWriter(OUTPUT_FILE, cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width, frame_height))

def preprocess_frame(frame):
    img = cv2.resize(frame, (224, 224))
    img = preprocess_input(img)
    img = np.expand_dims(img, axis=0)
    return img

def classify_frame(frame):
    img = preprocess_frame(frame)
    predictions = MODEL.predict(img)
    decoded_predictions = decode_predictions(predictions, top=1)[0][0]
    return decoded_predictions

def split_frame(frame, grid_size=GRID_SIZE):
    h, w, _ = frame.shape
    return [frame[y:y + h//grid_size, x:x + w//grid_size] for y in range(0, h, h//grid_size) for x in range(0, w, w//grid_size)]

def process_section(section, i, frame):
    section_height = frame.shape[0] // GRID_SIZE
    section_width = frame.shape[1] // GRID_SIZE
    classification = classify_frame(section)
    confidence = classification[2] * 100
    
    if confidence >= CONFIDENCE_THRESHOLD:
        label = f"{classification[1]}: {confidence:.2f}%"
        start_x = (i % GRID_SIZE) * section_width
        start_y = (i // GRID_SIZE) * section_height
        end_x = start_x + section_width
        end_y = start_y + section_width

        # Draw rectangle and label
        cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
        cv2.putText(frame, label, (start_x, start_y + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)

def capture_and_process():
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        sections = split_frame(frame, grid_size=GRID_SIZE)
        threads = []

        for i, section in enumerate(sections):
            thread = threading.Thread(target=process_section, args=(section, i, frame))
            threads.append(thread)
            thread.start()

        for thread in threads:
            thread.join()

        # Get current date and time
        date_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        
        # Overlay date and time on the frame
        cv2.putText(frame, date_time, (10, frame_height - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
        
        # Write the frame to the output file
        out.write(frame)
        
        # Show the frame
        cv2.imshow('Live Video Feed', frame)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

capture_and_process()

# Release everything if job is finished
cap.release()
out.release()
cv2.destroyAllWindows()


In [None]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input, decode_predictions
from datetime import datetime
import threading

# Global Variables
GRID_SIZE = 2
CONFIDENCE_THRESHOLD = 70
MODEL = tf.keras.applications.MobileNetV2(weights='imagenet')
CAPTURE_SOURCE = 0  # Webcam ID
OUTPUT_FILE = 'output.avi'
OVERLAP_PERCENT = 0.5  # 50% overlap

# Open webcam
cap = cv2.VideoCapture(CAPTURE_SOURCE)

# Get the width and height of the frame
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
out = cv2.VideoWriter(OUTPUT_FILE, cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width, frame_height))

def preprocess_frame(frame):
    img = cv2.resize(frame, (224, 224))
    img = preprocess_input(img)
    img = np.expand_dims(img, axis=0)
    return img

def classify_frame(frame):
    img = preprocess_frame(frame)
    predictions = MODEL.predict(img)
    decoded_predictions = decode_predictions(predictions, top=1)[0][0]
    return decoded_predictions

def split_frame(frame, grid_size=GRID_SIZE, overlap=OVERLAP_PERCENT):
    h, w, _ = frame.shape
    step_size_h = int(h // grid_size * (1 - overlap))
    step_size_w = int(w // grid_size * (1 - overlap))
    sections = []

    for y in range(0, h - step_size_h + 1, step_size_h):
        for x in range(0, w - step_size_w + 1, step_size_w):
            sections.append(frame[y:y + step_size_h, x:x + step_size_w])

    return sections

def process_section(section, i, frame):
    section_height = int(frame.shape[0] // GRID_SIZE * (1 - OVERLAP_PERCENT))
    section_width = int(frame.shape[1] // GRID_SIZE * (1 - OVERLAP_PERCENT))
    classification = classify_frame(section)
    confidence = classification[2] * 100
    
    if confidence >= CONFIDENCE_THRESHOLD:
        label = f"{classification[1]}: {confidence:.2f}%"
        start_x = (i % GRID_SIZE) * section_width
        start_y = (i // GRID_SIZE) * section_height
        end_x = start_x + section_width
        end_y = start_y + section_width

        # Draw rectangle and label
        cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
        cv2.putText(frame, label, (start_x, start_y + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)

def capture_and_process():
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        sections = split_frame(frame, grid_size=GRID_SIZE, overlap=OVERLAP_PERCENT)
        threads = []

        for i, section in enumerate(sections):
            thread = threading.Thread(target=process_section, args=(section, i, frame))
            threads.append(thread)
            thread.start()

        for thread in threads:
            thread.join()

        # Get current date and time
        date_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        
        # Overlay date and time on the frame
        cv2.putText(frame, date_time, (10, frame_height - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
        
        # Write the frame to the output file
        out.write(frame)
        
        # Show the frame
        cv2.imshow('Live Video Feed', frame)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

capture_and_process()

# Release everything if job is finished
cap.release()
out.release()
cv2.destroyAllWindows()


In [None]:
import cv2
import numpy as np
import torch
from ultralytics import YOLO
from datetime import datetime

# Global Variables
GRID_SIZE = 4
CONFIDENCE_THRESHOLD = 0.5
CAPTURE_SOURCE = 0  # Webcam ID
OUTPUT_FILE = 'output.avi'

# Load YOLOv8 model
model = YOLO('yolov8s.pt')  # You can use different models like 'yolov8m.pt', 'yolov8l.pt', etc.

# Open webcam
cap = cv2.VideoCapture(CAPTURE_SOURCE)

# Get the width and height of the frame
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
out = cv2.VideoWriter(OUTPUT_FILE, cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width, frame_height))

def process_frame(frame):
    results = model(frame)  # Get predictions
    detections = results[0]  # Access the first element which contains the detections
    
    for detection in detections.boxes:
        confidence = detection.conf.item()  # Convert to Python scalar
        if confidence >= CONFIDENCE_THRESHOLD:
            x_min, y_min, x_max, y_max = map(int, detection.xyxy[0].numpy())
            class_id = int(detection.cls.item())  # Convert to Python scalar
            label = model.names[class_id]
            
            # Draw rectangle and label
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
            cv2.putText(frame, f"{label}: {confidence*100:.2f}%", (x_min, y_min + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
    
    # Get current date and time
    date_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    
    # Overlay date and time on the frame
    cv2.putText(frame, date_time, (10, frame_height - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
    
    return frame

def capture_and_process():
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Process the frame
        frame = process_frame(frame)
        
        # Write the frame to the output file
        out.write(frame)
        
        # Show the frame
        cv2.imshow('Live Video Feed', frame)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

capture_and_process()

# Release everything if job is finished
cap.release()
out.release()
cv2.destroyAllWindows()
