In [1]:
import os
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torch.nn as nn
from torchvision import models
import torch.optim as optim
import cv2
import numpy as np
from torchvision import transforms

In [2]:
# Pick the inference device first so the checkpoint can be mapped onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the model architecture.
# No ImageNet weights are requested here: load_state_dict() below runs in its
# default strict mode, so every parameter and buffer is overwritten by the
# checkpoint anyway and the download would be wasted.
model = models.mobilenet_v2()

# Freeze feature layers (kept for parity with the training setup; it has no
# effect on inference results).
for param in model.features.parameters():
    param.requires_grad = False

# Replace the classifier head with a 2-way output layer.
num_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_features, 2)

# Load the saved weights.
# map_location lets a checkpoint that was saved from a CUDA device be loaded
# on a CPU-only machine instead of raising a deserialization error.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
state_dict = torch.load('../Model/mobilenet_waste_classifier.pth',
                        map_location=device)
model.load_state_dict(state_dict)

# Move to device and switch to evaluation mode (disables dropout and makes
# batch-norm use its stored running statistics) so that every later inference
# cell starts from a correct state regardless of which cells ran before it.
model = model.to(device)
model.eval()

print("Model loaded and ready for inference.")




Model loaded and ready for inference.


In [3]:
import cv2
import numpy as np
from torchvision import transforms
from PIL import Image

# Ensure class names match training
# NOTE(review): the index order is assumed to match the label order used at
# training time — confirm against the training notebook.
class_names = ['Organic', 'Recyclable']

# Define transform for single-frame inference: RGB uint8 array -> PIL image ->
# 224x224 -> float tensor normalised with the standard ImageNet channel
# mean/std.
inference_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Evaluation mode is loop-invariant, so set it once instead of on every frame.
model.eval()

# Start webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail loudly instead of printing "Webcam started" and exiting silently.
    cap.release()
    raise RuntimeError("Could not open webcam (device index 0).")
print("Webcam started. Press 'q' to quit.")

# try/finally guarantees the camera and the preview window are released even
# if the kernel is interrupted or an exception is raised mid-stream.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unplugged / stream ended).
            break

        # Preprocess frame: OpenCV delivers BGR, the transform expects RGB.
        rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_tensor = inference_transform(rgb_image).unsqueeze(0).to(device)

        # Inference
        with torch.no_grad():
            output = model(input_tensor)
            _, predicted = torch.max(output, 1)
            label = class_names[predicted.item()]

        # Display prediction on screen
        cv2.putText(frame, f'Prediction: {label}', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Waste Classifier', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


Webcam started. Press 'q' to quit.


In [8]:
import cv2
import numpy as np
import torch
from torchvision import transforms

# Transform for inference: RGB uint8 array -> PIL image -> 224x224 -> float
# tensor normalised with the standard ImageNet channel mean/std.
inference_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Make sure the model is in evaluation mode even if no earlier inference cell
# has been executed in this kernel session.
model.eval()

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail loudly instead of printing the prompt and exiting silently.
    cap.release()
    raise RuntimeError("Could not open webcam (device index 0).")
print("Press 'q' to quit.")

# try/finally guarantees the camera and the preview window are released even
# if the kernel is interrupted or an exception is raised mid-stream.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unplugged / stream ended).
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # With THRESH_OTSU the threshold is computed from the image and the
        # supplied value is ignored, so 0 is passed to avoid suggesting
        # otherwise.
        _, thresh = cv2.threshold(gray, 0, 255,
                                  cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)

        if contours:
            # Find the largest contour
            largest_contour = max(contours, key=cv2.contourArea)
            x, y, w, h = cv2.boundingRect(largest_contour)

            # Crop the region of interest BEFORE drawing anything on the
            # frame: the slice is a view into `frame`, so drawing first would
            # put the green box pixels into the classifier input.
            roi = frame[y:y+h, x:x+w]

            label = None
            # Skip empty crops
            if roi.size > 0:
                # Preprocess and predict (cvtColor returns a new array, so the
                # drawing below cannot alter it).
                image = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
                image_tensor = inference_transform(image).unsqueeze(0).to(device)

                with torch.no_grad():
                    output = model(image_tensor)
                    _, predicted = torch.max(output, 1)
                    label = class_names[predicted.item()]

            # Draw bounding box
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

            if label is not None:
                # Show label; clamp the baseline so the text stays visible
                # when the box touches the top edge of the frame.
                cv2.putText(frame, f'{label}', (x, max(y - 10, 25)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        cv2.imshow('Waste Classifier', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


Press 'q' to quit.
