In [1]:
import torch
import ultralytics

In [2]:
from ultralytics import SAM

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

# Helper function to check GPU memory usage
def print_gpu_memory():
    print(f"Allocated memory: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
    print(f"Cached memory: {torch.cuda.memory_reserved() / 1024**2:.2f} MB")


print("\nAfter deleting references:")
print_gpu_memory()

# Step 3: Free up GPU memory manually using torch.cuda.empty_cache()
torch.cuda.empty_cache()

print("\nAfter emptying cache:")
print_gpu_memory()


After deleting references:
Allocated memory: 0.00 MB
Cached memory: 0.00 MB

After emptying cache:
Allocated memory: 0.00 MB
Cached memory: 0.00 MB


In [4]:
import gc
import torch

# Manually invoke garbage collection
gc.collect()

# Clear GPU cache
torch.cuda.empty_cache()
print(f"Memory allocated after clearing cache: {torch.cuda.memory_allocated()} bytes")

Memory allocated after clearing cache: 0 bytes


In [5]:
# import torch
# from segment_anything import SamPredictor, sam_model_registry
# from PIL import Image
# import numpy as np

# # Load the model in half precision (mixed precision) to save memory
# def load_sam_model(model_name='vit_h', checkpoint_path='weights/sam_vit_h_4b8939.pth'):
#     # Register model (use correct variant based on your hardware capabilities)
#     sam = sam_model_registry[model_name](checkpoint=checkpoint_path).half().cuda()
#     predictor = SamPredictor(sam)
#     return predictor

# # Run SAM on an image with memory optimization
# def run_sam_inference(predictor, image_path):
#     # Load and preprocess the image
#     image = Image.open(image_path).convert("RGB")
#     image = np.array(image)
    
#     # Move the image to predictor and apply half precision where possible
#     predictor.set_image(image)
    
#     # Define points for segmentation (this can be customized)
#     input_points = np.array([[image.shape[1] // 2, image.shape[0] // 2]])  # Center point as example
#     input_labels = np.array([1])  # Label indicating foreground
    
#     with torch.cuda.amp.autocast():  # Enable mixed precision
#         # Run SAM prediction
#         masks, scores, _ = predictor.predict(
#             point_coords=input_points,
#             point_labels=input_labels,
#             multimask_output=False  # Set to False if single mask is sufficient to save memory
#         )
    
#     # Clear memory explicitly after inference
#     torch.cuda.empty_cache()
#     return masks, scores

# # Usage example
# model_name = 'vit_h'  # or other variants of SAM like 'vit_b' or 'vit_l' for smaller sizes
# checkpoint_path = 'weights/sam_vit_h_4b8939.pth'  # Path to SAM checkpoint file
# image_path = 'Pallets/1000736-6303_jpg.rf.92c0d6c8403755071eac22527c9ad815.jpg'  # Path to the input image

# # Load the model and perform inference
# predictor = load_sam_model(model_name, checkpoint_path)
# masks, scores = run_sam_inference(predictor, image_path)

# # Display result (optional, based on the framework you're using)
# print("Segmentation Masks:", masks)
# print("Confidence Scores:", scores)


In [6]:
# import cv2
# import numpy as np
# import matplotlib.pyplot as plt
# from PIL import Image

# def visualize_segmentation(image_path, mask):
#     # Load the original image
#     image = cv2.imread(image_path)
#     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR (OpenCV) to RGB for matplotlib

#     # Resize mask to match the image shape if necessary
#     if mask.shape[:2] != image.shape[:2]:
#         mask = cv2.resize(mask, (image.shape[1], image.shape[0]))

#     # Create an RGBA image with the mask as an alpha channel
#     mask_color = np.zeros_like(image, dtype=np.uint8)
#     mask_color[mask > 0] = [255, 0, 0]  # Set color for the mask, e.g., red
    
#     # Blend the original image with the mask overlay
#     alpha = 0.5  # Transparency factor
#     overlay = cv2.addWeighted(image, 1 - alpha, mask_color, alpha, 0)

#     # Plot the original image with the segmentation overlay
#     plt.figure(figsize=(10, 10))
#     plt.imshow(overlay)
#     plt.axis('off')
#     plt.title("Segmentation Visualization")
#     plt.show()

# # Usage example
# image_path = 'Pallets/1000736-6303_jpg.rf.92c0d6c8403755071eac22527c9ad815.jpg'
# masks, _ = run_sam_inference(predictor, image_path)  # Assume this returns a binary mask
# visualize_segmentation(image_path, masks[0])  # Use the first mask if multiple are returned


In [7]:
# import cv2
# import matplotlib.pyplot as plt
# from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
# import numpy as np
# import gc

# def show_anns(anns):
#     if len(anns) == 0:
#         return
#     sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)
#     ax = plt.gca()
#     ax.set_autoscale_on(False)
#     polygons = []
#     color = []
#     for ann in sorted_anns:
#         m = ann['segmentation']
#         img = np.ones((m.shape[0], m.shape[1], 3))
#         color_mask = np.random.random((1, 3)).tolist()[0]
#         for i in range(3):
#             img[:,:,i] = color_mask[i]
#         ax.imshow(np.dstack((img, m*0.35)))


# sam = sam_model_registry["default"](checkpoint="weights/sam_vit_h_4b8939.pth").half().cuda()
# mask_generator = SamAutomaticMaskGenerator(sam)
# image = cv2.imread('Pallets/1000736-6303_jpg.rf.92c0d6c8403755071eac22527c9ad815.jpg')
# image = cv2.resize(image, (208,208))
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# sam.to(device='cuda')
# masks = mask_generator.generate(image)
# print(len(masks))
# print(masks[0].keys())
# plt.figure(figsize=(20,20))
# plt.imshow(image)
# show_anns(masks)
# plt.axis('off')
# plt.show() 
# del(masks)
# gc.collect()

In [None]:
import torch
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
from PIL import Image
import numpy as np
import cv2
import matplotlib.pyplot as plt

# Load the SAM model and initialize the SAMMaskGenerator
def load_sam_mask_generator(model_name='vit_h', checkpoint_path='sam_vit_h_4b8939.pth'):
    sam_model = sam_model_registry[model_name](checkpoint=checkpoint_path).half().cuda()
    mask_generator = SamAutomaticMaskGenerator(sam_model)
    return mask_generator

# Generate masks for an entire image
def generate_masks(mask_generator, image_path):
    # Load and preprocess the image
    image = Image.open(image_path).convert("RGB")
    image = np.array(image)
    print(image.dtype)
    
    # Run SAM Mask Generator
    with torch.no_grad():  # Disable gradient computation to save memory
        masks = mask_generator.generate(image)
    
    return masks

# Visualize the generated masks
def visualize_masks(image_path, masks):
    # Load the original image
    image = cv2.imread(image_path)
    image = np.array(image, dtype=np.float16)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Create a blank canvas for the mask overlay
    mask_overlay = np.zeros_like(image, dtype=np.uint8)

    # Apply each mask with a random color for visualization
    for mask in masks:
        color = np.random.randint(0, 255, size=3, dtype=np.uint8)
        mask_overlay[mask['segmentation']] = color

    # Blend the original image with the mask overlay
    alpha = 0.5  # Transparency factor
    overlay = cv2.addWeighted(image, 1 - alpha, mask_overlay, alpha, 0)

    # Display the result
    plt.figure(figsize=(10, 10))
    plt.imshow(overlay)
    plt.axis('off')
    plt.title("Automatic Mask Generation with SAM")
    plt.show()

# Usage example
model_name = 'vit_h'
checkpoint_path = 'weights/sam_vit_h_4b8939.pth'
image_path = 'Pallets/1000736-6303_jpg.rf.92c0d6c8403755071eac22527c9ad815.jpg'




In [9]:

# Initialize SAMMaskGenerator and generate masks
mask_generator = load_sam_mask_generator(model_name, checkpoint_path)

In [10]:
masks = generate_masks(mask_generator, image_path)

# Visualize the segmentation masks
visualize_masks(image_path, masks)

uint8


RuntimeError: expected mat1 and mat2 to have the same dtype, but got: float != c10::Half