# EfficientSAM Example

This script shows how to obtain and visualize segmentation results from EfficientSAM using the ready-to-use TorchScript models. Part of the code is borrowed from the MobileSAM project — many thanks!

## Setup

In [None]:
import matplotlib.pyplot as plt
import cv2
import numpy as np
import torch
from torchvision.transforms import ToTensor
from PIL import Image
import io

# Box and Point Prompts

In [None]:
def run_ours_point(img_path, pts_sampled, model):
    """Segment an image from point prompts and return the best candidate mask.

    Args:
        img_path: Path of the image file; it is read with ``cv2.imread``.
        pts_sampled: Point prompts as coordinate pairs; any array-like whose
            elements can be reshaped to ``[1, 1, num_points, 2]``.
            NOTE(review): presumably (x, y) pixel coordinates in the input
            image -- confirm against the model's documentation.
        model: Callable invoked as ``model(image, points, labels)`` with CUDA
            tensors and returning ``(mask_logits, predicted_iou)``.

    Returns:
        A boolean ``numpy`` array: the candidate mask with the highest
        predicted IoU (the first one on ties), or ``None`` if the model
        produced no candidate masks.

    Raises:
        FileNotFoundError: If the image is missing or cannot be decoded.
    """
    image = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with a clear message rather than inside cvtColor.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img_path!r}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    img_tensor = ToTensor()(image)
    pts_sampled = torch.reshape(torch.tensor(pts_sampled), [1, 1, -1, 2])
    # Every supplied point gets label 1.
    pts_labels = torch.ones(1, 1, pts_sampled.shape[2])

    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        predicted_logits, predicted_iou = model(
            img_tensor[None, ...].cuda(),
            pts_sampled.cuda(),
            pts_labels.cuda(),
        )

    # Keep the candidates of the first (only) image and first query, then
    # binarise the mask probabilities at 0.5.
    mask_probs = torch.sigmoid(predicted_logits.cpu()[0, 0, :, :, :])
    all_masks = torch.ge(mask_probs, 0.5).numpy()
    predicted_iou = predicted_iou[0, 0, ...].cpu().detach().numpy()

    if all_masks.shape[0] == 0:
        return None
    # np.argmax returns the first index of the maximum, matching a strict
    # ">" scan over the candidates.
    return all_masks[int(np.argmax(predicted_iou))]

def run_ours_box(img_path, pts_sampled, model):
    """Segment an image from a box prompt and return the best candidate mask.

    Args:
        img_path: Path of the image file; it is read with ``cv2.imread``.
        pts_sampled: The box as two corner points; any array-like with
            exactly four values, reshaped to ``[1, 1, 2, 2]``.
            NOTE(review): presumably ``[[x0, y0], [x1, y1]]`` in pixel
            coordinates -- confirm against the model's documentation.
        model: Callable invoked as ``model(image, points, labels)`` with CUDA
            tensors and returning ``(mask_logits, predicted_iou)``.

    Returns:
        A boolean ``numpy`` array: the candidate mask with the highest
        predicted IoU (the first one on ties), or ``None`` if the model
        produced no candidate masks.

    Raises:
        FileNotFoundError: If the image is missing or cannot be decoded.
    """
    bbox = torch.reshape(torch.tensor(pts_sampled), [1, 1, 2, 2])
    # NOTE(review): labels 2 and 3 presumably tag the two points as the
    # box's top-left and bottom-right corners -- confirm with the model docs.
    bbox_labels = torch.reshape(torch.tensor([2, 3]), [1, 1, 2])

    image = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with a clear message rather than inside cvtColor.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img_path!r}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_tensor = ToTensor()(image)

    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        predicted_logits, predicted_iou = model(
            img_tensor[None, ...].cuda(),
            bbox.cuda(),
            bbox_labels.cuda(),
        )

    # Keep the candidates of the first (only) image and first query, then
    # binarise the mask probabilities at 0.5.
    mask_probs = torch.sigmoid(predicted_logits.cpu()[0, 0, :, :, :])
    all_masks = torch.ge(mask_probs, 0.5).numpy()
    predicted_iou = predicted_iou[0, 0, ...].cpu().detach().numpy()

    if all_masks.shape[0] == 0:
        return None
    # np.argmax returns the first index of the maximum, matching a strict
    # ">" scan over the candidates.
    return all_masks[int(np.argmax(predicted_iou))]

# Visualization Helpers

In [None]:
def show_mask(mask, ax, random_color=False):
    """Draw ``mask`` on ``ax`` as a semi-transparent RGBA overlay.

    Args:
        mask: Array whose last two dimensions are (height, width) and which
            holds exactly height * width elements.
        ax: Axes-like object providing ``imshow``.
        random_color: If true, use a random RGB colour with alpha 0.6;
            otherwise a fixed blue with alpha 0.8.
    """
    rgba = (
        np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
        if random_color
        else np.array([30/255, 144/255, 255/255, 0.8])
    )
    height, width = mask.shape[-2:]
    # Broadcast (H, W, 1) against (1, 1, 4): masked pixels receive the
    # colour, all other pixels become fully transparent zeros.
    overlay = mask.reshape(height, width)[..., None] * rgba[None, None, :]
    ax.imshow(overlay)

def show_points(coords, labels, ax, marker_size=375):
    """Scatter prompt points on ``ax`` as stars, coloured by label.

    Args:
        coords: Array of shape (num_points, 2); column 0 is plotted on the
            x axis and column 1 on the y axis.
        labels: Array with one label per point; points labelled 1 are drawn
            in green and points labelled 0 in red.
        ax: Axes-like object providing ``scatter``.
        marker_size: Marker size forwarded to ``scatter`` as ``s``.
    """
    labelled_one = coords[labels == 1]
    labelled_zero = coords[labels == 0]
    star_style = dict(marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
    for pts, colour in ((labelled_one, 'green'), (labelled_zero, 'red')):
        ax.scatter(pts[:, 0], pts[:, 1], color=colour, **star_style)

def show_box(box, ax):
    """Draw ``box`` on ``ax`` as an unfilled yellow rectangle.

    Args:
        box: Indexable holding ``x0, y0, x1, y1`` at positions 0-3.
        ax: Axes-like object providing ``add_patch``.
    """
    left, top = box[0], box[1]
    width = box[2] - left
    height = box[3] - top
    outline = plt.Rectangle(
        (left, top),
        width,
        height,
        edgecolor='yellow',
        facecolor=(0,0,0,0),  # fully transparent fill
        lw=5,
    )
    ax.add_patch(outline)

def show_anns_ours(mask, ax):
    """Overlay every mask in ``mask`` on ``ax``, each in its own random colour.

    Args:
        mask: Sequence (or stacked array) of 2-D masks sharing one
            height/width. Non-zero / ``True`` entries mark mask pixels.
            An empty collection draws nothing.
        ax: Axes-like object providing ``set_autoscale_on`` and ``imshow``.
    """
    # Nothing to draw; also avoids indexing mask[0] on an empty collection.
    if len(mask) == 0:
        return

    ax.set_autoscale_on(False)
    height, width = mask[0].shape[:2]
    # Start from a fully transparent canvas (alpha channel = 0).
    overlay = np.ones((height, width, 4))
    overlay[:, :, 3] = 0
    for ann in mask:
        color_mask = np.concatenate([np.random.random(3), [0.5]])
        # Coerce to bool so 0/1 integer masks select pixels instead of being
        # interpreted as integer (fancy) row indices.
        overlay[np.asarray(ann, dtype=bool)] = color_mask
    ax.imshow(overlay)

# Load TorchScript Models

Download links for the TorchScript models:

EfficientSAM-s: https://www.dropbox.com/scl/fi/ziif8xudwbyyphb4tohza/efficientsam_s_gpu.jit?rlkey=8aflq9kf0bfujz5ex4lxuoq56&dl=0

EfficientSAM-ti: https://www.dropbox.com/scl/fi/lup5s4gthmlv6qf3f5zz3/efficientsam_ti_gpu.jit?rlkey=pap1xktxw50qiaey17no16bqz&dl=0

In [None]:
# Load the TorchScript model from a file in the current working directory;
# the file must already be present (download it beforehand). The helper
# functions in this file move their inputs to CUDA before calling the model.
model = torch.jit.load('efficientsam_s_gpu.jit')

## Box segmentation

Prepare your own image here: set `image_path` to its location.

In [None]:
# Box prompt given as two corner points.
# NOTE(review): presumably [[x0, y0], [x1, y1]] in pixel coordinates --
# confirm against the model's documentation.
input_box = np.array([[100, 100], [300, 300]])
image_path = './xxx.jpg'

image = cv2.imread(image_path)
# cv2.imread returns None (no exception) when the file is missing/unreadable.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path!r}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

mask = run_ours_box(image_path, input_box, model)

# Figure 1: the input image on its own.
plt.figure(figsize=(10,10))
plt.imshow(image)
plt.axis('off')
plt.show()

# Figure 2: the image with the predicted mask and the box prompt on top.
plt.figure(figsize=(10,10))
plt.imshow(image)
show_mask(mask, plt.gca())
# show_box expects a flat (x0, y0, x1, y1) sequence.
show_box(input_box.reshape(-1), plt.gca())
plt.title("EfficientSAM", fontsize=18)
plt.axis('off')
plt.show()

## Point segmentation

In [None]:
# A single point prompt; label 1 makes show_points draw it as a green star.
input_point = np.array([[400, 400]])
input_label = np.array([1])
image_path = './xxx.jpg'

image = cv2.imread(image_path)
# cv2.imread returns None (no exception) when the file is missing/unreadable.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path!r}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

mask = run_ours_point(image_path, input_point, model)

# Figure 1: the input image on its own.
plt.figure(figsize=(10,10))
plt.imshow(image)
plt.axis('off')
plt.show()

# Figure 2: the image with the predicted mask and the point prompt on top.
plt.figure(figsize=(10,10))
plt.imshow(image)
show_mask(mask, plt.gca())
show_points(input_point, input_label, plt.gca())
plt.title("EfficientSAM", fontsize=18)
plt.axis('off')
plt.show()