First, import the required libraries.

In [4]:
from transformers import SamModel, SamConfig, SamProcessor
import torch


# Portions of this code were generated with the assistance of ChatGPT (OpenAI, 2025) and subsequently modified by the author.
# OpenAI. (2025). ChatGPT (May 2025 version) [Large language model]. https://chat.openai.com

Then load the model and the fine-tuned weights.

In [5]:
# Build the SAM model objects and load the fine-tuned weights.

# Local directory holding the base SAM (ViT-B) configuration and processor files
sam_base_dir = "D:/Thesis/SAM/segment-anything/sam_models/sam-vit-base"

# Load the model configuration and the matching pre-processor
model_config = SamConfig.from_pretrained(sam_base_dir)
processor = SamProcessor.from_pretrained(sam_base_dir)

# Set the device to cuda if available, otherwise use cpu. This is decided
# before loading the checkpoint so the weights can be mapped onto it directly.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Create an instance of the model architecture with the loaded configuration.
# NOTE(review): constructing SamModel from a config alone does not load any
# pretrained weights. If this is meant to be the pretrained baseline, it
# should probably be SamModel.from_pretrained(sam_base_dir) - confirm.
original_model = SamModel(config=model_config)

my_model = SamModel(config=model_config)
# Update the model by loading the weights from the saved file.
# Alternative checkpoint:
#my_model.load_state_dict(torch.load("D:/Thesis/SAM/segment-anything/notebooks/output_sam_b_brg_disk_val_80.pth"))
checkpoint_path = "D:/Thesis/SAM/segment-anything/notebooks/output_sam_b_disk_val_24.pth"
# map_location lets a checkpoint that was saved from a GPU run be loaded on a
# CPU-only machine instead of failing while deserialising CUDA tensors.
my_model.load_state_dict(torch.load(checkpoint_path, map_location=device))

original_model.to(device)
my_model.to(device)

Load the test images.

In [13]:
import os
from PIL import Image
import numpy as np
import cv2 as cv
# Load every PNG in the test directory, keep three views of each image, then
# pick one image at random and show its three views side by side.

testDirImage = "D:/Thesis/datasets/BrG_test_data/all_bad_im/"
#testDirImage ="D:/Thesis/datasets/kaggle_drive/test/very_bad_image/"

large_og_images = []    # original colour image, converted BGR -> RGB for matplotlib
large_test_images = []  # V (value) channel of the HSV conversion; used as model input
other_images = []       # plain grayscale conversion, kept for visual comparison

# os.listdir returns entries in arbitrary order; sorting keeps the index of
# each image stable between runs.
for path in sorted(os.listdir(testDirImage)):
    if not path.endswith('.png'):
        continue

    image_path = os.path.join(testDirImage, path)
    img = cv.imread(image_path)
    if img is None:
        # cv.imread signals an unreadable file by returning None, not by raising
        raise IOError(f"Could not read image: {image_path}")

    # OpenCV loads images as BGR; matplotlib expects RGB
    large_og_images += [cv.cvtColor(img, cv.COLOR_BGR2RGB)]

    im = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    other_images += [im]

    # Keep only the V channel of the HSV representation
    img = cv.cvtColor(img, cv.COLOR_BGR2HSV)
    H, S, V = cv.split(img)
    img = np.asarray(V)
    large_test_images += [img]

if not large_test_images:
    raise FileNotFoundError(f"No .png images found in {testDirImage}")

# Index of the randomly selected test image.
# NOTE(review): this name shadows the stdlib `random` module; it is kept
# unchanged in case later cells refer to it.
random = np.random.randint(0, len(large_test_images))
print(random)
large_test_image = large_test_images[random]
print(large_test_image.shape)

import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 3, figsize=(15, 5))
axes[0].imshow(large_og_images[random])
axes[0].set_title("Original")
axes[1].imshow(other_images[random], cmap='gray')
axes[1].set_title("Grayscale")
axes[2].imshow(large_test_image, cmap='gray')
axes[2].set_title("HSV value channel (model input)")
plt.show()
#patches = patchify(large_test_image, (256, 256), step=256)  #Step=256 for 256 patches means no overlap


Let's define an array of input points

In [9]:
# Build a regular grid_size x grid_size grid of (x, y) point prompts laid over
# a square array_size x array_size pixel area.
# NOTE(review): the grid only spans the whole picture if the test images are
# array_size x array_size pixels - confirm against the printed image shape.

# Side length, in pixels, of the square area covered by the grid
array_size = 512

# Number of grid points along each axis
grid_size = 30

# Evenly spaced coordinates from 0 to array_size-1, both ends included
x = np.linspace(0, array_size - 1, grid_size)
y = np.linspace(0, array_size - 1, grid_size)

# Generate a grid of coordinates: xv varies along columns, yv along rows
xv, yv = np.meshgrid(x, y)

# Pair the coordinates into an array of shape (grid_size, grid_size, 2).
# The cast truncates the fractional part, exactly like int() on each value,
# and int64 is requested explicitly so the dtype does not depend on the platform.
grid_points = np.stack([xv, yv], axis=-1).astype(np.int64)

# Show only the first few points instead of dumping the whole grid
print(grid_points[0, :3].tolist())

# We need to reshape our nxn grid to the expected shape of the input_points tensor
# (batch_size, point_batch_size, num_points_per_image, 2),
# where the last dimension of 2 represents the x and y coordinates of each point.
# batch_size: The number of images you're processing at once.
# point_batch_size: The number of point sets you have for each image.
# num_points_per_image: The number of points in each set.
input_points = torch.tensor(grid_points).view(1, 1, grid_size * grid_size, 2)
tuple(input_points.shape)

Let's test the models

In [14]:

# Run the fine-tuned model on the test image selected in the loading cell,
# prompting it with the grid of input points, and plot the result.

# Probability above which a pixel is counted as foreground in the hard mask
MASK_THRESHOLD = 0.90

my_model.eval()

# Image to segment: the randomly selected test image from the loading cell
single_patch = Image.fromarray(large_test_image)

# Prepare the image for the model, using the point grid as the prompt.
# To try without any prompt instead:
#inputs = processor(single_patch.convert("RGB"),  return_tensors="pt")
inputs = processor(single_patch.convert("RGB"), input_points=input_points, return_tensors="pt")

# Move the input tensors to the same device as the model
inputs = {k: v.to(device) for k, v in inputs.items()}

# Forward pass; no gradients are needed for inference
with torch.no_grad():
    outputs = my_model(**inputs, multimask_output=False)
print(outputs.iou_scores.mean().cpu().numpy())

# Apply sigmoid to turn the predicted mask values into probabilities
single_patch_prob = torch.sigmoid(outputs.pred_masks.squeeze(1))
# Drop the remaining singleton dimensions and move to numpy for plotting
single_patch_prob = single_patch_prob.cpu().numpy().squeeze()
# Convert the soft mask to a hard (0/1) mask
single_patch_prediction = (single_patch_prob > MASK_THRESHOLD).astype(np.uint8)

print(single_patch_prob.shape)

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Left: the input image
axes[0].imshow(np.array(single_patch), cmap='gray')
axes[0].set_title("Image")

# Middle: per-pixel foreground probability (default colormap)
axes[1].imshow(single_patch_prob)
axes[1].set_title("Probability Map")

# Right: thresholded binary mask
axes[2].imshow(single_patch_prediction, cmap='gray')
axes[2].set_title("Prediction")

# Hide axis ticks; with no ticks there are no tick labels left to clear
for ax in axes:
    ax.set_xticks([])
    ax.set_yticks([])

# Display the images side by side
plt.show()