In [None]:
import torch
# Report the installed PyTorch/CUDA build and choose the device for later cells.
cuda_ok = torch.cuda.is_available()
device = torch.device("cuda") if cuda_ok else torch.device("cpu")

# One "<caption>: <value>" line per fact, in a fixed order.
for caption, value in (
    ("PyTorch", torch.__version__),
    ("CUDA available", cuda_ok),
    ("CUDA version", torch.version.cuda),
    ("Device", device),
    ("Number of GPUs", torch.cuda.device_count()),
):
    print(f"{caption}: {value}")

In [None]:
# Human-readable names for the class ids predicted by the model.
class_map = {
    1: "LONGITUDINAL",
    2: "TRANSVERSE",
    3: "PATCHING",
    4: "BLOCK",
    5: "POTHOLE"
}


def select_detections(labels, scores, score_threshold, names=None):
    """Pick the detections whose score reaches ``score_threshold``.

    Args:
        labels: Indexable sequence of class ids (0-dim tensors or plain ints).
        scores: Indexable sequence of confidences, same length as ``labels``.
        score_threshold: Minimum score (inclusive) for a detection to be kept.
        names: Optional ``{class_id: name}`` mapping; defaults to ``class_map``.

    Returns:
        A list of ``(index, label_name, score)`` tuples in input order. Ids
        missing from the mapping are rendered as their decimal string.
    """
    if names is None:
        names = class_map
    kept = []
    for i in range(len(scores)):
        score = float(scores[i])
        if score >= score_threshold:
            label_id = int(labels[i])
            kept.append((i, names.get(label_id, str(label_id)), score))
    return kept


def draw_detections(ax, boxes, labels, scores, masks, score_threshold=0.5):
    """Draw box, caption and mask overlay for each confident detection on ``ax``.

    This used to be a bare loop at cell level, which raised ``NameError`` on a
    fresh kernel because ``boxes``, ``scores``, ``ax`` etc. did not exist yet.
    As a function it can be defined here and called once a prediction exists.

    Args:
        ax: Matplotlib axes that already shows the image.
        boxes: Tensor whose rows are ``(x0, y0, x1, y1)``.
        labels: Tensor of class ids.
        scores: Tensor of confidences.
        masks: Tensor indexed as ``masks[i, 0]`` to obtain the i-th mask.
        score_threshold: Minimum score (inclusive) for a detection to be drawn.
    """
    # Local import so defining the function does not require matplotlib yet.
    from matplotlib.patches import Rectangle

    for i, label_name, score in select_detections(labels, scores, score_threshold):
        box = boxes[i].cpu().numpy()
        ax.add_patch(Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1],
                               fill=False, color='red', linewidth=2))
        ax.text(box[0], box[1], f'{label_name}:{score:.2f}',
                fontsize=12, color='yellow', bbox=dict(facecolor='red', alpha=0.5))
        mask = masks[i, 0].cpu().numpy()
        # Per-pixel alpha: a uniform alpha would tint the whole image once per
        # detection, because every mask spans the full frame.
        ax.imshow(mask, alpha=0.5 * mask, cmap='jet')


In [None]:
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from PIL import Image

import matplotlib.pyplot as plt

# Load the Mask R-CNN model architecture with the correct number of classes.
# NOTE(review): the constructor arguments are left exactly as they were so the
# checkpoint's keys keep matching the architecture; confirm before changing them.
model = maskrcnn_resnet50_fpn(pretrained=False, num_classes=6)
# map_location="cpu": the model stays on the CPU in this cell, and without it a
# checkpoint saved from a GPU run cannot be loaded on a machine without CUDA.
model.load_state_dict(torch.load("maskrcnn_resnet50_trained_1.pth", map_location="cpu"))
model.eval()

# Load and preprocess an image
image = Image.open("3.jpg").convert("RGB")
img_tensor = F.to_tensor(image)

# Perform inference; only the forward pass needs gradient tracking disabled.
with torch.no_grad():
    prediction = model([img_tensor])

# Get prediction results for the single input image
pred = prediction[0]
boxes = pred['boxes']
labels = pred['labels']
scores = pred['scores']
masks = pred['masks']

# Set a score threshold for visualization
score_threshold = 0.5

fig, ax = plt.subplots(figsize=(12, 12))
ax.imshow(image)

# One pass: print the class name and draw box, caption and mask for every
# detection whose score reaches the threshold.
for i in range(len(boxes)):
    if scores[i] < score_threshold:
        continue
    box = boxes[i].cpu().numpy()
    label_id = labels[i].item()
    # class_map comes from an earlier cell; unknown ids fall back to the number.
    label_name = class_map.get(label_id, str(label_id))
    print(label_name)

    ax.add_patch(plt.Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1],
                               fill=False, color='red', linewidth=2))
    ax.text(box[0], box[1], f'{label_name}:{scores[i]:.2f}',
            fontsize=12, color='yellow', bbox=dict(facecolor='red', alpha=0.5))

    # Per-pixel alpha: a uniform alpha would tint the whole image once per
    # detection, because every mask spans the full frame.
    mask = masks[i, 0].cpu().numpy()
    ax.imshow(mask, alpha=0.5 * mask, cmap='jet')

ax.axis('off')
plt.show()
print(prediction)

In [None]:
#test:

from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from PIL import Image
import os
import numpy as np
from tqdm.notebook import tqdm
import cv2
from torchvision.ops import nms
import json
import base64
from io import BytesIO
import matplotlib.pyplot as plt

# Define a color map for each class (R, G, B)
class_colors = {
    1: (1.0, 0.0, 0.0),   # LONGITUDINAL - Red
    2: (0.0, 1.0, 0.0),   # TRANSVERSE - Green
    3: (0.0, 0.0, 1.0),   # PATCHING - Blue
    4: (1.0, 1.0, 0.0),   # BLOCK - Yellow
    5: (1.0, 0.0, 1.0),   # POTHOLE - Magenta
}

# Human-readable class names, defined once instead of on every loop iteration.
class_map = {
    1: "LONGITUDINAL",
    2: "TRANSVERSE",
    3: "PATCHING",
    4: "BLOCK",
    5: "POTHOLE"
}

# Settings. score_threshold is defined here so this cell no longer depends on
# whatever value an earlier cell left behind in the kernel.
score_threshold = 0.5
# NOTE(review): an IoU threshold of 0 drops every box that overlaps a
# higher-scoring box at all, regardless of class. The value is kept as it was;
# confirm it is intended and not a truncated number.
nms_iou_threshold = 0.
max_images = None  # set to a small int for a quick smoke run

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the Mask R-CNN model architecture with the correct number of classes.
# The checkpoint is first mapped to the CPU so it loads on machines without
# CUDA, then the whole model is moved to the selected device.
model = maskrcnn_resnet50_fpn(pretrained=False, num_classes=6)
model.load_state_dict(torch.load("maskrcnn_resnet50_trained_1.pth", map_location="cpu"))
model.to(device)
model.eval()

# List of image file paths (sorted so runs are reproducible; the extension
# match is case-insensitive).
image_dir = "data/coco/split/test"  # replace with your image directory
img_list = os.listdir(image_dir)
image_files = sorted(
    os.path.join(image_dir, img)
    for img in img_list
    if img.lower().endswith(('.jpg', '.jpeg', '.png'))
)[:max_images]

output_dir = "data/coco/split/detected_frames2"
os.makedirs(output_dir, exist_ok=True)

imagecounter = 0
for imagecounter, img_path in enumerate(image_files, start=1):
    print(f"Processing image {imagecounter}/{len(image_files)}: {img_path}")

    # Load and preprocess the image
    image = Image.open(img_path).convert("RGB")
    img_tensor = F.to_tensor(image).to(device)

    # Only the forward pass needs gradient tracking disabled.
    with torch.no_grad():
        prediction = model([img_tensor])

    # Move the results to the CPU once; everything below is NumPy/Matplotlib work.
    pred = {key: value.cpu() for key, value in prediction[0].items()}
    boxes = pred['boxes']
    labels = pred['labels']
    scores = pred['scores']
    masks = pred['masks']

    # Apply Non-Maximum Suppression (NMS) and keep only the surviving detections
    keep_indices = nms(boxes, scores, iou_threshold=nms_iou_threshold)
    boxes = boxes[keep_indices]
    labels = labels[keep_indices]
    scores = scores[keep_indices]
    masks = masks[keep_indices]

    fig, ax = plt.subplots(figsize=(12, 12))
    try:
        ax.imshow(image)

        shapes = []
        for i in range(len(boxes)):
            if scores[i] < score_threshold:
                continue
            box = boxes[i].numpy()
            label_id = labels[i].item()
            label_name = class_map.get(label_id, str(label_id))
            color = class_colors.get(label_id, (1.0, 1.0, 1.0))  # default white
            ax.text(
                box[0], box[1], f'{label_name}:{scores[i]:.2f}',
                fontsize=12, color='yellow'
            )

            # Overlay: solid class color, with the mask value (scaled by 0.5)
            # as per-pixel alpha so the background stays visible.
            mask = masks[i, 0].numpy()
            mask_rgba = np.zeros((*mask.shape, 4), dtype=np.float32)
            mask_rgba[..., :3] = color
            mask_rgba[..., 3] = mask * 0.5
            ax.imshow(mask_rgba, interpolation='none')

            # Binarize at 0.5 to extract polygon outlines.
            contour_mask = (mask > 0.5).astype(np.uint8) * 255
            # Pixel count of the whole binarized mask. It is the same for every
            # contour of this detection, so it is computed once here.
            distress_coverage = int(np.count_nonzero(contour_mask))
            # [-2] selects the contour list on both OpenCV 3.x (3 return values)
            # and 4.x (2 return values).
            contours = cv2.findContours(
                contour_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )[-2]
            for contour in contours:
                polygon = contour.reshape(-1, 2)
                if polygon.shape[0] <= 2:
                    continue  # fewer than three points is not a polygon
                ax.plot(polygon[:, 0], polygon[:, 1], color=color, linewidth=2)
                shapes.append({
                    "label": label_name,
                    "points": polygon.tolist(),
                    "group_id": None,
                    "shape_type": "polygon",
                    "flags": {},
                    "distress_coverage": distress_coverage
                })

        # Encode the image as base64 so it can be embedded in the annotation.
        buffer = BytesIO()
        image.save(buffer, format="JPEG")
        encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")

        annotation = {
            "version": "5.0.1",
            "flags": {},
            "shapes": shapes,
            "imagePath": os.path.basename(img_path),
            "imageData": encoded_image,
            "imageHeight": image.height,
            "imageWidth": image.width
        }

        # Save the annotation (imageData included) once, next to the rendered frame.
        json_path = os.path.splitext(os.path.join(output_dir, os.path.basename(img_path)))[0] + ".json"
        with open(json_path, "w") as f:
            json.dump(annotation, f, indent=2)

        ax.axis('off')
        output_path = os.path.join(output_dir, os.path.basename(img_path))
        fig.savefig(output_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even if this image failed, so a long run
        # does not accumulate open figures.
        plt.close(fig)
