### TRIAL 2

In [3]:
import os,cv2,copy,torch
import numpy as np
import torch.nn.functional as F
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms.functional import to_tensor
from torchvision import transforms as TF
from torch.optim import AdamW
from PIL import Image
from transformers import SegformerForSemanticSegmentation
from transformers import get_scheduler
from sklearn.metrics import jaccard_score

In [2]:
class GreenDetection(Dataset):
    def __init__(self, images_dir, masks_dir, transform=None):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.transform = transform
        self.images = [img for img in os.listdir(images_dir) if img.lower().endswith('.jpg')]
        self.masks = [mask.lower().replace('.jpg', '.png') for mask in self.images]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_path = os.path.join(self.images_dir, self.images[idx])
        mask_path = os.path.join(self.masks_dir, self.masks[idx])
        image = Image.open(image_path).convert("RGB")
        mask = Image.open(mask_path).convert('L')  # Convert mask to grayscale
        
        # Convert mask to binary format with 0 and 1 values
        mask = np.array(mask)
        mask = (mask > 0).astype(np.uint8)  # Assuming non-zero pixels are Green
        
        # Convert to PIL Image for consistency in transforms
        mask = Image.fromarray(mask)

        if self.transform:
            image = self.transform(image)
            # Assuming to_tensor transform is included which scales pixel values between 0-1
            # mask = to_tensor(mask)  # Convert the mask to [0, 1] range
        mask = TF.functional.resize(img=mask, size=[360, 640], interpolation=Image.NEAREST)
        mask = TF.functional.to_tensor(mask)
        mask = (mask > 0).long()  # Threshold back to binary and convert to LongTensor
        return image, mask

def mean_iou(preds, labels, num_classes):
    # Flatten predictions and labels
    preds_flat = preds.view(-1)
    labels_flat = labels.view(-1)

    # Check that the number of elements in the flattened predictions
    # and labels are equal
    if preds_flat.shape[0] != labels_flat.shape[0]:
        raise ValueError(f"Predictions and labels have mismatched shapes: "
                         f"{preds_flat.shape} vs {labels_flat.shape}")

    # Calculate the Jaccard score for each class
    iou = jaccard_score(labels_flat.cpu().numpy(), preds_flat.cpu().numpy(),
                        average=None, labels=range(num_classes))

    # Return the mean IoU
    return np.mean(iou)

In [13]:
# Define the appropriate transformations
transform = TF.Compose([
    TF.Resize((360, 640)),
    TF.ToTensor(),
    TF.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create the dataset
train_dataset = GreenDetection(images_dir=r'D:\College\External Project\UCC\Data\public\img\train',
                           masks_dir=r'D:\College\External Project\UCC\Data\public\ann\train',
                           transform=transform)

valid_dataset = GreenDetection(images_dir=r'D:\College\External Project\UCC\Data\public\img\valid',
                           masks_dir=r'D:\College\External Project\UCC\Data\public\ann\valid',
                           transform=transform)

# Create the data loaders
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=2, shuffle=False, num_workers=0)

print(len(train_dataset), len(valid_dataset))

4322 689


In [14]:
# Load the pre-trained model
model = SegformerForSemanticSegmentation.from_pretrained('nvidia/segformer-b2-finetuned-ade-512-512')

# Adjust the number of classes for Green Detection
model.config.num_labels = 2  

In [15]:
# Check for CUDA acceleration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model.load_state_dict(torch.load('best_model.pth'))
model.to(device);
print(f"Using {device} for training")

Using cuda for training


In [16]:
# Define the optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

# Define the learning rate scheduler
num_epochs = 30
num_training_steps = num_epochs * len(train_loader)
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps
)

# Placeholder for best mean IoU and best model weights
best_iou = 0.0
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(num_epochs):
    model.train()
    
    train_iterator = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}", unit="batch")
    
    for batch in train_iterator:
        images, masks = batch
        images = images.to(device)
        masks = masks.to(device).long()  # Ensure masks are LongTensors

        # Remove the channel dimension from the masks tensor
        masks = masks.squeeze(1)  # This changes the shape from [batch, 1, H, W] to [batch, H, W]
        optimizer.zero_grad()

        # Pass pixel_values and labels to the model
        outputs = model(pixel_values=images, labels=masks,return_dict=True)
        
        loss = outputs["loss"]
        loss.backward()

        optimizer.step()
        lr_scheduler.step()
        outputs = F.interpolate(outputs["logits"], size=masks.shape[-2:], mode="bilinear", align_corners=False)
        
        train_iterator.set_postfix(loss=loss.item())
    

    # Evaluation loop for each epoch
    model.eval()
    total_iou = 0
    num_batches = 0
    valid_iterator = tqdm(valid_loader, desc="Validation", unit="batch")
    for batch in valid_iterator:
       
        images, masks = batch
        images = images.to(device)
        masks = masks.to(device).long()
    
        with torch.no_grad():
            # Get the logits from the model and apply argmax to get the predictions
            outputs = model(pixel_values=images,return_dict=True)
            outputs = F.interpolate(outputs["logits"], size=masks.shape[-2:], mode="bilinear", align_corners=False)
            preds = torch.argmax(outputs, dim=1)
            preds = torch.unsqueeze(preds, dim=1)

        preds = preds.view(-1)
        masks = masks.view(-1)
        # Compute IoU
        iou = mean_iou(preds, masks, model.config.num_labels)
        total_iou += iou
        num_batches += 1
        valid_iterator.set_postfix(mean_iou=iou)
    
    epoch_iou = total_iou / num_batches
    print(f"Epoch {epoch+1}/{num_epochs} - Mean IoU: {epoch_iou:.4f}")

    # Check for improvement
    if epoch_iou > best_iou:
        print(f"Validation IoU improved from {best_iou:.4f} to {epoch_iou:.4f}")
        best_iou = epoch_iou
        best_model_wts = copy.deepcopy(model.state_dict())
        torch.save(best_model_wts, 'best_model.pth')

# After all epochs, load the best model weights - optional
model.load_state_dict(torch.load('best_model.pth'))
print("Loaded the best model weights!")

Epoch 1/30: 100%|██████████| 2161/2161 [12:17<00:00,  2.93batch/s, loss=0.183] 
Validation: 100%|██████████| 345/345 [01:27<00:00,  3.96batch/s, mean_iou=0.851]


Epoch 1/30 - Mean IoU: 0.8211
Validation IoU improved from 0.0000 to 0.8211


Epoch 2/30: 100%|██████████| 2161/2161 [10:50<00:00,  3.32batch/s, loss=0.106] 
Validation: 100%|██████████| 345/345 [01:11<00:00,  4.81batch/s, mean_iou=0.851]


Epoch 2/30 - Mean IoU: 0.7785


Epoch 3/30: 100%|██████████| 2161/2161 [10:52<00:00,  3.31batch/s, loss=0.148] 
Validation: 100%|██████████| 345/345 [01:13<00:00,  4.67batch/s, mean_iou=0.872]


Epoch 3/30 - Mean IoU: 0.8061


Epoch 4/30: 100%|██████████| 2161/2161 [10:17<00:00,  3.50batch/s, loss=0.113] 
Validation: 100%|██████████| 345/345 [01:13<00:00,  4.67batch/s, mean_iou=0.889]


Epoch 4/30 - Mean IoU: 0.8310
Validation IoU improved from 0.8211 to 0.8310


Epoch 5/30: 100%|██████████| 2161/2161 [10:42<00:00,  3.36batch/s, loss=0.0743]
Validation: 100%|██████████| 345/345 [01:13<00:00,  4.66batch/s, mean_iou=0.897]


Epoch 5/30 - Mean IoU: 0.8247


Epoch 6/30: 100%|██████████| 2161/2161 [10:47<00:00,  3.34batch/s, loss=0.151] 
Validation: 100%|██████████| 345/345 [01:15<00:00,  4.59batch/s, mean_iou=0.89] 


Epoch 6/30 - Mean IoU: 0.8327
Validation IoU improved from 0.8310 to 0.8327


Epoch 7/30: 100%|██████████| 2161/2161 [10:47<00:00,  3.34batch/s, loss=0.0815]
Validation: 100%|██████████| 345/345 [01:15<00:00,  4.56batch/s, mean_iou=0.883]


Epoch 7/30 - Mean IoU: 0.8194


Epoch 8/30: 100%|██████████| 2161/2161 [10:41<00:00,  3.37batch/s, loss=0.0902]
Validation: 100%|██████████| 345/345 [01:15<00:00,  4.59batch/s, mean_iou=0.9]  


Epoch 8/30 - Mean IoU: 0.8353
Validation IoU improved from 0.8327 to 0.8353


Epoch 9/30: 100%|██████████| 2161/2161 [10:40<00:00,  3.37batch/s, loss=0.0963]
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.61batch/s, mean_iou=0.903]


Epoch 9/30 - Mean IoU: 0.8337


Epoch 10/30: 100%|██████████| 2161/2161 [10:48<00:00,  3.33batch/s, loss=0.0597]
Validation: 100%|██████████| 345/345 [01:15<00:00,  4.58batch/s, mean_iou=0.9]  


Epoch 10/30 - Mean IoU: 0.8349


Epoch 11/30: 100%|██████████| 2161/2161 [10:46<00:00,  3.34batch/s, loss=0.0502]
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.66batch/s, mean_iou=0.893]


Epoch 11/30 - Mean IoU: 0.8369
Validation IoU improved from 0.8353 to 0.8369


Epoch 12/30: 100%|██████████| 2161/2161 [10:45<00:00,  3.35batch/s, loss=0.0712]
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.61batch/s, mean_iou=0.893]


Epoch 12/30 - Mean IoU: 0.8357


Epoch 13/30: 100%|██████████| 2161/2161 [10:40<00:00,  3.37batch/s, loss=0.0826]
Validation: 100%|██████████| 345/345 [01:13<00:00,  4.70batch/s, mean_iou=0.902]


Epoch 13/30 - Mean IoU: 0.8314


Epoch 14/30: 100%|██████████| 2161/2161 [10:44<00:00,  3.35batch/s, loss=0.0537]
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.61batch/s, mean_iou=0.896]


Epoch 14/30 - Mean IoU: 0.8342


Epoch 15/30: 100%|██████████| 2161/2161 [10:39<00:00,  3.38batch/s, loss=0.107] 
Validation: 100%|██████████| 345/345 [01:13<00:00,  4.68batch/s, mean_iou=0.89] 


Epoch 15/30 - Mean IoU: 0.8382
Validation IoU improved from 0.8369 to 0.8382


Epoch 16/30: 100%|██████████| 2161/2161 [10:42<00:00,  3.36batch/s, loss=0.0378]
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.64batch/s, mean_iou=0.888]


Epoch 16/30 - Mean IoU: 0.8385
Validation IoU improved from 0.8382 to 0.8385


Epoch 17/30: 100%|██████████| 2161/2161 [10:44<00:00,  3.35batch/s, loss=0.0294]
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.65batch/s, mean_iou=0.889]


Epoch 17/30 - Mean IoU: 0.8373


Epoch 18/30: 100%|██████████| 2161/2161 [10:39<00:00,  3.38batch/s, loss=0.0196]
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.62batch/s, mean_iou=0.885]


Epoch 18/30 - Mean IoU: 0.8367


Epoch 19/30: 100%|██████████| 2161/2161 [10:44<00:00,  3.35batch/s, loss=0.0653]
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.63batch/s, mean_iou=0.89] 


Epoch 19/30 - Mean IoU: 0.8300


Epoch 20/30: 100%|██████████| 2161/2161 [10:38<00:00,  3.38batch/s, loss=0.0582] 
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.65batch/s, mean_iou=0.877]


Epoch 20/30 - Mean IoU: 0.8343


Epoch 21/30: 100%|██████████| 2161/2161 [10:44<00:00,  3.36batch/s, loss=0.0564]
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.64batch/s, mean_iou=0.881]


Epoch 21/30 - Mean IoU: 0.8366


Epoch 22/30: 100%|██████████| 2161/2161 [10:39<00:00,  3.38batch/s, loss=0.0429]
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.62batch/s, mean_iou=0.89] 


Epoch 22/30 - Mean IoU: 0.8375


Epoch 23/30: 100%|██████████| 2161/2161 [10:43<00:00,  3.36batch/s, loss=0.036] 
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.64batch/s, mean_iou=0.876]


Epoch 23/30 - Mean IoU: 0.8369


Epoch 24/30: 100%|██████████| 2161/2161 [10:38<00:00,  3.38batch/s, loss=0.0378]
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.60batch/s, mean_iou=0.882]


Epoch 24/30 - Mean IoU: 0.8363


Epoch 25/30: 100%|██████████| 2161/2161 [10:43<00:00,  3.36batch/s, loss=0.0382]
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.63batch/s, mean_iou=0.887]


Epoch 25/30 - Mean IoU: 0.8365


Epoch 26/30: 100%|██████████| 2161/2161 [10:38<00:00,  3.38batch/s, loss=0.0204]
Validation: 100%|██████████| 345/345 [01:13<00:00,  4.67batch/s, mean_iou=0.876]


Epoch 26/30 - Mean IoU: 0.8360


Epoch 27/30: 100%|██████████| 2161/2161 [10:43<00:00,  3.36batch/s, loss=0.0309]
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.65batch/s, mean_iou=0.874]


Epoch 27/30 - Mean IoU: 0.8357


Epoch 28/30: 100%|██████████| 2161/2161 [10:38<00:00,  3.38batch/s, loss=0.0199]
Validation: 100%|██████████| 345/345 [01:14<00:00,  4.66batch/s, mean_iou=0.883]


Epoch 28/30 - Mean IoU: 0.8367


Epoch 29/30: 100%|██████████| 2161/2161 [10:48<00:00,  3.33batch/s, loss=0.039] 
Validation: 100%|██████████| 345/345 [01:15<00:00,  4.55batch/s, mean_iou=0.882]


Epoch 29/30 - Mean IoU: 0.8376


Epoch 30/30: 100%|██████████| 2161/2161 [10:44<00:00,  3.35batch/s, loss=0.0196]
Validation: 100%|██████████| 345/345 [01:15<00:00,  4.57batch/s, mean_iou=0.881]


Epoch 30/30 - Mean IoU: 0.8371
Loaded the best model weights!


In [12]:
import torch
torch.cuda.empty_cache()

In [8]:
#  Load the trained model 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SegformerForSemanticSegmentation.from_pretrained('nvidia/segformer-b2-finetuned-ade-512-512')

# Replace with the actual number of classes
model.config.num_labels = 2  

# Load the state from the fine-tuned model and set to model.eval() mode
model.load_state_dict(torch.load('best_model.pth'))
model.to(device)
model.eval()

  return self.fget.__get__(instance, owner)()


SegformerForSemanticSegmentation(
  (segformer): SegformerModel(
    (encoder): SegformerEncoder(
      (patch_embeddings): ModuleList(
        (0): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
          (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        )
        (1): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        )
        (2): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(128, 320, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (layer_norm): LayerNorm((320,), eps=1e-05, elementwise_affine=True)
        )
        (3): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(320, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)

In [5]:
# Perform transformations
data_transforms = TF.Compose([
    TF.ToPILImage(),
    TF.Resize((360, 640)),
    TF.ToTensor(),
    TF.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [6]:
# RLE CODE
def mask_to_rle(mask: np.ndarray):
    """
    Convert a binary mask to RLE format.
    :param mask: numpy array, 1 - mask, 0 - background
    :return: RLE array
    """
    pixels = mask.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return [int(x) for x in runs]

In [10]:
# Preprocess the frame
import json, os


def inference_model(frame, mask_path_dir="", image_name=""):
    input_tensor = data_transforms(frame).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(pixel_values=input_tensor, return_dict=True)
        outputs = F.interpolate(outputs["logits"], size=(
            360, 640), mode="bilinear", align_corners=False)

        preds = torch.argmax(outputs, dim=1)
        preds = torch.unsqueeze(preds, dim=1)
        predicted_mask = (torch.sigmoid(preds) > 0.5).float()

        # Create an RGB version of the mask to overlay on the original frame
        mask_np = predicted_mask.cpu().squeeze().numpy()
        mask_resized = cv2.resize(mask_np, (frame.shape[1], frame.shape[0]))
        return mask_resized


all_dict = {}
for image_name in os.listdir(r'D:\College\External Project\UCC\Data\public\img\test'):
    temp_dict = {}
    image = Image.open(
        r'D:\College\External Project\UCC\Data\public\img\test' + "\\" + image_name)
    my_image = np.array(image)
    # print(type(my_image))
    new_array = inference_model(np.array(image))
    new_array = new_array.astype(int)
    temp_dict["counts"] = mask_to_rle(new_array)
    temp_dict["height"] = image.size[1]
    temp_dict["width"] = image.size[0]
    all_dict[image_name] = temp_dict

print(all_dict)
# json_obj = json.dumps(all_dict)
# with open(r'D:\College\External Project\results2.json', 'w') as f:
#     f.write(json_obj)

{'00352d240eb120fccdc325a8fc20d9d9.jpg': {'counts': [1, 90277, 90291, 367, 90682, 356, 91070, 341, 91414, 6, 91458, 332, 91844, 325, 92230, 315, 92619, 310, 93007, 306, 93395, 302, 93781, 300, 94167, 298, 94552, 297, 94938, 295, 95323, 294, 95708, 293, 96094, 291, 96482, 287, 96867, 286, 97252, 285, 97637, 284, 98022, 283, 98407, 282, 98792, 281, 99178, 279, 99563, 278, 99948, 277, 100334, 275, 100719, 274, 101104, 273, 101488, 273, 101874, 271, 102258, 271, 102643, 270, 103028, 269, 103412, 269, 103797, 268, 104182, 267, 104567, 266, 104952, 265, 105336, 265, 105721, 264, 106106, 263, 106491, 262, 106875, 262, 107260, 261, 107644, 261, 108029, 260, 108413, 260, 108797, 260, 109181, 260, 109565, 260, 109949, 260, 110333, 260, 110717, 260, 111101, 260, 111485, 260, 111870, 259, 112254, 259, 112638, 259, 113022, 259, 113406, 259, 113790, 259, 114174, 259, 114558, 259, 114943, 258, 115327, 258, 115711, 258, 116096, 257, 116480, 257, 116866, 255, 117250, 255, 117634, 255, 118018, 255, 1184