# Colab Inferencing Notebook
The purpose of this file is to save and version control the Inferencing.ipynb file in Google Colab.  
The code in this file (and in Colab) will later be used for a .py file that will be sent to and processed in NRP (Nautilus).  
Colab's purpose is to debug this specific file and ensure that there are no errors before it is sent to NRP, since there is no debugging in NRP. This file is subject to change.

In [None]:
# Install the runtime dependencies for this notebook (IPython shell magics).
# bitsandbytes is upgraded to the latest release; it backs the
# BitsAndBytesConfig quantization used when the base model is loaded.
!pip install -U bitsandbytes
# PyTorch stack, with the cu121 wheel index added as an extra package source.
!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu121
# peft provides PeftModel, used to attach the fine-tuned LoRA adapter.
!pip install peft
# transformers provides the model, processor and quantization config classes.
!pip install transformers
!pip install accelerate

In [None]:
# Mount Google Drive at /content/drive; every model/dataset path used by the
# later cells lives under /content/drive/MyDrive.
from google.colab import drive
# force_remount=True requests a fresh mount even if one is already present.
drive.mount('/content/drive', force_remount=True)

In [None]:
import torch
from transformers import AutoModelForImageTextToText, AutoProcessor, BitsAndBytesConfig
from peft import PeftModel
from PIL import Image
import json
from pathlib import Path
import time
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np

# ---------------------------------------------------------------------------
# Paths and model loading.
# All paths below point into the mounted Google Drive.
# Modify later for Nautilus!
# ---------------------------------------------------------------------------
# Directory holding the base Qwen2.5-VL-7B-Instruct checkpoint.
MODEL_PATH = "/content/drive/MyDrive/VLM_MODELS/Qwen2.5-VL-7B-Instruct"
# Directory holding the fine-tuned LoRA adapter; the processor is also loaded from here.
OUTPUT_DIR = "/content/drive/MyDrive/output/waste_detection"   # Fine-tuned LoRA
# NOTE(review): DATASET_DIR, TRAINING_DATA and VAL_DATA are not referenced by
# any cell visible in this file — confirm whether they are still needed.
DATASET_DIR = "/content/drive/MyDrive/model_datasets/dataset2"
TRAINING_DATA = "/content/drive/MyDrive/model_datasets/train2.jsonl"
VAL_DATA = "/content/drive/MyDrive/model_datasets/valid2.jsonl"

print("Loading fine-tuned Qwen2.5-VL-7B-Instruct model...")


# NOTE(review): the string below is a bare expression statement used as a
# reminder, not a docstring. The config that follows requests 8-bit loading;
# the reminder says it should be switched to a QLoRA setup before running.
'''
Set up as Lora, change to QLoRA before running 
'''
# Quantization settings handed to from_pretrained for the base model.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_threshold=6.0,
)

# Load Base Model
# device_map="auto" leaves device placement to the library; the quantization
# config above is applied while the weights are loaded.
base_model = AutoModelForImageTextToText.from_pretrained(
    MODEL_PATH,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

# Load LoRA Weights
# Wraps the base model with the adapter stored in OUTPUT_DIR; `model` is the
# object the inference function calls generate() on.
model = PeftModel.from_pretrained(
    base_model,
    OUTPUT_DIR,
)

# Load Processor (MUST load from fine-tuned directory)
processor = AutoProcessor.from_pretrained(
    OUTPUT_DIR,
    trust_remote_code=True
)
print("Model loaded successfully!")

In [None]:
def run_inference(image_path, *, max_new_tokens=1024):
    """Run the fine-tuned model on a single image and return its raw text output.

    Args:
        image_path: Path to the image file to analyse.
        max_new_tokens: Upper bound on the number of tokens generated for the
            answer. Defaults to 1024.

    Returns:
        The decoded text of the newly generated tokens only (the prompt
        tokens are sliced off before decoding, special tokens are skipped).

    Relies on the notebook-level ``model`` and ``processor`` objects.
    """
    # Open inside a context manager so the file handle is released
    # deterministically; convert() returns an independent RGB copy that
    # stays valid after the file is closed.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    messages = [
        {
            "role": "system",
            "content": (
                "You are an assistant that detects waste objects in images. "
                "The possible waste categories are: Glass-A, Green waste-A, Metal, Organics-A, "
                "Organics-B-NOT, Organics-E, Others, Paper-A, Paper-B, Paper-D, "
                "Plastic-A, Plastic-B, Plastic-C, Plastic-D, Plastic-E, Plastic-G, Wood."
            )
        },
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": "Detect the waste objects in this image and output bounding boxes in Pascal VOC format."}
            ]
        }
    ]

    # Render the conversation to a prompt string that ends with the
    # assistant generation prompt, then tokenize text and image together.
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    inputs = processor(
        text=[text],
        images=[image],
        return_tensors="pt",
        padding=True
    ).to(model.device)

    # Greedy decoding (do_sample=False); no gradients are needed for inference.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False
        )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = outputs[0][prompt_length:]
    return processor.decode(generated_ids, skip_special_tokens=True)

print("Inference function ready.")

In [None]:
TEST_FILE = "/content/drive/MyDrive/model_datasets/test2.jsonl"

# Quick smoke test: run the model on the first record of the test set and
# print its output next to the stored ground truth.
if Path(TEST_FILE).exists():
    # Only the first JSONL line is read.
    with open(TEST_FILE, "r") as f:
        test_example = json.loads(f.readline())

    # The image reference is taken from the second message and the expected
    # answer from the third; the "file://" scheme is stripped from the path.
    image_path = (
        test_example['messages'][1]['content'][0]['image']
        .replace("file://", "")
    )
    ground_truth = test_example['messages'][2]['content']

    print(f"Running inference on: {image_path}")
    pred = run_inference(image_path)

    print(f"\nPrediction:\n {pred}")
    print(f"\nGround Truth:\n {ground_truth}")
else:
    print("No test.jsonl found. Skipping quick test.")

In [None]:
TEST_FILE = "/content/drive/MyDrive/model_datasets/test2.jsonl"
# Number of test records to run; single source of truth for the loop bound
# and for the status messages below.
NUM_TEST_IMAGES = 5
# Collected per-image outputs; later cells read "image_path", "prediction"
# and "ground_truth" from each entry.
results = []

if not Path(TEST_FILE).exists():
    print(f"No test.jsonl found. Cannot run {NUM_TEST_IMAGES}-image test.")
else:
    print(f"Running inference on {NUM_TEST_IMAGES} images...\n")

    # Read up to NUM_TEST_IMAGES records. Blank lines (e.g. a trailing
    # newline at the end of the file) are skipped instead of being passed to
    # json.loads, which would raise on an empty string.
    test_data = []
    with open(TEST_FILE, "r", encoding="utf-8") as f:
        for line in f:
            if len(test_data) >= NUM_TEST_IMAGES:
                break
            if not line.strip():
                continue
            test_data.append(json.loads(line))

    for i, example in enumerate(test_data):
        # Image reference comes from the second message, expected answer
        # from the third; the "file://" scheme is stripped from the path.
        image_path = example['messages'][1]['content'][0]['image'].replace("file://", "")
        gt = example['messages'][2]['content']

        # perf_counter is a monotonic clock intended for measuring durations.
        start = time.perf_counter()
        pred = run_inference(image_path)
        end = time.perf_counter()

        results.append({
            "image_path": image_path,
            "prediction": pred,
            "ground_truth": gt
        })

        print(f"Img {i+1}: {Path(image_path).name}")
        print("Time:", round(end - start, 1), "s")
        print()

    # Report how many images were actually processed (may be fewer than
    # NUM_TEST_IMAGES if the file is short).
    print(f"Finished {len(results)}-image test.")

In [None]:
def parse_detections(text):
    """Parse detection text into a list of detection dicts.

    Each line is split on whitespace and must contain at least five tokens:
    a class name followed by four integers. The four integers are stored, in
    the order they appear, under the keys ``xmin``, ``xmax``, ``ymin`` and
    ``ymax``. Lines that cannot be parsed are skipped.

    Two layouts are tried per line, in this order:

    1. first token is the class, tokens 2-5 are the integers (any trailing
       tokens are ignored);
    2. the last four tokens are the integers and everything before them is
       the class name, joined with single spaces. This handles class names
       containing a space, such as ``Green waste-A``.

    Args:
        text: Multi-line string, one detection per line.

    Returns:
        A list of dicts with keys ``class``, ``xmin``, ``xmax``, ``ymin``,
        ``ymax``; empty if no line could be parsed.
    """
    # NOTE(review): Pascal VOC boxes are conventionally written as
    # xmin ymin xmax ymax, whereas positions 2 and 3 are mapped to xmax and
    # ymin here — confirm against the label format used for training.
    dets = []
    for line in text.strip().split("\n"):
        parts = line.split()
        if len(parts) < 5:
            continue

        # (class tokens, coordinate tokens) candidates, tried in order.
        layouts = (
            (parts[:1], parts[1:5]),
            (parts[:-4], parts[-4:]),
        )
        for name_tokens, coord_tokens in layouts:
            try:
                xmin, xmax, ymin, ymax = map(int, coord_tokens)
            except ValueError:
                # Not four integers in this layout; try the next one.
                continue
            dets.append({
                "class": " ".join(name_tokens),
                "xmin": xmin,
                "xmax": xmax,
                "ymin": ymin,
                "ymax": ymax,
            })
            break
    return dets

# Tally how many boxes were parsed from the predictions versus the ground
# truth across every collected result.
if results:
    total_pred = total_gt = 0

    for r in results:
        preds = parse_detections(r["prediction"])
        gts = parse_detections(r["ground_truth"])
        total_pred, total_gt = total_pred + len(preds), total_gt + len(gts)

    print(f"Total predicted: {total_pred}")
    print(f"Total ground truth: {total_gt}")
else:
    print("No results available. Run Cell 5 first.")

In [None]:
# The parse_detections function is already defined above (Cell 6)
# but redefine for safety in Colab execution order:

def parse_detections(text):
    """Parse detection text into a list of detection dicts.

    Each line is split on whitespace and must contain at least five tokens:
    a class name followed by four integers. The four integers are stored, in
    the order they appear, under the keys ``xmin``, ``xmax``, ``ymin`` and
    ``ymax``. Lines that cannot be parsed are skipped.

    Two layouts are tried per line, in this order:

    1. first token is the class, tokens 2-5 are the integers (any trailing
       tokens are ignored);
    2. the last four tokens are the integers and everything before them is
       the class name, joined with single spaces. This handles class names
       containing a space, such as ``Green waste-A``.

    Args:
        text: Multi-line string, one detection per line.

    Returns:
        A list of dicts with keys ``class``, ``xmin``, ``xmax``, ``ymin``,
        ``ymax``; empty if no line could be parsed.
    """
    # NOTE(review): Pascal VOC boxes are conventionally written as
    # xmin ymin xmax ymax, whereas positions 2 and 3 are mapped to xmax and
    # ymin here — confirm against the label format used for training.
    dets = []
    for line in text.strip().split("\n"):
        parts = line.split()
        if len(parts) < 5:
            continue

        # (class tokens, coordinate tokens) candidates, tried in order.
        layouts = (
            (parts[:1], parts[1:5]),
            (parts[:-4], parts[-4:]),
        )
        for name_tokens, coord_tokens in layouts:
            try:
                xmin, xmax, ymin, ymax = map(int, coord_tokens)
            except ValueError:
                # Not four integers in this layout; try the next one.
                continue
            dets.append({
                "class": " ".join(name_tokens),
                "xmin": xmin,
                "xmax": xmax,
                "ymin": ymin,
                "ymax": ymax,
            })
            break
    return dets

# IoU and NMS functions unchanged
def calculate_iou(a, b):
    """Return the intersection-over-union of two boxes.

    Both arguments are dicts carrying ``xmin``, ``ymin``, ``xmax`` and
    ``ymax``. The result is 0.0 when the boxes do not overlap (including
    boxes that only touch along an edge) or when the union area is not
    positive.
    """
    # Corners of the overlap rectangle.
    left = max(a['xmin'], b['xmin'])
    top = max(a['ymin'], b['ymin'])
    right = min(a['xmax'], b['xmax'])
    bottom = min(a['ymax'], b['ymax'])

    # Empty or degenerate overlap.
    if right <= left or bottom <= top:
        return 0.0

    overlap = (right - left) * (bottom - top)
    area_a = (a['xmax'] - a['xmin']) * (a['ymax'] - a['ymin'])
    area_b = (b['xmax'] - b['xmin']) * (b['ymax'] - b['ymin'])
    combined = area_a + area_b - overlap

    if combined > 0:
        return overlap / combined
    return 0.0

def apply_nms(dets, thr=0.5):
    """Suppress overlapping same-class boxes, preferring larger ones.

    Detections carry no confidence score, so they are ranked by box area
    (largest first). A box is dropped when an already-kept box of the same
    class overlaps it with IoU strictly greater than ``thr``.

    Args:
        dets: List of detection dicts (``class``, ``xmin``, ``xmax``,
            ``ymin``, ``ymax``).
        thr: IoU threshold above which a same-class box is suppressed.

    Returns:
        The kept detections, in descending area order; ``[]`` for empty input.
    """
    if not dets:
        return []

    def _area(box):
        return (box['xmax'] - box['xmin']) * (box['ymax'] - box['ymin'])

    ranked = sorted(dets, key=_area, reverse=True)

    kept = []
    for candidate in ranked:
        suppressed = any(
            candidate["class"] == survivor["class"]
            and calculate_iou(candidate, survivor) > thr
            for survivor in kept
        )
        if not suppressed:
            kept.append(candidate)
    return kept

# GPT did this "Simplified for Colab", may cause an error!

#def get_class_color(cls):
#   return "orange"

# Class name -> matplotlib colour name. Built once at definition time rather
# than on every call. Related classes share a colour family (plastics in
# oranges, papers in blues, organics in yellows).
_CLASS_COLORS = {
    'Plastic-E': 'orange',
    'Plastic-A': 'darkorange',
    'Plastic-B': 'coral',
    'Plastic-C': 'orangered',
    'Plastic-D': 'tomato',
    'Plastic-G': 'chocolate',
    'Paper-A': 'cyan',
    'Paper-B': 'turquoise',
    'Paper-D': 'deepskyblue',
    'Metal': 'gray',
    # The category list in the inference prompt names this class "Wood";
    # "Wood-C" is kept so labels using the older spelling still resolve.
    'Wood': 'magenta',
    'Wood-C': 'magenta',
    'Glass-A': 'lime',
    'Organics-A': 'yellow',
    'Organics-B-NOT': 'gold',
    'Organics-E': 'khaki',
    'Green waste-A': 'limegreen',
    'Others': 'white',
}


def get_class_color(class_name):
    """Return the display colour assigned to a waste class.

    Args:
        class_name: Class label, e.g. ``"Plastic-A"``.

    Returns:
        A matplotlib colour name; ``'red'`` for any class not in the map.
    """
    return _CLASS_COLORS.get(class_name, 'red')

print("Visualization functions loaded.")

In [None]:
# Draw the NMS-filtered predicted boxes on top of each tested image.
if results:
    for i, entry in enumerate(results):
        print(f"Showing image {i+1}/{len(results)}")
        image_path = entry["image_path"]
        image = Image.open(image_path).convert("RGB")

        # Parse the raw model text, then drop overlapping same-class boxes.
        preds = apply_nms(parse_detections(entry["prediction"]))

        plt.figure(figsize=(12,8))
        plt.imshow(image)
        ax = plt.gca()

        for det in preds:
            # Rectangle takes the top-left corner plus width and height.
            x1, y1 = det["xmin"], det["ymin"]
            w, h = det["xmax"] - det["xmin"], det["ymax"] - det["ymin"]
            rect = patches.Rectangle(
                (x1, y1), w, h,
                linewidth=2, edgecolor='orange', facecolor='none',
            )
            ax.add_patch(rect)
            ax.text(x1, y1, det["class"], color="white", backgroundcolor="orange")

        plt.axis("off")
        plt.show()
else:
    print("No results available. Run Cell 5 first.")

In [None]:
def show_coordinate_system():
    """Plot a 100x100 reference grid with the origin at the top-left.

    The y-axis is inverted so that y grows downward, and the point (0, 0)
    is marked and labelled in red.
    """
    figure, axes = plt.subplots(figsize=(6, 6))

    # Fix both axes to the same 0..100 extent, then flip y.
    extent = (0, 100)
    axes.set_xlim(*extent)
    axes.set_ylim(*extent)
    axes.invert_yaxis()
    axes.set_title("Pascal VOC Coordinates (Top-Left Origin)")

    # Mark the origin and label it slightly offset so the text stays readable.
    axes.scatter([0], [0], c="red", s=100)
    axes.text(5, 5, "(0,0)", color="red")
    axes.grid(True)
    plt.show()


show_coordinate_system()