In [None]:
# List the contents of the `input` directory (relative to the current working directory).
!ls input

In [None]:
# Print the GPU/driver status reported by nvidia-smi.
!nvidia-smi

In [None]:
# Install the pinned Ultralytics release. `%pip` (rather than `!pip`) installs into the
# environment of the running kernel, so the import in the next cells sees this version.
%pip install ultralytics==8.3.49

In [None]:
# Root directory of the inputs; the notebook reads test.csv, test_images/, models/ and scripts/ from it.
data_path = './input'

In [None]:
import torch
import torchvision
import numpy as np
from ultralytics.utils import LOGGER
from tqdm import tqdm
import os
from PIL import Image
import time
from ultralytics import YOLO
from ultralytics.models.yolo.detect import DetectionPredictor
from ultralytics.engine.results import Results
from ultralytics.utils import ops
from random import random

import warnings
warnings.filterwarnings("ignore")

import sys
sys.path.append(f'{data_path}/scripts')
from metrics import compute_cytologia_metric_optimized

In [None]:
import cv2
import numpy as np
import os
from tqdm import tqdm
import pandas as pd

def load_image(img_path):
    """Open the image file at ``img_path`` and return a copy of it.

    The copy is taken while the file is still open inside the ``with`` block, and the
    file is closed on exit; the returned object is the copy, not the opened handle.
    """
    with Image.open(img_path) as opened:
        return opened.copy()

In [None]:

# Read the test manifest and build the path of the test image directory.
test = pd.read_csv(os.path.join(data_path, 'test.csv'))
img_folder = os.path.join(data_path, 'test_images')
# File name of the first manifest row; the next cell loads it as a quick check.
# (Direct positional access replaces an iterrows() loop that broke after one row.)
imgpath = test['NAME'].iloc[0]

In [None]:
%%time
# Load the image named by `imgpath` from `img_folder`; the bare `img` expression makes it the cell output.
img = load_image(os.path.join(img_folder, imgpath))
img

In [None]:
def my_non_max_suppression(
    prediction,
    conf_thres=0.25,
    iou_thres=0.45,
    classes=None,
    agnostic=False,
    multi_label=False,
    labels=(),
    max_det=300,
    nc=0,  # number of classes (optional)
    max_time_img=0.05,
    max_nms=30000,
    max_wh=7680,
    in_place=True,
    rotated=False,
):
    """
    Perform non-maximum suppression (NMS) and append each kept box's full class-score vector.

    Compared with a plain NMS that only reports the best class, every returned row is
    extended with the ``nc`` per-class scores of that same box. The score matrix is
    filtered and reordered together with the detections at every step, so row ``k`` of
    the scores always belongs to row ``k`` of the boxes.

    Args:
        prediction (torch.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
            containing the predicted boxes, classes, and masks. A list/tuple is accepted, in which
            case only its first element is used.
        conf_thres (float): The confidence threshold below which boxes will be filtered out.
            Valid values are between 0.0 and 1.0.
        iou_thres (float): The IoU threshold passed to NMS; a box overlapping a higher-scoring
            box by more than this is suppressed. Valid values are between 0.0 and 1.0.
        classes (List[int]): A list of class indices to consider. If None, all classes will be considered.
        agnostic (bool): If True, boxes of all classes compete with each other in NMS
            (no per-class offset is applied).
        multi_label (bool): If True, each box may have multiple labels.
        labels (List[List[Union[int, float, torch.Tensor]]]): A list of lists, where each inner
            list contains the apriori labels for a given image, each label being
            (class_index, x, y, w, h) as consumed by ``ops.xywh2xyxy`` below.
        max_det (int): The maximum number of boxes to keep after NMS.
        nc (int, optional): The number of classes output by the model. Any indices after this will be considered masks.
        max_time_img (float): The maximum time (seconds) for processing one image.
        max_nms (int): The maximum number of boxes into torchvision.ops.nms().
        max_wh (int): Per-class coordinate offset used to keep classes apart in batched NMS.
        in_place (bool): If True, the input prediction tensor will be modified in place.
        rotated (bool): If Oriented Bounding Boxes (OBB) are being passed for NMS.

    Returns:
        (List[torch.Tensor]): A list of length batch_size, where each element is a tensor of
            shape (num_boxes, 6 + num_masks + num_classes) containing the kept boxes, with columns
            (x1, y1, x2, y2, confidence, class, mask1, mask2, ..., all class probabilities).
            For end-to-end predictions (last dimension of size 6) the rows are returned as
            (x1, y1, x2, y2, confidence, class) without the class-probability columns.
    """

    # Checks
    assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
    assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
    if isinstance(prediction, (list, tuple)):  # YOLOv8 model in validation model, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output
    if classes is not None:
        classes = torch.tensor(classes, device=prediction.device)

    if prediction.shape[-1] == 6:  # end-to-end model (BNC, i.e. 1,300,6)
        output = [pred[pred[:, 4] > conf_thres][:max_det] for pred in prediction]
        if classes is not None:
            output = [pred[(pred[:, 5:6] == classes).any(1)] for pred in output]
        return output

    bs = prediction.shape[0]  # batch size (BCN, i.e. 1,84,6300)
    nc = nc or (prediction.shape[1] - 4)  # number of classes
    nm = prediction.shape[1] - nc - 4  # number of masks
    mi = 4 + nc  # mask start index
    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates

    # Settings
    time_limit = 2.0 + max_time_img * bs  # seconds to quit after
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)

    prediction = prediction.transpose(-1, -2)  # shape(1,84,6300) to shape(1,6300,84)
    if not rotated:
        if in_place:
            prediction[..., :4] = ops.xywh2xyxy(prediction[..., :4])  # xywh to xyxy
        else:
            prediction = torch.cat((ops.xywh2xyxy(prediction[..., :4]), prediction[..., 4:]), dim=-1)  # xywh to xyxy

    t = time.time()
    # Default per-image result: zero rows, already at the extended width (incl. class scores).
    output = [torch.zeros((0, 6 + nm + nc), device=prediction.device)] * bs
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]) and not rotated:
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
            v[:, :4] = ops.xywh2xyxy(lb[:, 1:5])  # box
            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Detections matrix nx6 (xyxy, conf, cls)
        box, cls, mask = x.split((4, nc, nm), 1)

        # `all_cls_probs` must stay row-aligned with `x`: every selection applied to `x`
        # from here on is applied to it as well, otherwise the NMS indices (which refer to
        # rows of `x`) would pick the class scores of unrelated boxes.
        if multi_label:
            i, j = torch.where(cls > conf_thres)
            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
            all_cls_probs = cls[i]
        else:  # best class only
            conf, j = cls.max(1, keepdim=True)
            keep = conf.view(-1) > conf_thres
            x = torch.cat((box, conf, j.float(), mask), 1)[keep]
            all_cls_probs = cls[keep]

        # Filter by class
        if classes is not None:
            keep = (x[:, 5:6] == classes).any(1)
            x = x[keep]
            all_cls_probs = all_cls_probs[keep]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        if n > max_nms:  # excess boxes
            order = x[:, 4].argsort(descending=True)[:max_nms]  # sort by confidence and remove excess boxes
            x = x[order]
            all_cls_probs = all_cls_probs[order]

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        scores = x[:, 4]  # scores
        if rotated:
            boxes = torch.cat((x[:, :2] + c, x[:, 2:4], x[:, -1:]), dim=-1)  # xywhr
            i = ops.nms_rotated(boxes, scores, iou_thres)
        else:
            boxes = x[:, :4] + c  # boxes (offset by class)
            i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        i = i[:max_det]  # limit detections

        output[xi] = torch.cat((x[i], all_cls_probs[i]), dim=1)

        if (time.time() - t) > time_limit:
            LOGGER.warning(f"WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded")
            break  # time limit exceeded

    return output

class MyCustomPredictor(DetectionPredictor):
    """Detection predictor that also collects class-probability vectors.

    ``postprocess`` runs ``my_non_max_suppression`` (whose rows end with the per-class
    scores), strips those trailing columns before building ``Results``, and appends the
    scores of the FIRST kept box of every image that has at least one box to ``self.probs``.
    Images without boxes contribute nothing to ``self.probs``.
    """

    def postprocess(self, preds, img, orig_imgs):
        """Post-processes predictions and returns a list of Results objects.

        Side effect: extends ``self.probs`` with one numpy vector per image that has at
        least one detection (the class scores of its first row after NMS).
        """
        preds = my_non_max_suppression(
            preds,
            self.args.conf,
            self.args.iou,
            agnostic=self.args.agnostic_nms,
            max_det=self.args.max_det,
            classes=self.args.classes,
        )

        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

        # `probs` is normally attached by the caller; create it on first use so the
        # predictor also works when nobody initialised it.
        if not hasattr(self, "probs"):
            self.probs = []

        # Width of the trailing class-score block, taken from the model instead of a
        # hard-coded class count.
        nc = len(self.model.names)

        results = []
        probs = []
        for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]):
            pred, prob = pred[:, :-nc], pred[:, -nc:]
            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
            results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred))
            if len(prob):
                probs.append(prob.cpu().numpy()[0])  # first kept box only
        self.probs.extend(probs)
        return results

In [None]:
def predict_det(df, model, aug, batch_size=1, max_det=11):
    """Run the detection model over the images in ``df`` and build one row per ``trustii_id``.

    Reads these notebook globals: ``img_folder`` (image directory), ``tids`` (image name ->
    list of ``trustii_id``), ``load_image`` and ``MyCustomPredictor``. The model's predictor
    is replaced by a fresh ``MyCustomPredictor`` so class-probability vectors are recorded.

    For every image, the k-th id in ``tids[name]`` receives the k-th detection. When an
    image has fewer detections than ids and ``max_det > 1``, the most recent real detection
    is repeated (with a tiny random offset on ``x1`` so rows are not exact duplicates); with
    ``batch_size == 1`` each such extra row gets a one-hot probability vector for its class.
    If no detection has been produced at all yet, the row is skipped with a warning.
    NOTE(review): when an image has zero detections the repeated box comes from an earlier
    image - this mirrors the previous behaviour and should be confirmed as intended.

    Args:
        df: DataFrame with a ``NAME`` column of image file names.
        model: Ultralytics ``YOLO`` detection model (callable, exposes ``predictor``/``names``).
        aug: Passed to the model as ``augment``.
        batch_size: Number of images per model call. Padding rows only get probability
            vectors when this is 1.
        max_det: Passed to the model as ``max_det``; values above 1 enable padding.

    Returns:
        Tuple ``(sub, probs, names)``: a DataFrame with columns ``trustii_id, NAME, x1, y1,
        x2, y2, class``; an array with one class-probability row per row of ``sub``; and the
        model's ``{class_id: class_name}`` mapping.
    """
    columns = ['trustii_id', 'NAME', 'x1', 'y1', 'x2', 'y2', 'class']

    if df.shape[0] == 0:
        names = model.names
        return pd.DataFrame([], columns=columns), np.zeros((0, len(names))), names

    # One throw-away call: it makes `model.predictor` exist (its args are copied below)
    # and exposes the class-name mapping.
    warmup_img = load_image(os.path.join(img_folder, df['NAME'].iloc[-1]))
    names = model(warmup_img, verbose=False, augment=aug)[0].names
    n_classes = len(names)

    my_predictor = MyCustomPredictor()
    my_predictor.args = model.predictor.args
    my_predictor.probs = []
    model.predictor = my_predictor

    def dec(x):
        # Round coordinates to 3 decimals.
        return [float(f'{i:.3f}') for i in x]

    outs = []
    last_det = None  # (x1, y1, x2, y2, class name, class id) of the latest real detection
    with tqdm(total=df.shape[0], desc="Processing images") as pbar:
        for start in range(0, df.shape[0], batch_size):
            batch_names = df['NAME'].iloc[start: start + batch_size].tolist()
            imgs = [load_image(os.path.join(img_folder, name)) for name in batch_names]
            results = model(imgs, max_det=max_det, verbose=False, augment=aug)

            for img_name, result in zip(batch_names, results):
                xyxys = result.boxes.xyxy.cpu().numpy().tolist()
                clss = result.boxes.cls.cpu().numpy().tolist()
                for c, idx in enumerate(tids[img_name]):
                    if c < len(clss):
                        x1, y1, x2, y2 = dec(xyxys[c])
                        cls_id = int(clss[c])
                        cls = names[cls_id]
                        last_det = (x1, y1, x2, y2, cls, cls_id)
                        outs.append([idx, img_name, x1, y1, x2, y2, cls])
                    elif max_det > 1:
                        if last_det is None:
                            LOGGER.warning(f"No detection available to pad {img_name} (id {idx}); row skipped")
                            continue
                        # copy last prediction but avoid exact duplication
                        x1, y1, x2, y2, cls, cls_id = last_det
                        outs.append([idx, img_name, x1 + random() * 1e-5, y1, x2, y2, cls])
                    if batch_size == 1:
                        # The predictor stored only the first box's scores; every further
                        # row of this image gets a one-hot vector for its class.
                        if len(outs) == len(my_predictor.probs) + 1:
                            one_hot = np.zeros(n_classes)
                            one_hot[cls_id] = 1
                            my_predictor.probs.append(one_hot)
                        else:
                            assert len(outs) == len(my_predictor.probs), \
                                "output rows and probability rows went out of sync"
                pbar.update(1)  # Update progress bar for each image
            assert len(outs) == len(my_predictor.probs)

    sub = pd.DataFrame(outs, columns=columns)
    probs = np.array(my_predictor.probs)
    if probs.size == 0:
        probs = np.zeros((0, n_classes))  # keep a 2-D shape so callers can stack it
    return sub, probs, names

def predict_cls(test, model, batch_size=32):
    """
    Classify every image listed in a DataFrame with batched inference.

    Reads the notebook globals ``img_folder`` and ``load_image``. The whole image named
    in ``NAME`` is passed to the model; no other column of ``test`` is read.

    Args:
        test: DataFrame with a ``NAME`` column of image file names. It is not modified.
        model: Classification model; called as ``model(list_of_images, verbose=False)`` and
            expected to return results exposing ``probs.data`` and ``names``.
        batch_size: The batch size for inference.

    Returns:
        A copy of ``test`` with a ``class`` column (name of the highest-probability class)
        and one probability column per class name. Existing columns with those names are
        overwritten in the copy.
    """
    out = test.copy()  # never write into the caller's frame

    if out.shape[0] == 0:
        # Nothing to run; still return the expected columns.
        out['class'] = []
        for v in model.names.values():
            out[v] = np.zeros(0)
        return out

    cls_res = []
    probs = []
    for start in tqdm(range(0, out.shape[0], batch_size)):
        batch_names = out['NAME'].iloc[start:start + batch_size]
        imgs = [load_image(os.path.join(img_folder, name)) for name in batch_names]

        res = model(imgs, verbose=False)

        for r in res:
            pt = r.probs.data.cpu().numpy()
            probs.append(pt)
            cls_res.append(r.names[pt.argmax()])

    probs = np.array(probs)
    out['class'] = cls_res
    # Class-name mapping taken from the last batch's results.
    for k, v in res[0].names.items():
        out[v] = probs[:, k]
    return out

In [None]:
%%time
# Run-wide settings. `aug` is passed to predict_det below; `fold` is only a placeholder
# here (the fold loops reassign it); `subs` collects one prediction frame per model/fold.
aug = True
fold = 0
subs = []

# Test manifest, annotated with how many manifest rows share each image name.
test = pd.read_csv(f'{data_path}/test.csv')
test['bbox_count'] = test.groupby('NAME')['NAME'].transform('count')
# Manifest rows of images that appear exactly once.
test1 = test[test.bbox_count==1]

# Image name -> list of its trustii_id values (read as a global inside predict_det).
tids = test.groupby('NAME')['trustii_id'].agg(list).to_dict()
# One row per image name with its number of manifest rows.
df = test.groupby('NAME').agg({'trustii_id':'count'})
df.columns = ['bbox_count']
df = df.reset_index()
# The number of distinct image names must equal the number of entries in the image folder.
assert df.shape[0] == len(os.listdir(img_folder))
mask = df.bbox_count == 1
df1 = df[mask]  # images with exactly one manifest row (a "[:100]" debug slice is left disabled)
df2 = df[~mask]  # images with several manifest rows (a "[:100]" debug slice is left disabled)

# Detection stage: run each of the four `aug1` fold models over all test images and
# store one frame per fold (aligned to the row order of `test`) in `subs`.
backup = []
for fold in range(4):
    det_model = YOLO(f'{data_path}/models/aug1/fold_{fold}_best.pt')
    print('aug1', f'fold {fold}')
    # Images with several manifest rows: one image per call, up to 11 boxes.
    sub2, probs2, names = predict_det(df2, det_model, aug, batch_size=1, max_det=11)
    # Expected row count comes from the manifest rather than a hard-coded number.
    print(sub2.shape, "should be", int(df2.bbox_count.sum()))
    # Images with a single manifest row: batched, at most one box each.
    sub1, probs1, names = predict_det(df1, det_model, aug, batch_size=32, max_det=1)
    print(sub1.shape, df1.shape)
    # This used to be a bare tuple expression, which displays nothing inside a loop.
    print(sub1.shape[0] + sub2.shape[0], test.shape[0])

    # Single-row images for which the detector returned no box.
    df3 = df1[~df1.NAME.isin(sub1.NAME)]
    if df3.shape[0]:
        # Placeholder rows: reuse the first predictions of `sub1` (boxes and class) under
        # the missing names/ids, with all-zero probabilities so they add nothing to the
        # ensemble sum. Requires `sub1` to have at least as many rows as `df3`.
        sub3 = sub1[:df3.shape[0]].copy()
        sub3['NAME'] = df3['NAME'].values
        sub3['trustii_id'] = sub3['NAME'].apply(lambda x: tids[x][0])
        probs3 = np.zeros((df3.shape[0], probs2.shape[1]))
        sub = pd.concat([sub1,sub2,sub3])
        probs = np.vstack([probs1, probs2, probs3])
    else:
        sub = pd.concat([sub1,sub2])
        probs = np.vstack([probs1, probs2])

    # Attach one probability column per class name (row order of `sub` matches `probs`).
    for k,v in names.items():
        sub[v] = probs[:,k]
        # only keep 5 decimal places
        sub[v] = sub[v].round(5)
    # Re-order to the manifest's row order; ids without a prediction become NaN rows.
    sub = test[['trustii_id']].merge(sub, on='trustii_id', how='left')
    subs.append(sub)
    backup.append(sub.copy())

# Classification stage, single-row images only. `df1` first gains the columns of the
# first detection frame (joined on NAME); each classifier's predictions are then joined
# onto `test1` so every frame appended to `subs` follows the row order of `test1`.
df1 = df1.merge(subs[0],on='NAME',how='left')
for model_name in ['crop','rotate','rotate50']:
    for fold in range(4):
        cls_model = YOLO(f'{data_path}/models/{model_name}/fold_{fold}_best.pt')
        print(model_name, f'fold {fold}')
        # predict_cls is given the full image named in NAME for each row of df1.
        sub = predict_cls(df1, cls_model, batch_size=16)
        sub = test1[['trustii_id','NAME']].merge(sub, on='NAME', how='left')
        subs.append(sub)
        # Untouched copy; the frames in `subs` get extra columns added further down.
        backup.append(sub.copy())

# Ensemble for single-row images: sum the class-probability columns of every frame in
# `subs` and overwrite the class in the first detection frame with the arg-max class.
# Rows of images with several boxes keep the first fold's detector class.
sub0 = subs[0]
# The probability columns are the trailing ones, one per entry of `names`
# (the class mapping returned by the detection stage).
class_names = sub0.columns[-len(names):]
sub0['bbox_count'] = sub0.groupby('NAME')['NAME'].transform('count')
mask = sub0.bbox_count == 1
# Explicit copy: the accumulator is updated in place below and must be writable and
# independent of `sub0`.
probs = sub0.loc[mask, class_names].to_numpy(copy=True)
for sub1 in subs[1:]:
    sub1['bbox_count'] = sub1.groupby('NAME')['NAME'].transform('count')
    mask2 = sub1.bbox_count == 1
    # Relies on every frame listing the single-row images in the same order.
    probs += sub1.loc[mask2,class_names].values
sub0['class_id'] = 0
sub0.loc[mask, 'class_id'] = probs.argmax(axis=1)
sub0.loc[mask, 'class'] = sub0.loc[mask, 'class_id'].apply(lambda x: class_names[x])

In [None]:
# Write the submission: only the id, image name, box corners and final class are exported.
cols = ['trustii_id', 'NAME', 'x1', 'y1', 'x2', 'y2', 'class']
sub0 = sub0.reset_index(drop=True)
sub0.to_csv('rerun_fix_test_aug1_crop_rotate_rotate50_avg.csv', columns=cols, index=False)