In [1]:
from datasets.dataset import SingleExpertDataset
from easydict import EasyDict as edict
import os

# Root folder of the multitask-moe datasets.
data_root = "/media/yesindeed/DATADRIVE1/mount/remote_cse/datasets/multitask-moe"

# Gather the dataset options in a plain mapping first; EasyDict then exposes
# them as attributes (args.data_root, args.input_size, args.metadata_path).
dataset = SingleExpertDataset(
    args=edict(
        {
            "data_root": data_root,
            "input_size": 224,
            "metadata_path": os.path.join(data_root, "metadata.csv"),
        }
    ),
    subsets=["chexpert-pleural effusion"],
    split="test",
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Show the class-name -> integer-index mapping held by the dataset.
dataset.classes

{'no pleural effusion found': 0, 'pleural effusion': 1}

In [3]:
# Show the text prompts held by the dataset; the recorded output is a nested
# list with two groups of two prompt strings each.
dataset.class_texts

[['An image of diseased', 'An image of no findings'],
 ['An image of no pleural effusion found', 'An image of pleural effusion']]

In [10]:
# Fetch one sample and print its label, raw class name and source dataset,
# one value per line.
data = dataset[9]

print(data["label"], data["class_raw"], data["dataset"], sep="\n")

tensor([1, 0])
no findings
chexpert


In [None]:
from datasets.dataset import MultiLabelDataset

# Same dataset root as the one assigned in the first cell of the notebook.
data_root = "/media/yesindeed/DATADRIVE1/mount/remote_cse/datasets/multitask-moe"

# Rebinds `dataset` to the multi-label variant, driven by its own metadata file.
dataset = MultiLabelDataset(
    args=edict(
        {
            "data_root": data_root,
            "input_size": 224,
            "metadata_path": os.path.join(data_root, "metadata_multilabel.csv"),
        }
    ),
    subsets=["lung-pet-ct-dx"],
    split="test",
)

In [None]:
# Inspect the `classes` attribute of the multi-label dataset.
dataset.classes

In [None]:
# Inspect the `class_texts` attribute of the multi-label dataset.
dataset.class_texts

In [None]:
# Fetch one sample of the multi-label dataset and print its label, raw class
# name and source dataset, one value per line.
data = dataset[11]

print(data["label"], data["class_raw"], data["dataset"], sep="\n")

In [None]:
import matplotlib.pyplot as plt
from torchvision.transforms import functional as F


# Convert the sample's "image" entry to a PIL image and draw it with the gray colormap.
# NOTE(review): if the dataset normalizes its images, the rendering may look off — confirm.
plt.imshow(F.to_pil_image(data["image"]), cmap="gray")

In [None]:
# Display every entry of the sample fetched above.
data

In [None]:
import torch

# Toy sizes for checking how a batch of image features multiplies against a
# (num_class, 2, dim) stack of text features.
bs, dim, num_class = 128, 1024, 10

image_feature = torch.rand(bs, dim)
text_feature = torch.rand(num_class, 2, dim)

# (bs, dim) x (num_class, dim, 2): the 2-D operand is broadcast over the
# leading num_class axis, giving (num_class, bs, 2).
out = torch.matmul(image_feature, text_feature.permute(0, 2, 1))

out.shape

In [None]:
import torch

# Load the "checkpoint-latest.pth" file for inspection.
# map_location="cpu" keeps every tensor on the CPU, so a checkpoint that was
# saved from a CUDA device still opens on a machine without a GPU.
# NOTE: torch.load unpickles arbitrary Python objects — only open checkpoints
# that come from a trusted source.
ckpt = torch.load(
    "/media/yesindeed/DATADRIVE1/mount/remote_cse/experiments/multitask-moe/biomedclip/seed0/lp/train_chexpert/checkpoint-latest.pth",
    map_location="cpu",
)

In [None]:
# List the keys stored under the checkpoint's "model" entry
# (presumably a state_dict — confirm against the training script).
ckpt["model"].keys()

In [None]:
import numpy as np
import torch
from sklearn.metrics import roc_auc_score


def one_hot(a, num_classes):
    """Convert integer class indices to a one-hot matrix.

    Args:
        a: Array-like of integer class indices in ``[0, num_classes)``.
            Any shape is accepted; the input is flattened first.
        num_classes: Number of columns of the result.

    Returns:
        ``float64`` array of shape ``(number_of_indices, num_classes)``.
    """
    # Row-indexing the identity matrix already yields a 2-D result. The former
    # np.squeeze dropped the sample axis for a single label (and the class
    # axis for num_classes == 1), handing a 1-D array to 2-D consumers.
    # np.asarray additionally lets plain Python lists through.
    return np.eye(num_classes)[np.asarray(a).reshape(-1)]


# Smoke test of roc_auc_score on synthetic 3-class data.
prob = torch.softmax(torch.rand(5, 3), dim=-1).numpy()
print(prob)

# Five independent draws often miss one of the three classes. The matching
# one-hot column then has no positive sample and ROC AUC is undefined for it
# (scikit-learn raises ValueError in many releases). Redraw until every class
# occurs at least once so the cell runs reliably.
array = torch.randint(0, 3, (5,)).numpy()
while len(set(array.tolist())) < 3:
    array = torch.randint(0, 3, (5,)).numpy()
print(array)

print(one_hot(array, 3))

auc = roc_auc_score(one_hot(array, 3), prob)
print(auc)

In [None]:
# Random binary labels, moved to the GPU only when one is present; the former
# unconditional .cuda() raised on CPU-only machines.
array = torch.randint(0, 2, (5,))
if torch.cuda.is_available():
    array = array.cuda()


def one_hot(x, num_classes, on_value=1.0, off_value=0.0):
    """Build a dense ``(N, num_classes)`` encoding of the indices in ``x``.

    ``x`` is cast to int64 and flattened into a column. Every row of the
    result is filled with ``off_value`` except at its index, which receives
    ``on_value``. The result is created on the same device as ``x``.

    Note: this rebinds the name ``one_hot`` that an earlier cell gave to a
    NumPy helper.
    """
    indices = x.long().view(-1, 1)
    encoded = torch.full((indices.shape[0], num_classes), off_value, device=indices.device)
    encoded.scatter_(1, indices, on_value)
    return encoded


# Show the raw labels, then their one-hot encoding (labels passed as a column).
print(array, one_hot(array.unsqueeze(-1), 2), sep="\n")

In [122]:
import numpy as np
import torch

# Subset name; it is interpolated into both prediction paths below.
# Note: this rebinds `dataset`, which earlier cells used for Dataset objects.
dataset = "lung-pet-ct-dx"

# Open the `pred_latest.npz` archive of the run folder and the `pred.npz`
# archive of its `eval/` sub-folder. allow_pickle=True is required because the
# archives hold object-dtype entries (see "text_features" further down).
npz_train, npz_eval = (
    np.load(npz_path, allow_pickle=True)
    for npz_path in (
        f"/media/yesindeed/DATADRIVE1/mount/remote_cse/experiments/multitask-moe/biomedclip/seed0/lp/train_{dataset}/pred_latest.npz",
        f"/media/yesindeed/DATADRIVE1/mount/remote_cse/experiments/multitask-moe/biomedclip/seed0/lp/train_{dataset}/eval/{dataset}/pred.npz",
    )
)

In [123]:
# Sanity check: both archives must hold element-wise identical label arrays.
(npz_train["labels"] == npz_eval["labels"]).all()

True

In [124]:
# Distinct values present in the evaluation labels.
np.unique(npz_eval["labels"])

array([0, 1, 2, 3, 4])

In [127]:
# Cast to float32 so the values print like npz_eval["logits_ad"].
# NOTE(review): the recorded outputs differ slightly between the two archives
# (e.g. 15.5390625 vs 15.53125) — confirm whether the two runs used different
# numeric precision.
npz_train["logits_ad"].astype(np.float32)

array([[21.484375 , 21.75     ],
       [15.5390625, 19.46875  ],
       [12.4765625, 19.234375 ],
       ...,
       [20.375    , 21.625    ],
       [14.984375 , 19.046875 ],
       [12.0546875, 20.40625  ]], dtype=float32)

In [126]:
# "logits_ad" as stored in the evaluation archive (two columns, float32 in the recorded output).
npz_eval["logits_ad"]

array([[21.484375 , 21.75     ],
       [15.53125  , 19.46875  ],
       [12.4765625, 19.234375 ],
       ...,
       [20.375    , 21.625    ],
       [14.9921875, 19.046875 ],
       [12.0546875, 20.40625  ]], dtype=float32)

In [103]:
# Names of all arrays stored in the evaluation archive.
list(npz_eval.keys())

['logits_ad',
 'logits_diag',
 'labels',
 'feature_pretrained',
 'feature_residual',
 'text_features']

In [100]:
# Text features stored in the training-side archive; the recorded output is an
# object array holding two float32 matrices (2 rows and 5 rows).
# NOTE(review): the variable is named "epoch0" but the archive file is
# `pred_latest.npz` — confirm which epoch these features belong to.
text_features_train_epoch0 = npz_train["text_features"]

text_features_train_epoch0

array([array([[ 0.15717892, -0.00215659, -0.27264616, ...,  0.16133256,
               -0.00571735,  0.13413092],
              [ 0.16950054, -0.03136753, -0.4428963 , ...,  0.03429547,
               -0.21187079,  0.16039711]], dtype=float32)              ,
       array([[ 0.14270435,  0.02773401, -0.5488286 , ..., -0.03261211,
               -0.19043043,  0.20697245],
              [ 0.09909903,  0.37599623, -0.31482425, ...,  0.06914981,
                0.220462  ,  0.3773029 ],
              [-0.07504829,  0.02507934, -0.37401712, ...,  0.11928586,
               -0.05772017,  0.29035434],
              [-0.11145949,  0.01316652, -0.52024966, ..., -0.03096075,
               -0.00833256,  0.36999142],
              [ 0.06279644,  0.06263534, -0.36327353, ...,  0.1476551 ,
               -0.00780672,  0.44318208]], dtype=float32)              ],
      dtype=object)

In [106]:
# Shape of the second text-feature matrix: (5, 512) in the recorded output.
text_features_train_epoch0[1].shape

(5, 512)

In [92]:
# Text features stored in the evaluation archive, for comparison with the
# training-side ones; the recorded values are close to but not equal to them.
npz_eval["text_features"]

array([array([[ 0.08925845, -0.06269898, -0.28041315, ...,  0.08074013,
               -0.04141391,  0.10935175],
              [ 0.17665195,  0.01470548, -0.5259161 , ..., -0.0117187 ,
               -0.21989319,  0.1867312 ]], dtype=float32)              ,
       array([[ 0.17665195,  0.01470544, -0.52591604, ..., -0.01171874,
               -0.21989328,  0.18673114],
              [ 0.01523243,  0.37906063, -0.32097137, ...,  0.05924025,
                0.22164223,  0.39922267],
              [-0.07075804, -0.03993231, -0.39417708, ...,  0.0965216 ,
               -0.08527295,  0.3095482 ],
              [-0.06608851, -0.00075469, -0.49799964, ..., -0.05840805,
               -0.01869716,  0.43124393],
              [ 0.02646679,  0.03762252, -0.29148975, ...,  0.20281917,
                0.05875021,  0.4052459 ]], dtype=float32)              ],
      dtype=object)

In [120]:
from models import BiomedCLIP

# Instantiate the BiomedCLIP wrapper and switch it to evaluation mode.
backbone = BiomedCLIP()
backbone.eval()

# The same two prompts that appear as the first group of dataset.class_texts.
texts = ["An image of diseased", "An image of no findings"]

# NOTE(review): the recorded output of this call and of the identical call in
# the next cell differ, and both carry a grad_fn — confirm whether encode_text
# is stochastic in eval mode and whether it should run under torch.no_grad().
tf = backbone.encode_text(texts)

tf

tensor([[ 0.1040, -0.0940, -0.2636,  ...,  0.0590,  0.0278,  0.1165],
        [ 0.1927,  0.0240, -0.5639,  ..., -0.0768, -0.1951,  0.1295]],
       grad_fn=<MmBackward0>)

In [121]:
# Repeat the same encode_text call to compare its output with `tf`.
backbone.encode_text(texts)

tensor([[ 0.1340, -0.0516, -0.2961,  ...,  0.0469, -0.0656,  0.0728],
        [ 0.1696, -0.0794, -0.5426,  ..., -0.0459, -0.2034,  0.2239]],
       grad_fn=<MmBackward0>)

In [118]:
# Row-wise cosine similarity between `tf` and a fresh encoding of the same prompts.
# NOTE(review): the recorded 1.0 values come from execution count 118, which
# predates the In [120]/In [121] outputs shown above — re-run to confirm.
cos = torch.nn.CosineSimilarity()

print(cos(tf, backbone.encode_text(texts)))

tensor([1.0000, 1.0000], grad_fn=<SumBackward1>)


In [104]:
# Copy each saved text-feature matrix into its own tensor.
text_features = list(map(torch.tensor, text_features_train_epoch0))

# Image features saved by the evaluation run: the "pretrained" part and the "residual" part.
image_feature_pretrained = torch.tensor(npz_eval["feature_pretrained"])
image_feature_residual = torch.tensor(npz_eval["feature_residual"])

# Element-wise sum of the two parts.
image_feature = torch.add(image_feature_pretrained, image_feature_residual)

BadZipFile: Bad CRC-32 for file 'feature_residual.npy'

In [62]:
from utils.metrics import multitask_classification_report

# Softmax over the last axis of the saved "logits_diag" array, back as NumPy.
prob_diag_all = torch.from_numpy(npz_train["logits_diag"]).softmax(dim=-1).numpy()

# Score the probabilities against the last label column.
# NOTE(review): the numbered "pulmonary embolism1..3" names look like
# placeholders — confirm the real class names.
report = multitask_classification_report(
    prob_diag_all,
    npz_train["labels"][:, -1],
    classes=[
        "no findings",
        "pulmonary embolism",
        "pulmonary embolism1",
        "pulmonary embolism2",
        "pulmonary embolism3",
    ],
    suffix="-diag",
)

report

{'auc-diag': 0.9039362399043596, 'acc-diag': 0.8856629213483146}

In [24]:
# NOTE(review): stale duplicate of the npz_eval["logits_ad"] cell above — the
# recorded output (execution count 24) does not match the In [126] output.
npz_eval["logits_ad"]

array([[23.218271, 28.55718 ],
       [22.623829, 29.882902],
       [17.40026 , 25.26371 ],
       ...,
       [21.250603, 28.289215],
       [20.972004, 28.157858],
       [22.025307, 28.142067]], dtype=float32)

In [13]:
# NOTE(review): stale duplicate cell (execution count 13); its recorded output
# is a BadZipFile CRC error raised while reading 'logits_ad.npy'.
npz_eval["logits_ad"]

BadZipFile: Bad CRC-32 for file 'logits_ad.npy'