In [1]:
!pip install -U nbdev

Collecting nbdev
  Downloading nbdev-2.3.29-py3-none-any.whl.metadata (10 kB)
Collecting execnb>=0.1.4 (from nbdev)
  Downloading execnb-0.1.6-py3-none-any.whl.metadata (3.2 kB)
Collecting ghapi>=1.0.3 (from nbdev)
  Downloading ghapi-1.0.6-py3-none-any.whl.metadata (13 kB)
Collecting watchdog (from nbdev)
  Downloading watchdog-5.0.2-py3-none-manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.6/41.6 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
Collecting fastcore>=1.5.27 (from nbdev)
  Downloading fastcore-1.7.4-py3-none-any.whl.metadata (3.4 kB)
Downloading nbdev-2.3.29-py3-none-any.whl (67 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.2/67.2 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading execnb-0.1.6-py3-none-any.whl (14 kB)
Downloading ghapi-1.0.6-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.4/62.4 kB[0m [31m1.8 MB/s[0m et

In [2]:
!nbdev_new --lib_name isic_helper --repo isic-helper --branch master --user utkarsh --author Utkarsh --author_email itsmedjut@gmail.com --description helper_functions_for_isic_2024

settings.ini created.
/bin/sh: 1: quarto: not found


In [3]:
#|default_exp isic_helper

In [4]:
#|export
import h5py
from io import BytesIO
from tqdm import tqdm

from fastai.vision.all import *
from fastai.tabular.all import *

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc

In [5]:
#|export
def get_img_data(train_path, test_path, df):
    imgs_data = {}
    _df = df.set_index("isic_id")
    with h5py.File(train_path, 'r') as f:
        for key in tqdm(f.keys(), desc="Reading train files"):
            img_data = BytesIO(f[key][()])
            imgs_data[key] = img_data, _df.loc[key, "target"]
    with h5py.File(test_path, 'r') as f:
        for key in tqdm(f.keys(), desc="Reading test files"):
            img_data = BytesIO(f[key][()])
            imgs_data[key] = img_data,
    return imgs_data

In [6]:
#|export
def sample(idx, df, do_up, ratio):
    cond = df.loc[idx,"target"]==1
    pos_idx, neg_idx = idx[cond], idx[~cond]
    
    n_pos, n_neg = len(pos_idx), len(neg_idx)
    if do_up: pos_idx = np.tile(pos_idx, int(ratio*n_neg/n_pos))
    else: neg_idx = np.random.choice(neg_idx, int(n_pos/ratio) ,replace=False)
    
    sampled_idx = np.concatenate([pos_idx, neg_idx])
    np.random.shuffle(sampled_idx); return sampled_idx

In [7]:
#|export      
def sampled_splitter(items, df, do_up=False, ratio=1):
    patient_ids = df["patient_id"].unique()
    train_patients, _ = train_test_split(patient_ids, test_size=0.2)
    
    _df = df.set_index("isic_id")
    cond = _df.loc[items, "patient_id"].isin(train_patients)
    train_idx, valid_idx =  np.where(cond)[0], np.where(~cond)[0]
    
    train_idx = sample(train_idx, df, do_up, ratio)
    return list(train_idx), list(valid_idx)

In [8]:
#|export
def p_AUC(targs, preds, tpr_thresh=0.8):
    if len(targs) == 0: return None
    v_gt = abs(np.array(targs) - 1)
    v_pred = -1.0 * np.array(preds)
    
    if np.any(np.isnan(v_pred)) or np.any(np.isinf(v_pred)):
        idx_nan = np.where(np.isnan(v_pred))[0]
        print('NaN predictions: ', v_pred[idx_nan])
        idx_inf = np.where(np.isinf(v_pred))[0]
        print('Inf predictions: ', v_pred[idx_inf])
        raise ValueError("Nan or inf values in predictions")

    fpr, tpr, _ = roc_curve(v_gt, v_pred)
    max_fpr = abs(1 - tpr_thresh)

    stop = np.searchsorted(fpr, max_fpr, "right")
    if stop == len(fpr): stop -= 1
    x_interp = [fpr[stop - 1], fpr[stop]]
    y_interp = [tpr[stop - 1], tpr[stop]]
    tpr = np.append(tpr[:stop], np.interp(max_fpr, x_interp, y_interp))
    fpr = np.append(fpr[:stop], max_fpr)

    return auc(fpr, tpr)

In [9]:
#|export
class pAUCMetric(Metric):
    def reset(self): self.targs=[]; self.preds=[]
        
    def accumulate(self, learn):
        preds, targs = learn.pred, learn.y
        self.preds.extend(preds.cpu().numpy())
        self.targs.extend(targs.cpu().numpy())

    @property
    def value(self): return p_AUC(self.targs, self.preds)
    
    @property
    def name(self): return "pAUC"

In [10]:
#|export
def flat_BCE_with_logits(preds, targs, ratio):
    targs = targs.unsqueeze(1).float()
    pos_wt = torch.tensor(1/ratio, dtype=torch.float)
    
    return torch.nn.BCEWithLogitsLoss(pos_weight=pos_wt)(preds, targs)

In [11]:
%notebook ISIC_Helper.ipynb

In [12]:
from nbdev.export import nb_export
nb_export('ISIC_Helper.ipynb', '.')

In [13]:
!cat isic_helper.py

# AUTOGENERATED! DO NOT EDIT! File to edit: ISIC_Helper.ipynb.

# %% auto 0
__all__ = ['get_img_data', 'sample', 'sampled_splitter', 'p_AUC', 'pAUCMetric', 'flat_BCE_with_logits']

# %% ISIC_Helper.ipynb 3
import h5py
from io import BytesIO
from tqdm import tqdm

from fastai.vision.all import *
from fastai.tabular.all import *

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc

# %% ISIC_Helper.ipynb 4
def get_img_data(train_path, test_path, df):
    imgs_data = {}
    _df = df.set_index("isic_id")
    with h5py.File(train_path, 'r') as f:
        for key in tqdm(f.keys(), desc="Reading train files"):
            img_data = BytesIO(f[key][()])
            imgs_data[key] = img_data, _df.loc[key, "target"]
    with h5py.File(test_path, 'r') as f:
        for key in tqdm(f.keys(), desc="Reading test files"):
            img_data = BytesIO(f[key][()])
            imgs_data[key] = img_data,
    ret