# Importing Data

In [1]:
import pandas as pd

!pip install -Uqq fastkaggle --no-index --find-links=file:///kaggle/input/library-fastkaggle/
from fastkaggle import *

from shutil import copyfile
# Copy the helper module from the attached input dataset into ../working before
# importing it.
# NOTE(review): presumably ../working is the notebook's current directory, which is
# what makes `isic_helper` importable afterwards — confirm.
copyfile(src = "../input/isic-helper/isic_helper.py", dst = "../working/isic_helper.py")
# Star import: names used further down that are not defined in this notebook
# (e.g. get_img_data, sample, flat_BCE_with_logits, pAUCMetric) presumably come
# from this module — confirm.
from isic_helper import *

In [2]:
# Resolve the data directory for the 'isic-2024-challenge' competition.
# NOTE(review): setup_comp is presumably provided by the fastkaggle star import — confirm.
path = setup_comp('isic-2024-challenge')
# Bare expression: list the directory contents as this cell's output.
path.ls()

(#6) [Path('../input/isic-2024-challenge/sample_submission.csv'),Path('../input/isic-2024-challenge/train-metadata.csv'),Path('../input/isic-2024-challenge/test-metadata.csv'),Path('../input/isic-2024-challenge/test-image.hdf5'),Path('../input/isic-2024-challenge/train-image'),Path('../input/isic-2024-challenge/train-image.hdf5')]

In [3]:
# Load the training metadata table. low_memory=False asks pandas to parse the file in
# one pass instead of in chunks, which avoids mixed-type column inference.
df = pd.read_csv(path/'train-metadata.csv', low_memory=False)
# Bare expression: display the frame as this cell's output.
df

Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,lesion_id,iddx_full,iddx_1,iddx_2,iddx_3,iddx_4,iddx_5,mel_mitotic_index,mel_thick_mm,tbp_lv_dnn_lesion_confidence
0,ISIC_0015670,0,IP_1235828,60.0,male,lower extremity,3.04,TBP tile: close-up,3D: white,20.244422,...,,Benign,Benign,,,,,,,97.517282
1,ISIC_0015845,0,IP_8170065,60.0,male,head/neck,1.10,TBP tile: close-up,3D: white,31.712570,...,IL_6727506,Benign,Benign,,,,,,,3.141455
2,ISIC_0015864,0,IP_6724798,60.0,male,posterior torso,3.40,TBP tile: close-up,3D: XP,22.575830,...,,Benign,Benign,,,,,,,99.804040
3,ISIC_0015902,0,IP_4111386,65.0,male,anterior torso,3.22,TBP tile: close-up,3D: XP,14.242329,...,,Benign,Benign,,,,,,,99.989998
4,ISIC_0024200,0,IP_8313778,55.0,male,anterior torso,2.73,TBP tile: close-up,3D: white,24.725520,...,,Benign,Benign,,,,,,,70.442510
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401054,ISIC_9999937,0,IP_1140263,70.0,male,anterior torso,6.80,TBP tile: close-up,3D: XP,22.574335,...,IL_9520694,Benign,Benign,,,,,,,99.999988
401055,ISIC_9999951,0,IP_5678181,60.0,male,posterior torso,3.11,TBP tile: close-up,3D: white,19.977640,...,,Benign,Benign,,,,,,,99.999820
401056,ISIC_9999960,0,IP_0076153,65.0,female,anterior torso,2.05,TBP tile: close-up,3D: XP,17.332567,...,IL_9852274,Benign,Benign,,,,,,,99.999416
401057,ISIC_9999964,0,IP_5231513,30.0,female,anterior torso,2.80,TBP tile: close-up,3D: XP,22.288570,...,,Benign,Benign,,,,,,,100.000000


In [4]:
# Locations of the HDF5 files holding the train and test images.
img_path = path/'train-image.hdf5'
test_img_path = path/'test-image.hdf5'

# Load the image data through get_img_data (not defined in this notebook).
# The getters defined below index the result as imgs_data[key][0] for the image
# and imgs_data[key][1] for the label, with keys taken from df.isic_id.
imgs_data = get_img_data(img_path, test_img_path, df)

Reading train files: 100%|██████████| 401059/401059 [01:46<00:00, 3770.87it/s]
Reading test files: 100%|██████████| 3/3 [00:00<00:00, 208.59it/s]


# Setting up the DataLoaders

In [5]:
def get_items(path):
    """Return the item keys for the DataBlock: the `isic_id` column of `df`.

    The `path` argument is accepted but not used.
    """
    return df.isic_id

def get_x(key):
    """Create the input image for `key` from element 0 of its `imgs_data` entry."""
    entry = imgs_data[key]
    return PILImage.create(entry[0])

def get_y(key):
    """Return the label for `key`: element 1 of its `imgs_data` entry."""
    entry = imgs_data[key]
    return entry[1]

In [6]:
from fastai.vision.all import *

# Transforms handed to the DataBlock as `item_tfms`: resize to 128, then tensor conversion.
# NOTE(review): no `method` is given to Resize; in fastai `pad_mode` is presumably only
# relevant when the resize method pads — confirm that pad_mode='zeros' has an effect here.
item_tfms = [
    Resize(128, pad_mode='zeros'),
    ToTensor(),
]

# Transforms handed to the DataBlock as `batch_tfms`: int -> float conversion followed
# by the augmentations returned by aug_transforms.
batch_tfms = [IntToFloatTensor()] + list(
    aug_transforms(flip_vert=True, p_affine=0.5, p_lighting=0.5, pad_mode='zeros')
)

In [7]:
def _splitter(items, valid_idx):
    """Split `items` into training and validation indices, resampling the training side.

    The training indices are the first element returned by
    `IndexSplitter(valid_idx)(items)`. They are passed through `sample` twice:
    first with `do_up=False, ratio=1/300`, then with `do_up=True, ratio=2/3`.
    The validation indices are returned as given, without resampling.

    NOTE(review): `sample` is not defined in this notebook. Judging by the argument
    names, the two passes look like down-sampling followed by up-sampling to
    rebalance the classes — confirm against its definition.

    Returns:
        A `(train_indices, valid_indices)` pair, both as plain lists.
    """
    kept = np.array(IndexSplitter(valid_idx)(items)[0])
    # Apply the two resampling passes in order.
    for do_up, ratio in ((False, 1/300), (True, 2/3)):
        kept = sample(kept, df, do_up=do_up, ratio=ratio)
    return list(kept), list(valid_idx)

In [8]:
def build_dls(valid_idx):
    """Build the DataLoaders for one fold.

    Args:
        valid_idx: indices of the rows that form this fold's validation set.

    Returns:
        The DataLoaders produced by a DataBlock (image input, category target)
        that uses the notebook-level getters and transforms, with batch size 32.
    """
    def fold_splitter(items):
        # Closes over this fold's validation indices.
        return _splitter(items, valid_idx)

    block = DataBlock(
        blocks=(ImageBlock, CategoryBlock),
        get_items=get_items,
        get_x=get_x,
        get_y=get_y,
        splitter=fold_splitter,
        item_tfms=item_tfms,
        batch_tfms=batch_tfms,
    )
    return block.dataloaders(img_path, bs=32, shuffle=True)

# Cross-Validation

In [9]:
from sklearn.model_selection import StratifiedGroupKFold

# 5-fold splitter, stratified on the target and grouped by the `groups` passed to
# split(). random_state is pinned because shuffle=True otherwise draws different
# folds on every run, which makes the out-of-fold predictions and the per-fold
# scores impossible to reproduce after a kernel restart.
sgkf = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=42)
# Grouping key: rows sharing a patient_id are kept on the same side of each split.
groups = df.patient_id

In [10]:
def loss_func(targs, preds):
    """Loss given to the learner: `flat_BCE_with_logits` with `ratio=3/2`.

    Both arguments are forwarded positionally, in the order they are received.

    NOTE(review): fastai is expected to call a loss as
    `loss_func(predictions, targets)`, so despite its name `targs` presumably
    carries the model outputs and `preds` the targets. The result is unaffected
    because the order is preserved, but the names look swapped — confirm against
    the signature of `flat_BCE_with_logits`.
    """
    return flat_BCE_with_logits(targs, preds, ratio=3/2)

# Out-of-fold prediction table: starts as a copy of the `isic_id` column of `df`.
oof_preds = df.loc[:, ['isic_id']].copy()

In [11]:
# One row per fold: the fitted Learner followed by the values of
# learn.recorder.final_record (columns train_loss, valid_loss, pAUC).
# NOTE(review): storing the Learner objects here keeps every fold's model referenced
# until `log` is released — confirm that memory allows this.
log = pd.DataFrame(columns=['Learner','train_loss','valid_loss','pAUC'])

for i, splits in enumerate(sgkf.split(df, df.target, groups)):
    # Second element of each split: the row positions held out for validation.
    valid_idx = splits[1]

    dls = build_dls(valid_idx)
    learn = vision_learner(
        dls, 'resnet18',
        pretrained=True, normalize=True, n_out=1,
        loss_func=loss_func, metrics=pAUCMetric,
    ).to_fp16()
    learn.fit_one_cycle(1, 1e-3)

    log.loc[i] = [learn] + learn.recorder.final_record

    # Predictions on this fold's validation loader, flattened to one value per row
    # and written into the matching rows of the out-of-fold table.
    # NOTE(review): loss_func is a plain function, so get_preds may be returning raw
    # logits rather than probabilities — confirm before mixing with other models.
    preds = learn.get_preds(dl=dls.valid)[0].numpy().reshape(-1)
    oof_preds.loc[valid_idx, 'vision_preds'] = preds

model.safetensors:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


epoch,train_loss,valid_loss,pAUC,time
0,0.291514,0.256944,0.144298,07:28


  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


epoch,train_loss,valid_loss,pAUC,time
0,0.26435,0.290403,0.132132,07:12


  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


epoch,train_loss,valid_loss,pAUC,time
0,0.256317,0.261145,0.134701,07:17


  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


epoch,train_loss,valid_loss,pAUC,time
0,0.266939,0.328326,0.138605,07:09


  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


epoch,train_loss,valid_loss,pAUC,time
0,0.258691,0.318453,0.144637,07:14


  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


# Exporting Learner and OOF predictions

In [12]:
# Order the fold log by validation pAUC, best first (the sort mutates `log` in place),
# then take the Learner stored in the top row.
log.sort_values('pAUC', ascending=False, inplace=True)
best_learner = log['Learner'].iloc[0]

# Save the selected learner to disk.
best_learner.export(fname="fitted_resnet18.pkl")

In [13]:
# Write the out-of-fold predictions to disk in CSV format; the DataFrame index is
# written too because `index` is left at its default.
# NOTE(review): the file name has no `.csv` extension — confirm that downstream
# readers expect exactly 'vision_oof_preds'.
oof_preds.to_csv('vision_oof_preds')