In [4]:
%load_ext autoreload
%autoreload 2

import urllib.request
import tarfile
import json, os
import shutil

import numpy as np
import torch.nn.functional as F
import torch
import torchvision.models as models
import cv_exp
import matplotlib.pyplot as plt
from zipfile import ZipFile
from cv_exp import pipe

# Instantiate the cv_exp pipeline helper and take the device it exposes; `device`
# is what the models and batches below are moved to via `.to(device)`.
# NOTE: this rebinds `pipe` from the imported module to a `Pipe` instance, so the
# module itself is no longer reachable under that name after this line.
pipe = pipe.Pipe()
device = pipe.device

from tqdm import tqdm
class DownloadProgressBar(tqdm):
    """tqdm bar with an `update_to` hook usable as `urlretrieve`'s `reporthook`."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Move the bar to the absolute position ``b * bsize``.

        Args:
            b: Number of blocks transferred so far.
            bsize: Size of one block.
            tsize: Total size; when it is not None it overwrites ``self.total``.
        """
        if tsize is not None:
            self.total = tsize
        transferred = b * bsize
        # tqdm.update() expects an increment, so hand it the distance from the
        # bar's current counter to the new absolute position.
        self.update(transferred - self.n)


def download_url(url, output_path):
    """Download `url` to `output_path` while showing a progress bar.

    The payload is streamed into ``<output_path>.part`` and only moved to
    `output_path` once the transfer has completed. A failed or interrupted
    download therefore never leaves a truncated file at `output_path`, which
    matters because callers skip the download when `output_path` already exists.

    Args:
        url: Address of the file to fetch.
        output_path: Final location of the downloaded file.

    Raises:
        Whatever `urllib.request.urlretrieve` raises; the partial file is
        deleted before the exception propagates.
    """
    print('Downloading from: ', url)
    print('If you think the download speed is too slow, please use other tools to download it, ')
    print('and place the file in: ', output_path)
    print('then you can run the following cells.')
    print()
    partial_path = f"{output_path}.part"
    try:
        with DownloadProgressBar(unit='B', unit_scale=True,
                                 miniters=1, desc=url.split('/')[-1]) as t:
            urllib.request.urlretrieve(url, filename=partial_path, reporthook=t.update_to)
    except BaseException:
        # BaseException so that a kernel interrupt (KeyboardInterrupt) is covered too.
        # Cleanup is best-effort: the original error is the one worth reporting.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        raise
    # Same-directory rename: the complete file appears at output_path in one step.
    os.replace(partial_path, output_path)
        
def remove(path):
    """Delete the file at `path` on a best-effort basis.

    Never raises for filesystem errors: a missing file and any other failure
    are reported on stdout with distinct messages, so that e.g. a permission
    problem is not mistaken for "already removed".

    Args:
        path: Path of the file to delete.
    """
    try:
        os.remove(path)
    except FileNotFoundError:
        print(f"File: {path} might be removed already")
    except OSError as e:
        # The file is still there (permissions, it is a directory, it is in use, ...).
        print(f"File: {path} could not be removed: {e}")

Using device:  mps


### 1. ImageNet Case


#### 1.1 Download ImageNet Validation Dataset

Register on [https://image-net.org/challenges/LSVRC/2012/2012-downloads.php#images](https://image-net.org/challenges/LSVRC/2012/2012-downloads.php#images) and get the dataset.

or

Download it directly from [https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar](https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar) (6.74 GB) with the code below.

If the download from code is too slow, you can download the file manually from this link and put the tar file in `./data/`.


In [2]:
# Make sure ./data exists, then fetch the ImageNet validation tarball only when
# no copy is on disk yet.
os.makedirs("data", exist_ok=True)
imagenet_dataset_path = os.path.join("data", "imagenet_val")
imagenet_tar_path = os.path.join("data", "ILSVRC2012_img_val.tar")
imagenet_tar_missing = not os.path.exists(imagenet_tar_path)
if imagenet_tar_missing:
    print("./data/ILSVRC2012_img_val.tar is not exist, start to download the file")
    imagenet_tar_url = "https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar"
    download_url(imagenet_tar_url, imagenet_tar_path)

./data/ILSVRC2012_img_val.tar is not exist, start to download the file
Downloading from:  https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar
If you think the download speed is too slow, please use other tools to download it, 
and place the file in:  data/ILSVRC2012_img_val.tar
then you can run the following cells.



ILSVRC2012_img_val.tar: 100%|██████████| 6.74G/6.74G [03:58<00:00, 28.3MB/s]    


In [3]:
# extract data from the tar
# Use the "data" extraction filter where this Python version provides it, so
# archive members cannot be written outside the target folder; interpreters
# without filter support fall back to their default behaviour.
tar_extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
# The context manager closes the archive even when extraction fails part-way.
with tarfile.open(imagenet_tar_path) as my_tar:
    my_tar.extractall(imagenet_dataset_path, **tar_extract_kwargs)  # specify which folder to extract to

#### 1.2 Download ImageNet-S Dataset


In [5]:
import cases.imagenet_exp.datapreparation_val as datapreparation

# Sort the sampled validation images into one folder per class keycode, which
# is the layout used for the imagenet-s processing step.
with open("sample_class_folder_map.json", "r") as map_file:
    # maps image file name -> class keycode
    sample_class_folder_map = json.load(map_file)

raw_val_dir = os.path.join("data", "imagenet_val")
processed_val_dir = os.path.join("data", "imagenet_val_processed")

for image_name, class_keycode in sample_class_folder_map.items():
    src_path = os.path.join(raw_val_dir, image_name)
    if not os.path.exists(src_path):
        # not extracted, or already moved by an earlier run
        continue
    dest_path = os.path.join(processed_val_dir, class_keycode, image_name)
    os.makedirs(os.path.dirname(dest_path), exist_ok=True)
    os.rename(src_path, dest_path)

# Everything still left in the raw folder is not part of the sample; drop it.
shutil.rmtree(raw_val_dir, ignore_errors=True)

In [6]:
# Run the ImageNet-S preparation in "919" mode on the per-class folders, writing
# to data/imagenet-s, then discard the intermediate per-class copy.
imagenet_val_processed_dir = os.path.join("data", "imagenet_val_processed")
imagenet_s_dir = os.path.join("data", "imagenet-s")
datapreparation.make("919", imagenet_val_processed_dir, imagenet_s_dir)
shutil.rmtree(imagenet_val_processed_dir, ignore_errors=True)

In [7]:
# Download the ImageNet-S 919 segmentation archive (61.3MB) into ./data.
ImageNetS919_seg_url = "https://github.com/LUSSeg/ImageNet-S/releases/download/ImageNet-S/ImageNetS919-5f7f58ae1003d21da9409a8576bf7680.zip"
imagenets919_seg_zip_path = os.path.join(
    "data", "ImageNetS919-5f7f58ae1003d21da9409a8576bf7680.zip"
)
download_url(ImageNetS919_seg_url, imagenets919_seg_zip_path)

Downloading from:  https://github.com/LUSSeg/ImageNet-S/releases/download/ImageNet-S/ImageNetS919-5f7f58ae1003d21da9409a8576bf7680.zip
If you think the download speed is too slow, please use other tools to download it, 
and place the file in:  data/ImageNetS919-5f7f58ae1003d21da9409a8576bf7680.zip
then you can run the following cells.



ImageNetS919-5f7f58ae1003d21da9409a8576bf7680.zip: 61.3MB [00:02, 22.6MB/s]                              


In [None]:
from zipfile import ZipFile

# Unpack the segmentation archive into data/imagenet-s and lift the
# validation masks one level up so they sit beside the original imagenet data.
seg_zip_path = os.path.join("data", "ImageNetS919-5f7f58ae1003d21da9409a8576bf7680.zip")
seg_target_dir = os.path.join("data", "imagenet-s")
seg_unpacked_dir = os.path.join(seg_target_dir, "ImageNetS919")

with ZipFile(seg_zip_path, "r") as zObject:
    zObject.extractall(path=seg_target_dir)
    os.rename(
        os.path.join(seg_unpacked_dir, "validation-segmentation"),
        os.path.join(seg_target_dir, "validation-segmentation"),
    )

# Clean up: the rest of the unpacked archive, the zip, and the ImageNet tarball.
shutil.rmtree(seg_unpacked_dir, ignore_errors=True)
remove(seg_zip_path)
remove(imagenet_tar_path)

#### 1.3 Data Sampling Validation


In [9]:
from cases.imagenet_exp.imagenet_seg import ImageNetSeg

# Sanity-check the sampled ImageNet-S validation data: run a pretrained
# ResNet-50 over it and average the softmax confidence of the true class.
imagenet_seg = ImageNetSeg()
val_dataset = imagenet_seg.val_dataset
val_seg_dataset = imagenet_seg.val_seg_dataset
classes_map = imagenet_seg.classes_map
class_label = imagenet_seg.class_label

resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1).to(device)
resnet.eval()
# data number before preprocessing should be 12419.
# if it has been processed, then it should be 3976
print(f"Sample number: {len(val_dataset)}")

val_data_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=4, shuffle=False, num_workers=0
)
rs = []
# predict on the sampled data, collect the confidence score
# no_grad: this is inference only, so skip building autograd graphs (saves
# memory and time; the forward values are unchanged).
with torch.no_grad():
    for x, y in tqdm(val_data_loader):
        o = resnet(x.to(device))
        o = F.softmax(o, dim=1)
        # confidence assigned to the ground-truth class of each sample in the batch
        rs.extend([o[i][y[i]].item() for i in range(y.shape[0])])

# the average score should be over 90%.
print(np.array(rs).mean())

seg sal not exist
Sample number: 3976


100%|██████████| 994/994 [01:02<00:00, 16.03it/s]

0.9388510142143823





### 2. ISIC Case

Please make sure the Kaggle API client (https://github.com/Kaggle/kaggle-api) is installed and that your Kaggle API key is configured.


#### 2.1 Download Datasets


In [2]:
# The `kaggle` command-line client is required by the download cells below
# (they shell out to `kaggle datasets download`); API credentials must be
# configured separately before those cells are run.
# Installs the package, or upgrades it to the latest release if already present.
%pip install kaggle --upgrade

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting kaggle
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/60/d4/e4fd20f47e56adb9b8a242e8fa1605bf110513d6f95dccd530bdcb5f2e2a/kaggle-1.6.6.tar.gz (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.6/84.6 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting python-slugify (from kaggle)
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/a4/62/02da182e544a51a5c3ccf4b03ab79df279f9c60c5e82d5e8bec7ca26ac11/python_slugify-8.0.4-py2.py3-none-any.whl (10 kB)
Collecting text-unidecode>=1.3 (from python-slugify->kaggle)
  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/a6/a5/c0b6468d3824fe3fde30dbb5e1f687b291608f9473681bbf7dabbf5a87d7/text_unidecode-1.3-py2.py3-none-any.whl (78 kB)
Building wheels for collected packages: kaggle
  Building wheel for kaggle (setup.py) ... [?25ldone
[?25h  Created wheel for kaggle: filename=kagg

In [1]:
# Fetch the two ISIC image archives with the kaggle CLI. No target folder is
# given, and the extraction cell expects both zips in the working directory.
# jpeg-isic2019-512x512.zip: 1.38 GB
!kaggle datasets download -d cdeotte/jpeg-isic2019-512x512
# jpeg-melanoma-512x512.zip: 2.63 GB
!kaggle datasets download -d cdeotte/jpeg-melanoma-512x512

Downloading jpeg-isic2019-512x512.zip to /Users/yinnnyou/workspace/Guided-AbsoluteGrad
100%|█████████████████████████████████████▉| 1.37G/1.38G [00:39<00:00, 51.8MB/s]
100%|██████████████████████████████████████| 1.38G/1.38G [00:40<00:00, 36.9MB/s]
Downloading jpeg-melanoma-512x512.zip to /Users/yinnnyou/workspace/Guided-AbsoluteGrad
100%|█████████████████████████████████████▉| 2.63G/2.63G [01:08<00:00, 45.5MB/s]
100%|██████████████████████████████████████| 2.63G/2.63G [01:08<00:00, 41.1MB/s]


In [6]:
# Unpack both ISIC archives from the working directory into ./data/<archive name>
# and delete each zip once it has been extracted.
for isic_archive_name in ("jpeg-isic2019-512x512", "jpeg-melanoma-512x512"):
    isic_zip_path = f"{isic_archive_name}.zip"
    with ZipFile(isic_zip_path, "r") as zObject:
        zObject.extractall(path=os.path.join("data", isic_archive_name))
    # Delete only after the archive handle is closed: removing a file that is
    # still open fails on platforms that lock open files (e.g. Windows).
    remove(isic_zip_path)

#### 2.2 Download Trained Weights


Source: https://www.kaggle.com/datasets/boliu0/melanoma-winning-models


In [9]:
# Download the five per-fold checkpoints (fold0..fold4, 62.7 MB for each) of the
# melanoma-winning-models dataset into ./cases/isic_exp/weights.
# Each file is delivered as `<name>.pth.zip`, so it has to be unzipped afterwards.
!kaggle datasets download -d boliu0/melanoma-winning-models -f 9c_b4ns_448_ext_15ep-newfold_best_fold0.pth -p ./cases/isic_exp/weights
!kaggle datasets download -d boliu0/melanoma-winning-models -f 9c_b4ns_448_ext_15ep-newfold_best_fold1.pth -p ./cases/isic_exp/weights
!kaggle datasets download -d boliu0/melanoma-winning-models -f 9c_b4ns_448_ext_15ep-newfold_best_fold2.pth -p ./cases/isic_exp/weights
!kaggle datasets download -d boliu0/melanoma-winning-models -f 9c_b4ns_448_ext_15ep-newfold_best_fold3.pth -p ./cases/isic_exp/weights
!kaggle datasets download -d boliu0/melanoma-winning-models -f 9c_b4ns_448_ext_15ep-newfold_best_fold4.pth -p ./cases/isic_exp/weights

Downloading 9c_b4ns_448_ext_15ep-newfold_best_fold0.pth.zip to ./cases/isic_exp/weights
 97%|████████████████████████████████████▉ | 61.0M/62.7M [00:01<00:00, 42.5MB/s]
100%|██████████████████████████████████████| 62.7M/62.7M [00:01<00:00, 39.9MB/s]
Downloading 9c_b4ns_448_ext_15ep-newfold_best_fold1.pth.zip to ./cases/isic_exp/weights
 99%|█████████████████████████████████████▌| 62.0M/62.7M [00:01<00:00, 49.0MB/s]
100%|██████████████████████████████████████| 62.7M/62.7M [00:01<00:00, 43.2MB/s]
Downloading 9c_b4ns_448_ext_15ep-newfold_best_fold2.pth.zip to ./cases/isic_exp/weights
 93%|███████████████████████████████████▏  | 58.0M/62.7M [00:01<00:00, 55.1MB/s]
100%|██████████████████████████████████████| 62.7M/62.7M [00:01<00:00, 49.8MB/s]
Downloading 9c_b4ns_448_ext_15ep-newfold_best_fold3.pth.zip to ./cases/isic_exp/weights
 99%|█████████████████████████████████████▌| 62.0M/62.7M [00:01<00:00, 42.9MB/s]
100%|██████████████████████████████████████| 62.7M/62.7M [00:01<00:00, 38.0MB/s]


In [10]:
# extract the model: unzip each fold checkpoint in place, then delete the zip
weights_dir = os.path.join("cases", "isic_exp", "weights")
for i in range(5):
    zip_path = os.path.join(
        weights_dir,
        f"9c_b4ns_448_ext_15ep-newfold_best_fold{i}.pth.zip",
    )
    with ZipFile(zip_path, "r") as zObject:
        zObject.extractall(path=weights_dir)
    # Delete only after the archive handle is closed: removing a file that is
    # still open fails on platforms that lock open files (e.g. Windows).
    remove(zip_path)

#### 2.3 Data Sampling Validation


In [11]:
from cases.isic_exp.dataset import MelanomaDatasetFx
from cases.isic_exp.models import ModelFx


# Sanity-check the sampled ISIC validation data with the trained weights.
# Only fold 0 is built and evaluated here (range(1)).
df_valids = []
isic_val_datasets = []
isic_val_dataloaders = []
for fold in range(1):
    val_dataset_fx = MelanomaDatasetFx(fold=fold)
    val_dataset = val_dataset_fx.val_dataset
    args = val_dataset_fx.args
    isic_val_datasets.append(val_dataset)
    isic_val_dataloaders.append(
        torch.utils.data.DataLoader(
            val_dataset, batch_size=args.batch_size, num_workers=args.num_workers
        )
    )

isic_models = []
for fold in range(1):
    model = ModelFx(fold=fold)
    fold_model = model.model.to(device)
    # Switch to inference mode so dropout / batch-norm layers behave
    # deterministically, as the ImageNet and Places365 checks do.
    # NOTE(review): harmless if ModelFx already calls eval() — confirm.
    fold_model.eval()
    isic_models.append(fold_model)

print(f"Sample number: {len(isic_val_datasets[0])}")

rs = []
# predict on the sampled data, collect the confidence score
with torch.no_grad():
    for x, y in tqdm(isic_val_dataloaders[0]):
        o = isic_models[0](x.to(device))
        o = F.softmax(o, dim=1)
        # confidence assigned to the ground-truth class of each sample in the batch
        rs.extend([o[i][y[i]].item() for i in range(y.shape[0])])
        torch.cuda.empty_cache()
# # the average score of all samples should be over 90%.
# print(np.array(rs).mean())



Sample number: 3000


100%|██████████| 47/47 [00:55<00:00,  1.18s/it]


### 3. Places365 Case


#### 3.1 Dataset Download


In [13]:
# Fetch the Places365 "easy format" tarball (26 GB) into the working directory
# unless a copy is already there.
places365_tar_name = "places365standard_easyformat.tar"
places365_tar_missing = not os.path.exists("places365standard_easyformat.tar")
if places365_tar_missing:
    places365_tar_url = "http://data.csail.mit.edu/places/places365/places365standard_easyformat.tar"
    download_url(places365_tar_url, places365_tar_name)

Downloading from:  http://data.csail.mit.edu/places/places365/places365standard_easyformat.tar
If you think the download speed is too slow, please use other tools to download it, 
and place the file in:  places365standard_easyformat.tar
then you can run the following cells.



places365standard_easyformat.tar: 26.7GB [10:09, 43.8MB/s]                                


In [14]:
# extract data
# Use the "data" extraction filter where this Python version provides it, so
# archive members cannot be written outside the target folder; interpreters
# without filter support fall back to their default behaviour.
tar_extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
# The context manager closes the archive even when extraction fails part-way.
with tarfile.open("places365standard_easyformat.tar") as my_tar:
    my_tar.extractall(".", **tar_extract_kwargs)

In [15]:
# Relocate the validation folder and the val.txt file from the extracted
# archive into data/places365; an entry that is absent is simply skipped.
places365_src_dir = "places365_standard"
places365_dst_dir = os.path.join("data", "places365")
if not os.path.exists(places365_dst_dir):
    os.makedirs(places365_dst_dir, exist_ok=True)
for entry_name in ("val", "val.txt"):
    entry_src = os.path.join(places365_src_dir, entry_name)
    if os.path.exists(entry_src):
        os.rename(entry_src, os.path.join(places365_dst_dir, entry_name))

In [16]:
# Free disk space: delete what is left of the extracted archive (the training
# set) and then the tarball itself.
places365_extracted_dir = "places365_standard"
shutil.rmtree(places365_extracted_dir, ignore_errors=True)
remove("places365standard_easyformat.tar")

#### 3.2 Data Sampling Validation


In [17]:
from cases.places365_exp.models import get_place365_models
from cases.places365_exp.dataset import Places365

# Sanity-check the sampled Places365 validation data: run the DenseNet-161
# model over it and average the softmax confidence of the true class.
places365_model = get_place365_models(arch="densenet161")
places365_model.eval()
places365_model = places365_model.to(device)

places365 = Places365()
places365_val_dataset = places365.val_dataset
print(f"Sample number: {len(places365_val_dataset)}")

places365_val_data_loader = torch.utils.data.DataLoader(
    places365_val_dataset, batch_size=4, shuffle=False, num_workers=0
)
rs = []
# predict on the sampled data, collect the confidence score
# no_grad: this is inference only, so skip building autograd graphs (saves
# memory and time; the forward values are unchanged).
with torch.no_grad():
    for x, y in tqdm(places365_val_data_loader):
        o = places365_model(x.to(device))
        o = F.softmax(o, dim=1)
        # confidence assigned to the ground-truth class of each sample in the batch
        rs.extend([o[i][y[i]].item() for i in range(y.shape[0])])

# the average score of all samples should be over 90%.
print(np.array(rs).mean())

--2024-08-18 00:58:51--  http://places2.csail.mit.edu/models_places365/densenet161_places365.pth.tar
Resolving places2.csail.mit.edu (places2.csail.mit.edu)... 128.52.132.120
Connecting to places2.csail.mit.edu (places2.csail.mit.edu)|128.52.132.120|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 110119198 (105M) [application/x-tar]
Saving to: ‘/Users/yinnnyou/workspace/Guided-AbsoluteGrad/cases/places365_exp/pretrained/densenet161_places365.pth.tar’

     0K .......... .......... .......... .......... ..........  0% 1.19M 88s
    50K .......... .......... .......... .......... ..........  0% 2.61M 64s
   100K .......... .......... .......... .......... ..........  0% 37.2M 44s
   150K .......... .......... .......... .......... ..........  0% 7.43M 36s
   200K .......... .......... .......... .......... ..........  0% 3.29M 35s
   250K .......... .......... .......... .......... ..........  0%  425M 30s
   300K .......... .......... .......... .......... .....

Sample number: 5432


100%|██████████| 1358/1358 [03:01<00:00,  7.50it/s]

0.9105589599942254



