# [SegFormer](https://huggingface.co/docs/transformers/v4.27.2/en/model_doc/segformer) Fine-tuning for liver tumor segmentation 

## Environment setup

In [None]:
# Installation of required Python packages
!pip install transformers datasets evaluate gradio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.27.4-py3-none-any.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m60.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.11.0-py3-none-any.whl (468 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m468.7/468.7 KB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 KB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gradio
  Downloading gradio-3.24.1-py3-none-any.whl (15.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.7/15.7 MB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (

In [None]:
# Importing required Python packages
from pathlib import Path
from datasets import load_dataset, Dataset, Image
from huggingface_hub import notebook_login, login

In [None]:
# Authentication with HuggingFace.
# The access token is read from the HF_TOKEN environment variable rather than
# being hard-coded in the notebook. When the variable is unset or empty, `None`
# is passed and `login` falls back to its interactive prompt.
# SECURITY: an access token was previously committed in this cell. It must be
# treated as compromised: revoke it at https://huggingface.co/settings/tokens
# and create a new one.
import os

login(token=os.environ.get("HF_TOKEN") or None, add_to_git_credential=True)

Token is valid.
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


## Dataset loading

In [None]:
# Load the liver tumor segmentation dataset from the HuggingFace Hub.
# The returned object is indexed by split name ('train', 'val', 'test') further down.
ds = load_dataset('trpakov/liver-cancer-segmentation')

Downloading readme:   0%|          | 0.00/554 [00:00<?, ?B/s]

Downloading and preparing dataset None/None to /root/.cache/huggingface/datasets/trpakov___parquet/trpakov--liver-cancer-segmentation-82b245893aa85433/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/226M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/341M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/340M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/455M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/456M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/455M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/452M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/454M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/458M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/456M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/456M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating test split:   0%|          | 0/900 [00:00<?, ? examples/s]

Generating val split:   0%|          | 0/2695 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/14380 [00:00<?, ? examples/s]

Dataset parquet downloaded and prepared to /root/.cache/huggingface/datasets/trpakov___parquet/trpakov--liver-cancer-segmentation-82b245893aa85433/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Bind each data subset to its own variable.
# Only the first 1000 validation examples are kept, due to memory constraints.
train, test = ds['train'], ds['test']
val = ds['val'].select(range(1000))

In [None]:
# Lookup tables between class ids and class names, plus the number of classes
id2label = dict(enumerate(('Background', 'Liver', 'Tumor')))
label2id = {name: class_id for class_id, name in id2label.items()}
num_labels = len(id2label)

## Model Fine-tuning

In [None]:
from transformers import AutoImageProcessor
# Load the SegFormer image processor to prepare the images and annotations for the model.
# `checkpoint` is reused below when the model itself is loaded.
# do_reduce_labels=False keeps the annotation ids as they are, so id 0 stays 'Background' as in id2label.
checkpoint = "nvidia/mit-b2"
image_processor = AutoImageProcessor.from_pretrained(checkpoint, do_reduce_labels=False)

Downloading (…)rocessor_config.json:   0%|          | 0.00/272 [00:00<?, ?B/s]

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [None]:
from torchvision.transforms import ColorJitter
# Random changes in the brightness, contrast, saturation and hue of the images, applied during training
jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1)

In [None]:
# Two preprocessing functions that prepare the images and annotations for the model: the images become pixel_values and the annotations become labels.
# For the training set, colour jitter is applied to the images before they are passed to the image processor.
# For the validation set, the images go straight to the image processor (which resizes and normalizes them), because no data augmentation is applied during evaluation.

def train_transforms(example_batch):
    """Turn a raw training batch into model inputs, with colour augmentation.

    Every entry of ``example_batch["image"]`` is passed through ``jitter``; the
    augmented images and the entries of ``example_batch["annotation"]`` are then
    handed to ``image_processor``, whose result is returned unchanged.
    """
    augmented_images = list(map(jitter, example_batch["image"]))
    segmentation_maps = list(example_batch["annotation"])
    return image_processor(augmented_images, segmentation_maps)


def val_transforms(example_batch):
    """Turn a raw validation batch into model inputs, without augmentation.

    The entries of ``example_batch["image"]`` and ``example_batch["annotation"]``
    are copied into lists and handed to ``image_processor``, whose result is
    returned unchanged.
    """
    raw_images = list(example_batch["image"])
    segmentation_maps = list(example_batch["annotation"])
    return image_processor(raw_images, segmentation_maps)

In [None]:
# Attach the preprocessing functions to the splits: augmentation for training, plain preprocessing for validation.
# NOTE: set_transform modifies `train` and `val` in place.
train.set_transform(train_transforms)
val.set_transform(val_transforms)

In [None]:
# Load the metric used during training to evaluate the model's performance.
# For image segmentation, the mean Intersection over Union (IoU) metric is used.
# `metric` is read as a global by compute_metrics below.

import evaluate

metric = evaluate.load("mean_iou")

Downloading builder script:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

In [None]:
import torch
from torch import nn
import numpy as np
import pandas as pd

In [None]:
# A function to compute the metrics.
# The logits returned by the model are first upsampled to the size of the labels and then converted to class predictions with argmax.

def compute_metrics(eval_pred):
    """Compute segmentation metrics for one evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair. ``logits`` is a NumPy array with
            the class scores on axis 1 and the spatial dimensions last;
            ``labels`` holds the reference class ids, with the spatial
            dimensions on its last two axes.

    Returns:
        dict: The aggregate values produced by the ``mean_iou`` metric
        (``mean_iou``, ``mean_accuracy``, ``overall_accuracy``) plus one
        ``accuracy_<name>`` and one ``iou_<name>`` entry for every class in
        ``id2label`` other than the ignored background class.
    """
    # Class id 0 ('Background') is excluded from the evaluation.
    ignored_class = 0
    logits, labels = eval_pred

    with torch.no_grad():
        # The logits are upsampled to the label resolution before taking the
        # per-pixel argmax over the class axis.
        upsampled_logits = nn.functional.interpolate(
            torch.from_numpy(logits),
            size=labels.shape[-2:],
            mode="bilinear",
            align_corners=False,
        )
        pred_labels = upsampled_logits.argmax(dim=1).detach().cpu().numpy()

    # NOTE(review): this calls the private `_compute` instead of the public
    # `compute` - presumably to avoid the overhead of the public wrapper.
    # Confirm before switching.
    metrics = metric._compute(
        predictions=pred_labels,
        references=labels,
        num_labels=num_labels,
        ignore_index=ignored_class,
        reduce_labels=False,
    )

    per_category_accuracy = metrics.pop("per_category_accuracy")
    per_category_iou = metrics.pop("per_category_iou")

    evaluated_ids = [class_id for class_id in sorted(id2label) if class_id != ignored_class]

    # The metric removes pixels whose *reference* is the ignored class but still
    # counts pixels *predicted* as that class. The ignored class therefore gets
    # an IoU of 0 (not NaN) and drags the built-in mean down, so the mean is
    # recomputed here over the evaluated classes only.
    metrics["mean_iou"] = float(np.nanmean(per_category_iou[evaluated_ids]))

    # Add per-category metrics as individual key-value pairs
    # (accuracies first, then IoUs, for every evaluated class).
    for class_id in evaluated_ids:
        metrics[f"accuracy_{id2label[class_id]}"] = float(per_category_accuracy[class_id])
    for class_id in evaluated_ids:
        metrics[f"iou_{id2label[class_id]}"] = float(per_category_iou[class_id])

    return metrics

In [None]:
from transformers import AutoModelForSemanticSegmentation, TrainingArguments, Trainer

# Instantiate the SegFormer model to be fine-tuned from the pretrained
# checkpoint, configured with this task's number of classes and label maps.
model = AutoModelForSemanticSegmentation.from_pretrained(
    checkpoint,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/70.0k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/99.0M [00:00<?, ?B/s]

Some weights of the model checkpoint at nvidia/mit-b2 were not used when initializing SegformerForSemanticSegmentation: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.classifier.bias', 'decode_head.linear_c.1.proj.bias', 'decode_head.batch_norm.bias', 'decode_head.linear_c.0.proj.bias', 'decode_head.batch_norm.running_var', 'decode_head.linear_c.3.pro

In [None]:
# Training hyperparameters, collected in a TrainingArguments object.
training_args = TrainingArguments(
    # Where checkpoints are written locally
    output_dir="segformer-b2-liver-cancer",
    # Optimisation schedule
    learning_rate=6e-5,
    max_steps=20_000,
    # Batch sizes
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    # Evaluate and checkpoint on the same 1000-step cadence
    evaluation_strategy="steps",
    eval_steps=1000,
    save_strategy="steps",
    save_steps=1000,
    load_best_model_at_end=True,
    # Log the training loss at every step
    logging_steps=1,
    # Keep the raw 'image'/'annotation' columns that the on-the-fly transforms read
    remove_unused_columns=False,
    # Upload to a private repository on the HuggingFace Hub
    push_to_hub=True,
    hub_private_repo=True,
)

In [None]:
# Passing the training arguments to the Trainer along with the model, the training and validation datasets, and the compute_metrics function.
# No tokenizer or data collator is passed here, so the Trainer's defaults are used.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=val,
    compute_metrics=compute_metrics,
)

Cloning https://huggingface.co/trpakov/segformer-b2-liver-cancer into local empty directory.


In [None]:
# Calling train() to fine-tune the model; the returned result is kept in train_results.
train_results = trainer.train()



Step,Training Loss,Validation Loss,Mean Iou,Mean Accuracy,Overall Accuracy,Accuracy Liver,Accuracy Tumor,Iou Liver,Iou Tumor
1000,0.0119,0.020937,0.49327,0.877757,0.898911,0.902354,0.85316,0.896599,0.583211
2000,0.0099,0.014364,0.560176,0.897332,0.954148,0.963395,0.83127,0.953905,0.726624
3000,0.0017,0.014822,0.483703,0.754178,0.926902,0.955014,0.553343,0.946814,0.504295
4000,0.0217,0.013191,0.53926,0.839393,0.94094,0.957467,0.721318,0.94762,0.67016
5000,0.008,0.010665,0.568712,0.910695,0.951755,0.958438,0.862951,0.951895,0.754242
6000,0.0006,0.010634,0.584189,0.917394,0.966978,0.975048,0.859739,0.965996,0.78657
7000,0.0022,0.009742,0.573403,0.886284,0.955226,0.966447,0.806122,0.954209,0.766001
8000,0.0009,0.009235,0.58737,0.943658,0.960207,0.962901,0.924416,0.958242,0.803867
9000,0.0056,0.008842,0.588403,0.93403,0.958726,0.962745,0.905315,0.957353,0.807856
10000,0.0102,0.008864,0.588333,0.9215,0.961841,0.968407,0.874594,0.960967,0.804032


  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label


In [None]:
# Uploading the trained model to the HuggingFace Hub so it can easily be used for inference.
# NOTE(review): `tags` is presumably forwarded to the generated model card - confirm against the Trainer.push_to_hub docs.
trainer.push_to_hub(tags='image-segmentation')

To https://huggingface.co/trpakov/segformer-b2-liver-cancer
   551ed47..06624eb  main -> main

   551ed47..06624eb  main -> main

To https://huggingface.co/trpakov/segformer-b2-liver-cancer
   06624eb..5a9886f  main -> main

   06624eb..5a9886f  main -> main



'https://huggingface.co/trpakov/segformer-b2-liver-cancer/commit/06624eb0b240c7974ee829463808dee9bc45966a'