# [BEiT](https://huggingface.co/docs/transformers/model_doc/beit) Fine-tuning for liver tumor segmentation 

## Environment setup

In [None]:
# Installation of required Python packages
!pip install transformers datasets evaluate gradio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Importing required Python packages
from pathlib import Path
from datasets import load_dataset, Dataset, Image
from huggingface_hub import notebook_login, login

In [None]:
# Authentication with HuggingFace
# The access token is deliberately NOT stored in the notebook: when no `token`
# argument is given, login() asks for it interactively.
# NOTE(review): a token that was ever committed in a notebook should be treated
# as leaked and revoked in the HuggingFace account settings.
login(add_to_git_credential=True)

Token is valid.
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


## Dataset loading

In [None]:
# Load all splits (train / val / test) of the liver tumor segmentation dataset from the HuggingFace Hub
ds = load_dataset('trpakov/liver-cancer-segmentation')

Downloading readme:   0%|          | 0.00/554 [00:00<?, ?B/s]

Downloading and preparing dataset None/None to /root/.cache/huggingface/datasets/trpakov___parquet/trpakov--liver-cancer-segmentation-82b245893aa85433/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/226M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/455M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/456M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/455M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/452M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/454M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/458M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/456M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/456M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/341M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/340M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating test split:   0%|          | 0/900 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/14380 [00:00<?, ? examples/s]

Generating val split:   0%|          | 0/2695 [00:00<?, ? examples/s]

Dataset parquet downloaded and prepared to /root/.cache/huggingface/datasets/trpakov___parquet/trpakov--liver-cancer-segmentation-82b245893aa85433/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Assigning the data subsets to separate variables.
# Only the first 1000 validation images are used due to memory constraints.
train, test = ds["train"], ds["test"]
val = ds["val"].select(range(1000))

In [None]:
# Create mappings between label ids and label names (and the reverse lookup)
id2label = dict(enumerate(('Background', 'Liver', 'Tumor')))
label2id = {name: idx for idx, name in id2label.items()}
num_labels = len(id2label)

## Model Fine-tuning

In [None]:
from transformers import AutoImageProcessor
# Load the image processor that belongs to the BEiT checkpoint to prepare the images and annotations for the model.
checkpoint = "microsoft/beit-base-patch16-224-pt22k-ft22k"
# do_reduce_labels=False: presumably so the annotation ids stay aligned with id2label (0 = Background) -- confirm against the processor docs.
image_processor = AutoImageProcessor.from_pretrained(checkpoint, do_reduce_labels=False)

Downloading (…)rocessor_config.json:   0%|          | 0.00/276 [00:00<?, ?B/s]

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [None]:
from torchvision.transforms import ColorJitter
# Random changes in the brightness, contrast, saturation and hue of the images, applied during training
jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1)

In [None]:
# Two preprocessing functions to prepare the images and annotations for the model. These functions convert the images into pixel_values and annotations to labels. 
# For the training set, jitter is applied before providing the images to the image processor. 
# For the validation set, the image processor crops and normalizes the images because no data augmentation is applied during testing.

def train_transforms(example_batch):
    """Prepare a training batch: colour-jitter the images, then run the image processor.

    Args:
        example_batch: batch mapping with an "image" and an "annotation" column.

    Returns:
        The output of ``image_processor`` for the jittered images, with the
        annotations passed as segmentation maps.
    """
    augmented = list(map(jitter, example_batch["image"]))
    masks = list(example_batch["annotation"])
    return image_processor(augmented, segmentation_maps=masks)


def val_transforms(example_batch):
    """Prepare a validation batch: run the image processor without any augmentation.

    Args:
        example_batch: batch mapping with an "image" and an "annotation" column.

    Returns:
        The output of ``image_processor`` for the unmodified images, with the
        annotations passed as segmentation maps.
    """
    pictures = list(example_batch["image"])
    masks = list(example_batch["annotation"])
    return image_processor(pictures, segmentation_maps=masks)

In [None]:
# Attach the preprocessing functions to the splits: the training split gets the
# augmenting transform, the validation split the plain one.
train.set_transform(train_transforms)
val.set_transform(val_transforms)

In [None]:
# Including a metric during training to evaluate the model’s performance.
# For image segmentation, the mean Intersection over Union (IoU) metric is used.

import evaluate

# Load the "mean_iou" metric through the evaluate library; it is used by compute_metrics.
metric = evaluate.load("mean_iou")

Downloading builder script:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

In [None]:
import torch
from torch import nn
import numpy as np

In [None]:
# A function to compute the metrics.
# The logits produced by the model are upsampled to the size of the labels and
# turned into per-pixel class predictions before they are scored.


def compute_metrics(eval_pred):
    """Compute IoU / accuracy metrics for one evaluation pass.

    Args:
        eval_pred: ``(logits, labels)`` pair of NumPy arrays. The logits are
            4-D with the class scores on axis 1; the last two axes of
            ``labels`` give the spatial size the logits are resized to.

    Returns:
        dict: the aggregate entries returned by the ``mean_iou`` metric, plus
        ``accuracy_Liver``, ``accuracy_Tumor``, ``iou_Liver``, ``iou_Tumor``
        and ``mean_iou_foreground``. The raw per-category arrays are removed
        from the dict.
    """
    with torch.no_grad():
        logits, labels = eval_pred

        # Resize the logits to the label resolution, then pick the best class per pixel.
        upsampled = nn.functional.interpolate(
            torch.from_numpy(logits),
            size=labels.shape[-2:],
            mode="bilinear",
            align_corners=False,
        )
        pred_labels = upsampled.argmax(dim=1).detach().cpu().numpy()

        # NOTE(review): the private `_compute` is called instead of the public
        # `compute`; presumably a deliberate speed workaround -- confirm before changing.
        metrics = metric._compute(
            predictions=pred_labels,
            references=labels,
            num_labels=num_labels,
            ignore_index=0,
            reduce_labels=False,
        )

        # add per category metrics as individual key-value pairs
        per_category_accuracy = metrics.pop("per_category_accuracy").tolist()
        per_category_iou = metrics.pop("per_category_iou").tolist()

        metrics.update({'accuracy_Liver': per_category_accuracy[1]})
        metrics.update({'accuracy_Tumor': per_category_accuracy[2]})

        metrics.update({'iou_Liver': per_category_iou[1]})
        metrics.update({'iou_Tumor': per_category_iou[2]})

        # Background (id 0) is the ignore_index, so it has no reference pixels
        # and scores an IoU of 0 that is still averaged into `mean_iou`
        # (the logged runs show mean_iou == (iou_Liver + iou_Tumor) / 3).
        # Report the average over the real classes as an additional key.
        metrics['mean_iou_foreground'] = float(np.nanmean(per_category_iou[1:]))

        return metrics

In [None]:
from transformers import AutoModelForSemanticSegmentation, TrainingArguments, Trainer
# Load the pretrained BEiT checkpoint as a semantic segmentation model to be fine-tuned on the three labels
model = AutoModelForSemanticSegmentation.from_pretrained(checkpoint, num_labels=num_labels, id2label=id2label, label2id=label2id)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/414M [00:00<?, ?B/s]

Some weights of the model checkpoint at microsoft/beit-base-patch16-224-pt22k-ft22k were not used when initializing BeitForSemanticSegmentation: ['classifier.bias', 'classifier.weight', 'beit.pooler.layernorm.bias', 'beit.pooler.layernorm.weight']
- This IS expected if you are initializing BeitForSemanticSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BeitForSemanticSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BeitForSemanticSegmentation were not initialized from the model checkpoint at microsoft/beit-base-patch16-224-pt22k-ft22k and are newly initialized: ['fpn1.1.weight', 'decode_head.psp_modules.2.1.bn.weight', 'decode_head.psp_modules.3.1.bn.running

In [None]:
# Defining the training hyperparameters in TrainingArguments.
training_args = TrainingArguments(
    output_dir="beit-base-224-liver-cancer",
    # Optimisation schedule
    learning_rate=6e-5,
    max_steps=20_000,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    # Evaluate and checkpoint on the same 1000-step cadence
    evaluation_strategy="steps",
    eval_steps=1000,
    save_strategy="steps",
    save_steps=1000,
    load_best_model_at_end=True,
    logging_steps=1,
    # The preprocessing transforms read the "image" and "annotation" columns,
    # so they must stay available to the Trainer
    remove_unused_columns=False,
    # Publish the results to a private repository on the HuggingFace Hub
    push_to_hub=True,
    hub_private_repo=True,
)

In [None]:
# Passing the training arguments to the Trainer along with the model, the training and validation datasets, and the compute_metrics function.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=val,
    compute_metrics=compute_metrics,
)

/content/beit-base-224-liver-cancer is already a clone of https://huggingface.co/trpakov/beit-base-224-liver-cancer. Make sure you pull the latest changes with `repo.git_pull()`.


In [None]:
# Calling train() to fine-tune the model; the value it returns is kept in train_results.
train_results = trainer.train()



Step,Training Loss,Validation Loss,Mean Iou,Mean Accuracy,Overall Accuracy,Accuracy Liver,Accuracy Tumor,Iou Liver,Iou Tumor
1000,0.0255,0.042474,0.455954,0.864275,0.886498,0.890094,0.838457,0.883121,0.484741
2000,0.0211,0.028375,0.537158,0.864755,0.939434,0.95152,0.777989,0.940458,0.671017
3000,0.0031,0.023304,0.548754,0.911583,0.940755,0.945476,0.877689,0.938512,0.70775
4000,0.0365,0.021045,0.55764,0.863955,0.945982,0.959257,0.768652,0.94571,0.727209
5000,0.017,0.019199,0.568259,0.897278,0.948047,0.956263,0.838294,0.947627,0.757151
6000,0.0017,0.018442,0.572296,0.910161,0.952081,0.958865,0.861457,0.950685,0.766204
7000,0.0043,0.017721,0.57168,0.888499,0.950136,0.960112,0.816887,0.948855,0.766185
8000,0.0004,0.016386,0.580894,0.912607,0.949708,0.955712,0.869502,0.947521,0.795162
9000,0.0114,0.019818,0.566488,0.93916,0.939785,0.939886,0.938433,0.936535,0.762928
10000,0.0196,0.016029,0.590639,0.933738,0.965541,0.970688,0.896788,0.963893,0.808025


  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label


In [None]:
# Saving the trained model. With push_to_hub=True set in the training arguments,
# this also uploads it to the HuggingFace Hub so it can easily be used for inference.
trainer.save_model()

To https://huggingface.co/trpakov/beit-base-224-liver-cancer
   3994daa..7a7b1c2  main -> main

   3994daa..7a7b1c2  main -> main

To https://huggingface.co/trpakov/beit-base-224-liver-cancer
   7a7b1c2..19a05cb  main -> main

   7a7b1c2..19a05cb  main -> main

