In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames[:1]:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/lensezip/lenses/no_sub/image_303470882091450624806213444518141820449.jpg
/kaggle/input/lensezip/lenses/sub/image_189448974206662542797032450204033033.jpg


In [2]:
from transformers import AutoImageProcessor

checkpoint = "google/vit-base-patch16-224-in21k"
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

Downloading (…)rocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

In [3]:
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor
import torch
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)
size = (
    image_processor.size["shortest_edge"]
    if "shortest_edge" in image_processor.size
    else (image_processor.size["height"], image_processor.size["width"])
)
_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])

In [4]:
def transforms(examples):
    examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["image"]]
    del examples["image"]
    return examples

In [5]:
import os
import numpy as np
from datasets import Dataset
from PIL import Image

label_to_str = {0: 'no_sub', 1: 'sub'}
str_to_label = {'sub': 1, 'no_sub': 0}

def create_dataset(folder_path, train_ratio=0.9):
    def get_label(file_path):
        # Extract the label from the file path
        parts = os.path.split(file_path)
        if parts[-2].split('/')[-1] == "sub":
            return 1
        else:
            return 0

    # Get the list of all image file paths
    sub_dir = os.path.join(folder_path, "sub")
    no_sub_dir = os.path.join(folder_path, "no_sub")

    sub_files = [os.path.join(sub_dir, f) for f in os.listdir(sub_dir) if f.endswith(".jpg")]
    no_sub_files = [os.path.join(no_sub_dir, f) for f in os.listdir(no_sub_dir) if f.endswith(".jpg")]

    all_files = sub_files + no_sub_files

    # Shuffle the file paths and split them into training and testing sets
    np.random.shuffle(all_files)
    split_idx = int(len(all_files) * train_ratio)
    train_files = all_files[:split_idx]
    test_files = all_files[split_idx:]

    # Create a list of dictionaries containing file paths and their corresponding labels
    train_data = [{"file_path": file_path, "label": get_label(file_path)} for file_path in train_files]
    test_data = [{"file_path": file_path, "label": get_label(file_path)} for file_path in test_files]

    # Convert the list of dictionaries to Hugging Face's Dataset objects
    train_ds = Dataset.from_dict({"file_path": [item["file_path"] for item in train_data],
                                  "label": [item["label"] for item in train_data],
                                 "image": [Image.open(item["file_path"]) for item in train_data]})
    test_ds = Dataset.from_dict({"file_path": [item["file_path"] for item in test_data],
                                 "label": [item["label"] for item in test_data],
                                "image": [Image.open(item["file_path"]) for item in test_data]})

    return train_ds, test_ds

folder_path = "/kaggle/input/lensezip/lenses"
train_ds, test_ds = create_dataset(folder_path)

In [6]:
label_counts = {}
for example in test_ds:
    label = example["label"]
    if label in label_counts:
        label_counts[label] += 1
    else:
        label_counts[label] = 1

# Print the label counts
print(label_counts)

{1: 496, 0: 504}


In [7]:
train_ds = train_ds.with_transform(transforms)

In [8]:
test_ds = test_ds.with_transform(transforms)

In [9]:
from transformers import DefaultDataCollator
from scipy.special import softmax
data_collator = DefaultDataCollator()
from datasets import load_metric

metric = load_metric("roc_auc")
def compute_metrics(eval_pred):
    """Computes accuracy on a batch of predictions"""
#     print(eval_pred.predictions)
#     predictions = np.argmax(eval_pred.predictions, axis=1)

    probabilities = softmax(eval_pred.predictions, axis=1)[:,1]
    return metric.compute(prediction_scores=probabilities, references=eval_pred.label_ids)

Downloading builder script:   0%|          | 0.00/3.20k [00:00<?, ?B/s]

In [10]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer
id_dict = {0:'no_sub', 1:'sub'}
str_to_id = {'sub':1,'no_sub':0}
model = AutoModelForImageClassification.from_pretrained(
    checkpoint,
    num_labels=2,
    id2label=id_dict,
    label2id=str_to_id,
)

Downloading pytorch_model.bin:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
training_args = TrainingArguments(
    output_dir="vit-base",
    remove_unused_columns=False,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=16,
    num_train_epochs=20,
    warmup_ratio=0.1,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="roc_auc",
    push_to_hub=False,
)

In [12]:
from transformers import EarlyStoppingCallback
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_ds,
    eval_dataset=test_ds,
    tokenizer=image_processor,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],  # Stop training if the model is not improving for 3 consecutive evaluations
)

In [13]:
trainer.train()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,Roc Auc
0,0.5153,0.476675,0.882048
1,0.512,0.458021,0.890265
2,0.449,0.33015,0.935116
4,0.3606,0.436731,0.930236
4,0.2975,0.228494,0.968966
5,0.2515,0.240947,0.967926
6,0.244,0.18565,0.978903
8,0.2021,0.253686,0.966778
8,0.1877,0.291563,0.965978
9,0.1845,0.212881,0.975734


TrainOutput(global_step=1407, training_loss=0.3433399657111856, metrics={'train_runtime': 2028.5297, 'train_samples_per_second': 88.734, 'train_steps_per_second': 1.38, 'total_flos': 6.97427906531328e+18, 'train_loss': 0.3433399657111856, 'epoch': 10.0})

In [16]:
best_model = trainer.model
best_metrics = trainer.evaluate(test_ds)
print("Best model ROC-AUC score on the test set:", best_metrics["eval_roc_auc"])

# Save the best model
best_model.save_pretrained("vit_base")

# Load the best model


Best model ROC-AUC score on the test set: 0.9775785650281618


NameError: name 'TFAutoModelForImageClassification' is not defined

In [19]:
loaded_model = AutoModelForImageClassification.from_pretrained("vit_base")

In [23]:
from IPython.display import FileLink 

In [25]:
!FileLink("vit_base")

/bin/bash: -c: line 0: syntax error near unexpected token `"vit_base"'
/bin/bash: -c: line 0: `FileLink("vit_base")'


## Visulization

In [None]:
import matplotlib.pyplot as plt


# Extract the training and evaluation history
training_history = trainer.state.log_history

# Extract the training loss, evaluation loss, and evaluation ROC-AUC
train_loss = [entry['loss'] for entry in training_history if 'loss' in entry]
eval_loss = [entry['eval_loss'] for entry in training_history if 'eval_loss' in entry]
eval_roc_auc = [entry['eval_roc_auc'] for entry in training_history if 'eval_roc_auc' in entry]

# Plot the training loss
plt.plot(train_loss, label="Training Loss")
plt.xlabel("Training Steps")
plt.ylabel("Loss")
plt.title("Training Loss")
plt.legend()
plt.show()

# Plot the evaluation loss and ROC-AUC
epochs = list(range(1, len(eval_loss) + 1))
fig, ax1 = plt.subplots()

ax1.set_xlabel("Epochs")
ax1.setylabel("Loss")
ax1.plot(epochs, eval_loss, label="Evaluation Loss", color='tab:red')
ax1.tick_params(axis='y', labelcolor='tab:red')
ax1.legend(loc='upper left')

ax2 = ax1.twinx() # instantiate a second axes that shares the same x-axis
ax2.set_ylabel("ROC-AUC")
ax2.plot(epochs, eval_roc_auc, label="Evaluation ROC-AUC", color='tab:blue')
ax2.tick_params(axis='y', labelcolor='tab:blue')
ax2.legend(loc='upper right')

fig.tight_layout() # otherwise the right y-label is slightly clipped
plt.title("Evaluation Loss and ROC-AUC")
plt.show()
