In [2]:
import sys
import os
from pathlib import Path
# Directory the kernel is running in (the current working directory).
sub_project_dir = Path.cwd()
# Its parent directory.
project_dir = sub_project_dir.parent
# Prepend the directory *containing* `project_dir` to the import search path
# (presumably so the `llm_papers.*` imports below resolve - confirm repo layout).
sys.path[:0] = [project_dir.parent.as_posix()]

import evaluate
import datasets
from tqdm import trange, tqdm
import numpy as np
import torch
from matplotlib import pyplot as plt
import timm
from transformers import DataCollatorWithPadding, Trainer, TrainingArguments
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler, BatchSampler

%matplotlib inline
%load_ext autoreload
%autoreload 2
from llm_papers.clip.dataset import load_cifar100, load_coco2017
from llm_papers.clip.model import CLIP, load_pretrained_roberta, load_pretrained_vit
from llm_papers.utils import device

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Each project loader returns a pair: the input pre-processor (tokenizer for
# text, transform for images) and the corresponding pretrained model.
tokenizer, pretrained_text_model = load_pretrained_roberta()
transform, pretrained_vision_model = load_pretrained_vit()

In [4]:
# Build the project's CLIP module and hand it the two pretrained models.
model = CLIP()
# NOTE(review): presumably copies the pretrained text/vision weights into the
# CLIP towers - confirm in llm_papers.clip.model.
model.load(pretrained_text_model, pretrained_vision_model)
# NOTE(review): presumably disables gradients for the pretrained backbones;
# which parameters stay trainable is not visible from this notebook.
model.freeze()
# Move to the project-selected device. As the last expression of the cell this
# also displays the full module repr as the cell output.
model.to(device)

CLIP(
  (text_model): Roberta(
    (embeddings): RobertaEmbeddings(
      (word_embed): Embedding(50265, 768, padding_idx=1)
      (pos_embed): Embedding(514, 768, padding_idx=1)
      (type_embed): Embedding(1, 768, padding_idx=0)
      (norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    )
    (blocks): Sequential(
      (0): Block(
        (norm_attn): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (Wqkv): Linear(in_features=768, out_features=2304, bias=True)
          (Wo): Linear(in_features=768, out_features=768, bias=True)
        )
        (norm_mlp): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
        )
      )
      (1): Block(
        (norm_attn): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        

In [5]:
# Training data comes from the COCO 2017 loader, evaluation data from the
# CIFAR-100 loader; both receive the image transform and the text tokenizer.
train_dataset = load_coco2017(transform, tokenizer)
test_dataset = load_cifar100(transform, tokenizer)
# Pads tokenized text within a batch; sequence length is rounded up to a
# multiple of 8. Used by `collate_fn` for samples that carry their own text.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, pad_to_multiple_of=8)

In [None]:
def compute_metrics(eval_pred):
    """Compute top-1 accuracy of the image logits for ``Trainer`` evaluation.

    Args:
        eval_pred: A pair ``((text_logits, image_logits), labels)``. Only
            ``image_logits`` and ``labels`` are used: the argmax over the last
            axis of ``image_logits`` is compared element-wise with ``labels``.

    Returns:
        dict: ``{"accuracy": float}`` - the fraction of samples whose predicted
        index equals the label.

    Raises:
        ValueError: If the number of predictions differs from the number of
            labels.
    """
    (_, image_logits), labels = eval_pred
    image_predictions = np.argmax(image_logits, axis=-1).reshape(-1)
    references = np.asarray(labels).reshape(-1)

    # Flattening + an explicit length check prevents a (N, 1) label array from
    # silently broadcasting against (N,) predictions.
    if image_predictions.shape != references.shape:
        raise ValueError(
            f"Mismatch in the number of predictions ({image_predictions.shape[0]}) "
            f"and references ({references.shape[0]})"
        )

    # Accuracy is computed directly with NumPy: this function runs on every
    # evaluation pass, and re-loading a metric object each time only to take a
    # mean of matches is unnecessary work.
    return {"accuracy": float(np.mean(image_predictions == references))}


def model_init():
    """Factory passed to ``Trainer(model_init=...)``.

    Calls ``init_weights()`` on the notebook-level ``model`` and returns that
    same object; no new model instance is created.

    NOTE(review): every call re-initialises the shared global instance. If
    ``init_weights()`` also touches the pretrained towers loaded earlier in the
    notebook, those weights would be overwritten - confirm in the model code.
    """
    clip_model = model
    clip_model.init_weights()
    return clip_model


def collate_fn(features):
    """Assemble a list of dataset samples into one batch dict.

    Every key other than ``"input_ids"`` / ``"attention_mask"`` is combined with
    ``torch.stack`` across samples. The text fields are then filled in one of
    two ways:

    * samples carry ``"input_ids"``: the per-sample text fields are padded
      together by the notebook-level ``data_collator``;
    * samples carry no ``"input_ids"``: the batch reuses
      ``test_dataset.input_ids`` / ``test_dataset.attention_mask`` as-is
      (presumably one fixed set of prompts shared by all evaluation images -
      confirm against the dataset implementation).

    Args:
        features: Non-empty list of per-sample dicts sharing the same keys.

    Returns:
        dict: The collated batch.
    """
    text_keys = ("input_ids", "attention_mask")
    first = features[0]

    # Non-text fields: stack along a new leading batch dimension.
    batch = {
        name: torch.stack([sample[name] for sample in features])
        for name in first
        if name not in text_keys
    }

    if "input_ids" in first:
        # Per-sample text: let the padding collator build the text tensors.
        text_only = [{name: sample[name] for name in text_keys} for sample in features]
        batch.update(data_collator(text_only))
    else:
        # No per-sample text: attach the dataset-level token tensors.
        batch["input_ids"] = test_dataset.input_ids
        batch["attention_mask"] = test_dataset.attention_mask

    return batch


# --- Hyper-parameters -------------------------------------------------------
lr, wd = 5e-4, 0.0
batch_size, epochs = 256, 5
# Intervals, in optimizer steps, for logging / evaluation / checkpointing.
logging_steps, eval_steps, save_steps = 10, 100, 100

# --- Checkpoint directory ---------------------------------------------------
# The folder name encodes the run configuration and doubles as the run name.
output_dir = sub_project_dir.joinpath(
    "checkpoints",
    f"distilroberta_vit_b_32_224_coco2017_lr{lr}_ep{epochs}_bs{batch_size}_wd{wd}",
)
output_dir.mkdir(parents=True, exist_ok=True)

# --- Weights & Biases settings ----------------------------------------------
# NOTE(review): `report_to` is "none" below, so these presumably only matter
# when W&B reporting is switched back on - confirm.
os.environ.update({"WANDB_ENTITY": "ztzhu11", "WANDB_PROJECT": "CLIP"})

# --- Trainer configuration --------------------------------------------------
args = TrainingArguments(
    # bookkeeping
    output_dir=output_dir.as_posix(),
    run_name=output_dir.name,
    seed=42,
    report_to="none",
    # optimisation: (batch_size // 8) samples per step x 8 accumulation steps
    # gives `batch_size` samples per optimizer update on each device.
    per_device_train_batch_size=batch_size // 8,
    gradient_accumulation_steps=8,
    num_train_epochs=epochs,
    learning_rate=lr,
    weight_decay=wd,
    lr_scheduler_type="cosine_with_min_lr",
    lr_scheduler_kwargs=dict(min_lr_rate=0.1),
    # mixed precision for both training and evaluation
    fp16=True,
    fp16_full_eval=True,
    # evaluation: once before training starts, then every `eval_steps`
    eval_strategy="steps",
    eval_steps=eval_steps,
    eval_on_start=True,
    per_device_eval_batch_size=64,
    # data loading
    dataloader_num_workers=8,
    dataloader_prefetch_factor=2,
    dataloader_persistent_workers=True,
    # checkpointing: keep one checkpoint, restore the best-accuracy one at the end
    save_strategy="steps",
    save_steps=save_steps,
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    # logging
    log_level="info",
    logging_first_step=True,
    logging_steps=logging_steps,
)

# No model instance is passed; the Trainer obtains it from `model_init`.
trainer = Trainer(
    model=None,
    args=args,
    data_collator=collate_fn,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    model_init=model_init,
    compute_metrics=compute_metrics,
)

PyTorch: setting up devices


Using auto half precision backend


In [None]:
# Evaluate on the Trainer's eval dataset (`test_dataset`) without any training
# and display the metrics dict. In the saved output, eval_loss is about 4.605
# (close to ln 100) and eval_accuracy about 0.009, i.e. roughly chance level if
# the evaluation set has 100 classes.
trainer.evaluate()


***** Running Evaluation *****
  Num examples = 10000
  Batch size = 64


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
[34m[1mwandb[0m: Currently logged in as: [33mztzhu1[0m ([33mztzhu11[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Could not log the number of model parameters in Weights & Biases due to an AttributeError.


{'eval_loss': 4.604645729064941,
 'eval_model_preparation_time': 0.002,
 'eval_accuracy': 0.0087,
 'eval_runtime': 10.1329,
 'eval_samples_per_second': 986.889,
 'eval_steps_per_second': 15.494}

