In [2]:
import os
import argparse
import json
import math
import os
import random
from pprint import pformat
import time

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

import nltk
import datasets
import evaluate

import transformers
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelForCausalLM,
    AutoTokenizer,
    SchedulerType,
    get_scheduler,
    set_seed,
)

from accelerate import Accelerator
from accelerate.utils import set_seed
from datasets import load_dataset

import wandb
from tqdm.auto import tqdm, trange
from loguru import logger

from adapters import LlamaAdapterModel, T5AdapterModel

import peft_comparison
import peft_comparison.text2text_utils
import peft_comparison.mappings
from peft_comparison.collation import DataCollatorForSeq2SeqWithMetadata, DataCollatorForCausalLMWithMetadata

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
model = T5AdapterModel.from_pretrained("t5-small")

In [4]:
model.add_adapter("adapter", config="lora")

In [5]:
from adapters.lora import LoRA

In [6]:
model.transformer.encoder.block[0].layer[0].SelfAttention.k.loras.keys()

odict_keys([])

In [7]:
lora_layer = model.transformer.encoder.block[0].layer[0].SelfAttention.q.loras["adapter"]
print(type(lora_layer))
lora_layer.lora_A

<class 'adapters.lora.LoRA'>


Parameter containing:
tensor([[-2.0735e-02,  8.0785e-05, -1.6535e-02,  ...,  3.2640e-02,
          3.4925e-02, -2.3914e-02],
        [-3.5664e-02,  2.1651e-02,  3.7073e-02,  ..., -2.1957e-02,
         -3.5951e-02,  3.2999e-02],
        [ 2.1858e-02, -3.3383e-02, -3.6094e-02,  ..., -3.5184e-02,
          2.3945e-02,  5.1780e-03],
        ...,
        [ 4.3645e-02,  1.0507e-02,  3.6210e-02,  ...,  3.8441e-02,
          2.1456e-02,  2.3936e-02],
        [-3.3017e-02,  1.5436e-02, -2.4072e-02,  ..., -2.7966e-02,
         -3.3491e-03, -1.6739e-02],
        [ 1.9498e-03,  2.1769e-02, -4.0025e-02,  ...,  5.6638e-03,
          1.0099e-02,  2.1497e-02]], requires_grad=True)

In [8]:
dir(lora_layer)

['T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backward_hooks',
 '_backward_pre_hooks',
 '_buffers',
 '_call_impl',
 '_compiled_call_impl',
 '_forward_hooks',
 '_forward_hooks_always_called',
 '_forward_hooks_with_kwargs',
 '_forward_pre_hooks',
 '_forward_pre_hooks_with_kwargs',
 '_get_backward_hooks',
 '_get_backward_pre_hooks',
 '_get_name',
 '_is_full_backward_hook',
 '_load_from_state_dict',
 '_load_state_dict_post_hooks',
 '_load_state_dict_pre_hooks',
 '_maybe_warn_non_full_backward_hook',
 '_modules',
 '_named_members',
 '_non_persistent_buffers_se

In [9]:
model

T5AdapterModel(
  (transformer): T5Model(
    (shared): Embedding(32128, 512)
    (encoder): T5StackWithAdapters(
      (embed_tokens): Embedding(32128, 512)
      (block): ModuleList(
        (0): T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttentionWithAdapters(
              (SelfAttention): T5AttentionWithAdapters(
                (q): Linear(
                  in_features=512, out_features=512, bias=False
                  (loras): ModuleDict(
                    (adapter): LoRA(512, 8, 512)
                  )
                )
                (k): Linear(
                  in_features=512, out_features=512, bias=False
                  (loras): ModuleDict()
                )
                (v): Linear(
                  in_features=512, out_features=512, bias=False
                  (loras): ModuleDict(
                    (adapter): LoRA(512, 8, 512)
                  )
                )
                (o): Linear(in_features=512, out_features=512, bia

In [10]:
import peft

In [11]:
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")

In [12]:
model

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Drop

In [13]:
peft_config = peft.LoraConfig(
    target_modules=["k", "q", "v", "o", "wi", "wo"],
)
model = peft.get_peft_model(model, peft_config)

In [17]:
for name, param in model.named_parameters():
    if "lora" not in name and param.requires_grad:
        raise ValueError(f"Parameter {name} requires grad but is not in LoRA")


In [14]:
trainable_param_names = [n for n, p in model.named_parameters() if p.requires_grad]

In [15]:
trainable_param_names

['base_model.model.encoder.block.0.layer.0.SelfAttention.q.lora_A.default.weight',
 'base_model.model.encoder.block.0.layer.0.SelfAttention.q.lora_B.default.weight',
 'base_model.model.encoder.block.0.layer.0.SelfAttention.k.lora_A.default.weight',
 'base_model.model.encoder.block.0.layer.0.SelfAttention.k.lora_B.default.weight',
 'base_model.model.encoder.block.0.layer.0.SelfAttention.v.lora_A.default.weight',
 'base_model.model.encoder.block.0.layer.0.SelfAttention.v.lora_B.default.weight',
 'base_model.model.encoder.block.0.layer.0.SelfAttention.o.lora_A.default.weight',
 'base_model.model.encoder.block.0.layer.0.SelfAttention.o.lora_B.default.weight',
 'base_model.model.encoder.block.0.layer.1.DenseReluDense.wi.lora_A.default.weight',
 'base_model.model.encoder.block.0.layer.1.DenseReluDense.wi.lora_B.default.weight',
 'base_model.model.encoder.block.0.layer.1.DenseReluDense.wo.lora_A.default.weight',
 'base_model.model.encoder.block.0.layer.1.DenseReluDense.wo.lora_B.default.weigh