In [None]:
from transformers import AutoTokenizer, AutoConfig, AutoModelForPreTraining

In [None]:
# Hugging Face checkpoint name shared by the tokenizer, config and model cells below.
MODEL = "gpt2"

In [None]:
# Load the tokenizer that belongs to the MODEL checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL)

In [None]:
# Load the checkpoint's config and copy the tokenizer's special-token ids into it.
# NOTE(review): the tokenizer repr printed further down shows only bos/eos/unk tokens
# at this point, so sep_token_id and pad_token_id are presumably None here, and the ids
# of the SPECIAL_TOKENS registered later are not reflected in this config. Confirm
# whether the config should be (re)built after add_special_tokens.
config = AutoConfig.from_pretrained(MODEL, 
    bos_token_id=tokenizer.bos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    sep_token_id=tokenizer.sep_token_id,
    pad_token_id=tokenizer.pad_token_id,
    output_hidden_states=False
)

In [None]:
# Instantiate the pretrained checkpoint with the customised config.
# For "gpt2" the auto-class resolves to GPT2LMHeadModel (see the type(model) output below).
model = AutoModelForPreTraining.from_pretrained(MODEL, config=config)

##### Example 1

In [None]:
# Inspect the freshly loaded tokenizer (vocab size, max length, special tokens).
tokenizer

GPT2TokenizerFast(name_or_path='gpt2', vocab_size=50257, model_max_length=1024, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>'}, clean_up_tokenization_spaces=True)

##### Example 2

In [None]:
# Custom special tokens, keyed by the tokenizer attribute each one fills.
SPECIAL_TOKENS = dict(
    bos_token="<|BOS|>",
    eos_token="<|EOS|>",
    sep_token="<|SEP|>",
)

In [None]:
# Check which concrete model class AutoModelForPreTraining resolved to.
type(model)

transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel

In [None]:
# Tokenizer state before the custom special tokens are registered.
tokenizer

GPT2TokenizerFast(name_or_path='gpt2', vocab_size=50257, model_max_length=1024, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>'}, clean_up_tokenization_spaces=True)

In [None]:
# The special tokens about to be registered on the tokenizer.
SPECIAL_TOKENS

{'bos_token': '<|BOS|>', 'eos_token': '<|EOS|>', 'sep_token': '<|SEP|>'}

Add `SPECIAL_TOKENS` to `tokenizer`, then handle the side effect on `model`: its token-embedding matrix must be resized to cover the newly added token ids.

In [None]:
# Register the custom special tokens on the tokenizer; the call returns the number
# of tokens that were added to the vocabulary.
tokenizer.add_special_tokens(SPECIAL_TOKENS)

3

In [None]:
# Use len(tokenizer) rather than tokenizer.vocab_size: the latter reports only the
# base vocabulary and ignores tokens added via add_special_tokens, which would leave
# the embedding matrix too small for the new token ids.
vocab_size = len(tokenizer)

In [None]:
# Display the size that will be passed to resize_token_embeddings.
vocab_size

50257

In [None]:
# Resize the model's token-embedding matrix to `vocab_size` rows; the call returns
# the resulting embedding module, which the notebook displays.
# NOTE(review): `vocab_size` has to count the added special tokens (i.e. equal
# len(tokenizer)) for the new token ids to have embedding rows — verify the value above.
model.resize_token_embeddings(vocab_size)

Embedding(50257, 768)

##### Example 3

In [None]:
# Count the parameter tensors that currently receive gradients.
sum(1 for tensor in model.parameters() if tensor.requires_grad)

148

In [None]:
# Reminder of the concrete model class before navigating its submodules.
type(model)

transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel

Fine-tune the last 6 transformer blocks

**Hint**: `model.transformer.x.parameters()`

In [None]:
# Freeze the whole network first; selected parts are re-enabled in later cells.
for weight in model.parameters():
    weight.requires_grad_(False)

In [None]:
# Number of transformer blocks held in model.transformer.h.
len(model.transformer.h)

12

In [None]:
# parameters() returns a generator, so it has to be iterated to reach the tensors.
model.transformer.h[0].parameters()

<generator object Module.parameters>

In [None]:
# Re-enable gradients for the last 6 transformer blocks.
# Slicing from the end keeps this "the last 6" for any model depth, whereas an
# absolute index threshold only matches that intent when the model has 12 blocks.
for block in model.transformer.h[-6:]:
    # Module.requires_grad_ flips the flag on every parameter of the block.
    block.requires_grad_(True)

In [None]:
# Make the ln_f module that follows the transformer blocks trainable again.
for weight in model.transformer.ln_f.parameters():
    weight.requires_grad_(True)

In [None]:
# Make the language-modelling head trainable again.
# NOTE(review): GPT-2 presumably ties lm_head.weight to the input token embedding,
# in which case this also unfreezes transformer.wte — confirm that is intended.
for weight in model.lm_head.parameters():
    weight.requires_grad_(True)

In [None]:
# Final check of the model class; it is the same as before the freezing/unfreezing cells.
type(model)

transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel