In [69]:
from transformers import pipeline, set_seed, GPT2Tokenizer, GPT2LMHeadModel
from torch import tensor, numel
from bertviz import model_view
set_seed(42)


In [70]:
generator = pipeline( 'text-generation', model='gpt2' )
generator ("Hello, I'm a language model and I", max_length=30, num_return_sequences=3)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[{'generated_text': "Hello, I'm a language model and I want to be able to understand these things. It's not just a game, it's a process."},
 {'generated_text': "Hello, I'm a language model and I'm not just talking about semantics and syntax. One of the things I find interesting about this is that you"},
 {'generated_text': "Hello, I'm a language model and I've come up with something that doesn't suck – what you do when you try and figure out what you"}]

In [71]:
tokenizer=GPT2Tokenizer.from_pretrained("gpt2")
"vishal" in tokenizer.get_vocab()

False

In [72]:
tokenizer.convert_ids_to_tokens(tokenizer.encode("I am Vishal"))

['I', 'Ġam', 'ĠVish', 'al']

In [73]:
encode=tokenizer.encode("I am Vishal",return_tensors="pt")

In [74]:
model=GPT2LMHeadModel.from_pretrained("gpt2")

In [75]:
model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [76]:
model.lm_head

Linear(in_features=768, out_features=50257, bias=False)

In [77]:
params=0
for i in model.parameters():
    params+=numel(i)
print(params)

124439808
