In [2]:
from transformers import GPT2LMHeadModel as GPT2

In [1]:
from transformers import pipeline, set_seed
generator = pipeline('text-generation', model='gpt2')
set_seed(42)
generator("The answer to the ultimate question of life, what is a language model?", max_length=30, num_return_sequences=5)

Device set to use mps:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=30) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[{'generated_text': 'The answer to the ultimate question of life, what is a language model? The answer to the question of language is not a matter of semantics but of what is the language model. This is a question of the essence of language.\n\nThe language model is a matter of understanding the concepts in a language model. The understanding of the concepts is a matter of understanding the laws of the language with which the language model is based. The understanding of the laws of the language with which the language model is based is a matter of understanding the laws of the language with which the language model is based. Language models are not the same as linguistic models. They are different.\n\nThe relationship between language models and the laws of the language is one of understanding the concepts in a language model. The understanding of the concepts is not the same as understanding the laws of the language with which the language model is based.\n\nThe understanding of the 

In [7]:
model_hf = GPT2.from_pretrained("gpt2")
for k,v in model.state_dict().items():
    print (f"Param: {k}, Shape: {v.shape}")

Param: transformer.wte.weight, Shape: torch.Size([50257, 768])
Param: transformer.wpe.weight, Shape: torch.Size([1024, 768])
Param: transformer.h.0.ln_1.weight, Shape: torch.Size([768])
Param: transformer.h.0.ln_1.bias, Shape: torch.Size([768])
Param: transformer.h.0.attn.c_attn.weight, Shape: torch.Size([768, 2304])
Param: transformer.h.0.attn.c_attn.bias, Shape: torch.Size([2304])
Param: transformer.h.0.attn.c_proj.weight, Shape: torch.Size([768, 768])
Param: transformer.h.0.attn.c_proj.bias, Shape: torch.Size([768])
Param: transformer.h.0.ln_2.weight, Shape: torch.Size([768])
Param: transformer.h.0.ln_2.bias, Shape: torch.Size([768])
Param: transformer.h.0.mlp.c_fc.weight, Shape: torch.Size([768, 3072])
Param: transformer.h.0.mlp.c_fc.bias, Shape: torch.Size([3072])
Param: transformer.h.0.mlp.c_proj.weight, Shape: torch.Size([3072, 768])
Param: transformer.h.0.mlp.c_proj.bias, Shape: torch.Size([768])
Param: transformer.h.1.ln_1.weight, Shape: torch.Size([768])
Param: transformer.h.

In [2]:
from gpt2 import GPT as gpt
model = gpt.from_pretrained('gpt2')

Loading weights from Hugging Face model: gpt2
Weights loaded successfully.


In [3]:
import jax
import jax.numpy as jnp
from flax import nnx
import tiktoken


# --- 2. Initialize the tokenizer ---
enc = tiktoken.get_encoding("gpt2")

# --- 3. Encode a prompt ---
prompt_text = "The answer to the ultimate question of life, the universe, and "
prompt_tokens = enc.encode_ordinary(prompt_text)
print(f"Prompt tokens: {prompt_tokens}")

# --- 4. Format the input tensor ---
# The model expects a batch dimension, so shape is (1, T)
input_idx = jnp.array([prompt_tokens])

# --- 5. Set up for generation ---
# Create a new RNGs object specifically for generation
rng_key = jax.random.key(42)
rngs = nnx.Rngs(default=rng_key) # Use 'default' as generate expects it

# --- 6. Generate text ---
print("Generating text...")
# The first argument is the tokenized prompt
# The second is the number of new tokens to generate
output_tokens = model.generate(
    input_idx,
    new_tokens=50,
    temperature=0.7,
    top_k=40,
    rngs=rngs
)
print(f"Output Tokens: {output_tokens}")
# The output includes the prompt, so we can print it all
generated_text = enc.decode(output_tokens[0].tolist())

print("\n--- Generated Text ---")
print(generated_text)

Prompt tokens: [464, 3280, 284, 262, 8713, 1808, 286, 1204, 11, 262, 6881, 11, 290, 220]
Generating text...
Output Tokens: [[ 464 3280  284  262 8713 1808  286 1204   11  262 6881   11  290  220
  1849  464 1917  351  257 1285   11  287  262  471   13  632  338  257
   737  198  464  691  257    8  198  198  198  198  198  198  198  198
   198  198  198  198  198  198  198  198  198  198  198  198  198  198
   198  198  198  198  198  198  198  198]]

--- Generated Text ---
The answer to the ultimate question of life, the universe, and  The problem with a week, in the U. It's a).
The only a)
































In [None]:
import torch
from transformers import GPT2LMHeadModel, AutoTokenizer

# 1. Set the device (use GPU if available)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# 2. Load the tokenizer and pre-trained model
tokenizer = AutoTokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2').to(device)

# GPT-2 doesn't have a default padding token. We'll use the end-of-sentence token.
tokenizer.pad_token = tokenizer.eos_token

# 3. Define the prompt
prompt_text = "The answer to the ultimate question of life, the universe, and"

# 4. Encode the prompt
# The tokenizer returns a dictionary, we need the 'input_ids' tensor
inputs = tokenizer(prompt_text, return_tensors='pt')
input_ids = inputs.input_ids.to(device)

# 5. Generate text
# We use similar parameters to your JAX implementation for a fair comparison
# Note: do_sample=True is crucial to enable temperature and top_k sampling
print("Generating text with Hugging Face model...")
output_sequences = model.generate(
    #input_ids=input_ids,
    **inputs,
    max_new_tokens=50,
    temperature=0.7,
    top_k=40,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id # Suppress a warning
)

# 6. Decode the output
# The output contains the prompt, so we decode the whole sequence
generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)

print("\n--- Hugging Face Generated Text ---")
print(generated_text)


Using device: cpu
