<a href="https://colab.research.google.com/github/yashsolanki162003/Workflow_of_ChatGPT_Model/blob/main/Workflow_of_ChatGPT_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q transformers sentence-transformers tiktoken faiss-cpu matplotlib

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m64.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
import numpy as np
import tiktoken
import torch
import torch.nn.functional as F
from transformers import GPT2TokenizerFast, GPT2LMHeadModel
from sentence_transformers import SentenceTransformer
import matplotlib.pyplot as plt
# Default size (width, height in inches) for every figure created in this notebook.
plt.rc("figure", figsize=(6, 4))
def shortvec(v, n=8):
  """Flatten ``v`` and return its leading ``n`` entries as a plain Python list.

  Used to print a compact preview of long vectors.
  """
  flat = np.array(v).reshape(-1)  # 1-D view in row-major order
  head = flat[:n]
  return head.tolist()
# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
print("Using device:", device)

Using device: cpu


In [None]:
# The user input that the following cells tokenize, embed and feed to the model.
prompt = "Hii"
# Print the quoted representation so the exact characters of the prompt are visible.
print("User prompt:", f"{prompt!r}")

User prompt: 'Hii'


In [None]:
# cl100k_base: encoding used by many modern GPT models
enc = tiktoken.get_encoding("cl100k_base")

# Encode the prompt to token ids, then decode each id on its own to see the
# piece of text that the individual token stands for.
tiktoken_ids = enc.encode(prompt)
tiktoken_strings = list(map(lambda token_id: enc.decode([token_id]), tiktoken_ids))

print("tiktoken token IDs:", tiktoken_ids)
print("tiktoken token strings:", tiktoken_strings)
print("tiktoken token count:", len(tiktoken_strings))

tiktoken token IDs: [39, 3893]
tiktoken token strings: ['H', 'ii']
tiktoken token count: 2


In [None]:
# Load the tokenizer that belongs to the "gpt2" checkpoint.
gpt2_tok = GPT2TokenizerFast.from_pretrained("gpt2")
# Ensure a pad token exists for operations that require one. GPT-2 ships without
# a padding token; reuse the end-of-text token rather than registering a new
# "[PAD]" entry. A newly added token gets an id one past the checkpoint's
# embedding table, so feeding it to the model would fail unless the model's
# embeddings were resized to match.
gpt2_tok.pad_token = gpt2_tok.eos_token

# Tokenize the prompt without adding any special tokens.
gpt2_ids = gpt2_tok.encode(prompt, add_special_tokens=False)
# Vocabulary strings behind each id, and the ids decoded back to text.
gpt2_tokens = gpt2_tok.convert_ids_to_tokens(gpt2_ids)
gpt2_decoded = gpt2_tok.decode(gpt2_ids)

print("GPT-2 token ids:", gpt2_ids)
print("GPT-2 token strings:", gpt2_tokens)
print("GPT-2 decoded:", repr(gpt2_decoded))

GPT-2 token ids: [39, 4178]
GPT-2 token strings: ['H', 'ii']
GPT-2 decoded: 'Hii'


In [None]:
# Sentence-embedding model (small, fast).
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed the whole prompt as a single NumPy vector.
prompt_emb = embed_model.encode(prompt, convert_to_numpy=True)

print("Prompt embedding shape:", np.shape(prompt_emb))
print("Prompt embedding (first dims):", shortvec(prompt_emb, n=8))

Prompt embedding shape: (384,)
Prompt embedding (first dims): [-0.1123691201210022, 0.1150505393743515, 0.029519375413656235, 0.002057114616036415, -0.0479264035820961, -0.07721183449029922, 0.08405883610248566, -0.0073411064222455025]


In [None]:
# Embed each tiktoken piece on its own (one vector per token string) and print
# a short preview of every resulting vector.
token_embs = []
for i, t in enumerate(tiktoken_strings):
  emb = embed_model.encode(t, convert_to_numpy=True)
  token_embs.append(emb)
  print(f"Token {i} '{t}' -> dim {emb.shape} -> {shortvec(emb)}")

Token 0 'H' -> dim (384,) -> [-0.057101570069789886, 0.0941014438867569, -0.04296128824353218, 0.015190373174846172, 0.0035909090656787157, 0.019870700314641, 0.0812176913022995, 0.037185586988925934]
Token 1 'ii' -> dim (384,) -> [-0.029413770884275436, 0.017667163163423538, -0.014803135767579079, 0.05054305121302605, -0.03761046752333641, 0.005316558293998241, 0.10382639616727829, 0.05065855011343956]


In [None]:
from numpy.linalg import norm

# Cosine similarity between the whole-prompt embedding and each per-token embedding.
prompt_norm = norm(prompt_emb)  # does not change between iterations, so compute it once
for i, emb in enumerate(token_embs):
  denom = prompt_norm * norm(emb)
  sim = float(np.dot(prompt_emb, emb) / denom)
  print(f"cosine(prompt, token_{i}='{tiktoken_strings[i]}') = {sim:.4f}")

cosine(prompt, token_0='H') = 0.5304
cosine(prompt, token_1='ii') = 0.3688


In [None]:
# Load GPT-2 with the eager attention implementation, place it on the selected
# device and switch it to evaluation mode.
model = GPT2LMHeadModel.from_pretrained("gpt2", attn_implementation="eager").to(device).eval()

# Ask the model to return per-layer hidden states and attention maps by default.
model.config.output_hidden_states = True
model.config.output_attentions = True

# Wrap the GPT-2 token ids in an outer list to get a batch of one sequence.
input_ids = torch.tensor([gpt2_ids], dtype=torch.long, device=device)

# Single forward pass; gradients are not needed for inspection.
with torch.no_grad():
  outputs = model(input_ids=input_ids, output_hidden_states=True, output_attentions=True)

logits = outputs.logits  # (batch, seq_len, vocab_size)
hidden_states = outputs.hidden_states  # tuple: (embedding_output, layer1_out, ..., last_layer_out)
attentions = outputs.attentions  # tuple: one per layer (batch, num_heads, seq_len, seq_len)

print("Logits shape:", logits.shape)
print("Number of hidden-state tensors (incl embeddings):", len(hidden_states))
print("Hidden state last layer shape:", hidden_states[-1].shape)
print("Number of attention tensors (layers):", len(attentions))
print("Attention shape (layer 0):", attentions[0].shape)

Logits shape: torch.Size([1, 2, 50257])
Number of hidden-state tensors (incl embeddings): 13
Hidden state last layer shape: torch.Size([1, 2, 768])
Number of attention tensors (layers): 12
Attention shape (layer 0): torch.Size([1, 12, 2, 2])
