# Fine-tuning the model to follow instructions

Load OpenAI's pretrained GPT-2 medium (355M) model for better results.

In [11]:
import tiktoken
import torch

import myllm.data as data
import myllm.gpt as gpt
import myllm.util
from gpt_download import download_and_load_gpt2

In [12]:
# Set up the tokenizer, the target device, and a freshly constructed GPT model
# built from the "gpt2-medium" configuration.
tokenizer = tiktoken.get_encoding('gpt2')
device = torch.device("cpu")

# OpenAI's GPT-2 uses bias terms on the query/key/value projections, so the
# flag is switched on before the model is instantiated.
gpt_config = myllm.util.gpt_config("gpt2-medium")
gpt_config.update(qkv_bias=True)

model = gpt.GPTModel(gpt_config)

In [13]:
# Fetch OpenAI's GPT-2 355M checkpoint (files are placed under "gpt2/") and
# copy its parameters into the model created above.
# `download_and_load_gpt2` is imported in the notebook's import cell.
# `settings` is returned alongside the weights but is not used in this cell.
settings, params = download_and_load_gpt2(
    model_size="355M", models_dir="gpt2"
)

myllm.util.load_openai_weights_into_gpt(model, params)

# Switch the module to evaluation mode (turns off the dropout layers) before
# running inference.
model.eval()

File already exists and is up-to-date: gpt2/355M/checkpoint
File already exists and is up-to-date: gpt2/355M/encoder.json
File already exists and is up-to-date: gpt2/355M/hparams.json
File already exists and is up-to-date: gpt2/355M/model.ckpt.data-00000-of-00001
File already exists and is up-to-date: gpt2/355M/model.ckpt.index
File already exists and is up-to-date: gpt2/355M/model.ckpt.meta
File already exists and is up-to-date: gpt2/355M/vocab.bpe


GPTModel(
  (tok_emb): Embedding(50257, 1024)
  (pos_emb): Embedding(1024, 1024)
  (drop_emb): Dropout(p=0.1, inplace=False)
  (trf_blocks): Sequential(
    (0): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(in_features=1024, out_features=1024, bias=True)
        (W_key): Linear(in_features=1024, out_features=1024, bias=True)
        (W_value): Linear(in_features=1024, out_features=1024, bias=True)
        (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=1024, out_features=4096, bias=True)
          (1): GELU()
          (2): Linear(in_features=4096, out_features=1024, bias=True)
        )
      )
      (norm1): LayerNorm()
      (norm2): LayerNorm()
      (drop_shortcut): Dropout(p=0.1, inplace=False)
    )
    (1): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(i

In [14]:
# Split the instruction dataset into train / validation / test portions.
INSTRUCTION_DATA_PATH = "data/instruction-data.json"
train_data, val_data, test_data = data.split_instruction_data(INSTRUCTION_DATA_PATH)

In [15]:
# Assess the pretrained model's performance before any fine-tuning.
# Seed torch's global RNG first so any random sampling downstream is repeatable.
torch.manual_seed(123)

# Use the first validation entry as the sample prompt and show it.
sample_entry = val_data[0]
input_text = data.format_instruction_input(sample_entry)
print(input_text)

Bellow is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Convert the active sentence to passive: 'The chef cooks the meal every day.'


In [16]:
# Generate a completion for the sample prompt with the pretrained model.
# Gradients are not needed for inference, so run under no_grad to avoid
# building an autograd graph (harmless if generate() already does this).
with torch.no_grad():
    token_ids = model.generate(
        idx=myllm.util.text_to_token_ids(input_text, tokenizer),
        max_new_tokens=35,
        context_size=gpt_config["context_length"],
        eos_id=data.PAD_TOKEN_ID,
    )

generated_text = myllm.util.token_ids_to_text(token_ids, tokenizer)

# Assumes the decoded text begins with the prompt itself; slice it off so that
# only the newly generated continuation remains.
response_text = generated_text[len(input_text):].strip()
# Alias under the original (misspelled) name in case later cells still use it.
repsonse_text = response_text
print(response_text)

a d a passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive passive
