In [1]:
import os
import pickle
from contextlib import nullcontext
import torch
import tiktoken
from model import GPTConfig, GPT

In [2]:
# -----------------------------------------------------------------------------
# Sampling configuration: edit these values to choose the model, prompt and decoding settings.
init_from = 'resume' # either 'resume' (from an out_dir) or a gpt2 variant (e.g. 'gpt2-xl')
out_dir = 'out-mk-src' # ignored if init_from is not 'resume'
start = "A man is the swordking." # or "<|endoftext|>" or etc. Can also specify a file, use as: "FILE:prompt.txt"
num_samples = 1 # number of samples to draw
max_new_tokens = 500 # number of tokens generated in each sample
temperature = 0.8 # 1.0 = no change, < 1.0 = less random, > 1.0 = more random, in predictions
top_k = 200 # retain only the top_k most likely tokens, clamp others to have 0 probability
seed = 1337
device = 'cuda' # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1', etc.
# Check that CUDA is present before asking about bf16: on a CPU-only machine
# torch.cuda.is_bf16_supported() can raise on older PyTorch releases instead of returning False.
dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16' # 'float32' or 'bfloat16' or 'float16'
compile = False # use PyTorch 2.0 to compile the model to be faster
# exec(open('configurator.py').read()) # overrides from command line or config file
# -----------------------------------------------------------------------------

# Seed the CPU and CUDA generators so repeated runs draw the same samples.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul
torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn
device_type = 'cuda' if 'cuda' in device else 'cpu' # for later use in torch.autocast
# Map the dtype name chosen above onto the matching torch dtype object.
ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
# Autocast is only used on CUDA; on CPU the context manager is a no-op.
ctx = nullcontext() if device_type == 'cpu' else torch.amp.autocast(device_type=device_type, dtype=ptdtype)

# model
# Build the GPT either from a checkpoint stored in out_dir or from pretrained GPT-2 weights.
if init_from == 'resume':
    # init from a model saved in a specific directory
    ckpt_path = os.path.join(out_dir, 'ckpt.pt')
    checkpoint = torch.load(ckpt_path, map_location=device)
    gptconf = GPTConfig(**checkpoint['model_args'])
    model = GPT(gptconf)
    state_dict = checkpoint['model']
    # Strip the '_orig_mod.' prefix from parameter names so they match the keys GPT expects
    # (presumably the prefix comes from saving a torch.compile-wrapped model -- TODO confirm).
    unwanted_prefix = '_orig_mod.'
    for k in list(state_dict):  # iterate over a snapshot: the dict is mutated inside the loop
        if k.startswith(unwanted_prefix):
            state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
    model.load_state_dict(state_dict)
elif init_from.startswith('gpt2'):
    # init from a given GPT-2 model
    model = GPT.from_pretrained(init_from, dict(dropout=0.0))
else:
    # Fail here with a clear message rather than with a NameError on `model` below.
    raise ValueError(f"Unknown init_from {init_from!r}: expected 'resume' or a name starting with 'gpt2'")

model.eval()
model.to(device)
if compile:
    model = torch.compile(model) # requires PyTorch 2.0 (optional)

# Choose the tokenizer: use the dataset's own meta.pkl vocabulary when the checkpoint
# names a dataset and that file exists, otherwise fall back to GPT-2 BPE.
load_meta = False
if init_from == 'resume':
    # older checkpoints might not have these...
    if 'config' in checkpoint and 'dataset' in checkpoint['config']:
        meta_path = os.path.join('data', checkpoint['config']['dataset'], 'meta.pkl')
        load_meta = os.path.exists(meta_path)

if not load_meta:
    # ok let's assume gpt-2 encodings by default
    print("No meta.pkl found, assuming GPT-2 encodings...")
    enc = tiktoken.get_encoding("gpt2")

    def encode(s):
        """Turn a string into GPT-2 token ids, allowing the <|endoftext|> special token."""
        return enc.encode(s, allowed_special={"<|endoftext|>"})

    def decode(l):
        """Turn a list of GPT-2 token ids back into a string."""
        return enc.decode(l)
else:
    print(f"Loading meta from {meta_path}...")
    # CAUTION: pickle.load can execute arbitrary code; only use meta.pkl files you trust.
    with open(meta_path, 'rb') as f:
        meta = pickle.load(f)
    # TODO want to make this more general to arbitrary encoder/decoder schemes
    stoi = meta['stoi']
    itos = meta['itos']

    def encode(s):
        """Look up each element of s in stoi and return the list of ids."""
        return [stoi[c] for c in s]

    def decode(l):
        """Look up each id of l in itos and concatenate the results into one string."""
        return ''.join([itos[i] for i in l])

# Resolve the prompt and turn it into a batch of one token sequence for the model.
file_prefix = 'FILE:'
if start.startswith(file_prefix):
    # Everything after the prefix is the path of a UTF-8 text file that holds the prompt.
    prompt_path = start[len(file_prefix):]
    with open(prompt_path, 'r', encoding='utf-8') as f:
        start = f.read()

# start_custom = "mortal kombat character's backstories of" + start
start_ids = encode(start)
# start_ids = encode(start_custom)
# unsqueeze(0) adds the leading batch dimension (same result as indexing with [None, ...])
x = torch.tensor(start_ids, dtype=torch.long, device=device).unsqueeze(0)

# run generation
# Draw num_samples continuations of the prompt; each one is printed and saved to testOutput.txt.
sample_separator = '---------------'
with torch.no_grad():
    with ctx:
        for k in range(num_samples):
            y = model.generate(x, max_new_tokens, temperature=temperature, top_k=top_k)
            text = decode(y[0].tolist())
            # Truncate the file for the first sample and append afterwards, so every sample
            # is kept rather than each iteration overwriting the previous one.
            # UTF-8 is explicit so the write does not depend on the platform default
            # encoding (the prompt file is read as UTF-8 as well).
            with open("testOutput.txt", "w" if k == 0 else "a", encoding="utf-8") as file:
                if k > 0:
                    file.write('\n' + sample_separator + '\n')
                # Write the text to the file
                file.write(text)

            print("Text saved to file.")
            print(text)
            print(sample_separator)
            

number of parameters: 29.94M
No meta.pkl found, assuming GPT-2 encodings...
Text saved to file.
A man is the swordking. He has the appearance of the protagonists when the appearance of the appearance of evil. The name and take time of the second tournament, but his ancestors for the original timeline, but had him to find the tenth while fighting for his demise. His old age, he has also been seen in the Netherrealm, even though the merger, as a mysterious entitiesaturally stretched into a thorough guise to his true forms. The hat can quickly became part. His primary weapon is his soul had his true lizard form. As such, he possesses the protagonists when he's powers extend to using it as he is able to shape-living objects he can even further his soul energy. He taken lightly, he has access the form of dark substance or smoke in MK11, but his armor increased and lava and bone. In the Dragon King Onaga's wraith has also given his teleporting by Raiden's Konquest mode. Noob has been establi

In [3]:
# Display the string assigned to `text` by the generation loop (the last sample decoded).
text

"A man is the swordking. He has the appearance of the protagonists when the appearance of the appearance of evil. The name and take time of the second tournament, but his ancestors for the original timeline, but had him to find the tenth while fighting for his demise. His old age, he has also been seen in the Netherrealm, even though the merger, as a mysterious entitiesaturally stretched into a thorough guise to his true forms. The hat can quickly became part. His primary weapon is his soul had his true lizard form. As such, he possesses the protagonists when he's powers extend to using it as he is able to shape-living objects he can even further his soul energy. He taken lightly, he has access the form of dark substance or smoke in MK11, but his armor increased and lava and bone. In the Dragon King Onaga's wraith has also given his teleporting by Raiden's Konquest mode. Noob has been established that Shao Kahn is a human of a human of a human of a human of a human of smoke is a human 