from google.colab import drive
drive.mount('/content/drive/')
%cd /content/drive/My Drive/
from google.colab import files
files.upload()

In [None]:
from google.colab import drive
drive.mount('/content/drive/')
%cd /content/drive/My Drive/
from google.colab import files
files.upload()

In [24]:
import torch
from torch.utils.data import Dataset, TensorDataset
from mingpt.model import GPT
from mingpt.trainer import Trainer
import mingpt.bpe
import tqdm

TRAIN_ITERATIONS = 1000

TRAIN_BATCH_SIZE = 32

VOCAB_SIZE = 50257
BLOCK_SIZE = 64
LR = 5e-4

In [25]:
def init_model(vocabulary_size):
    model_config = GPT.get_default_config()
    model_config.model_type = 'gpt-nano'
    model_config.vocab_size = vocabulary_size
    model_config.block_size = BLOCK_SIZE
    gpt = GPT(model_config)
    return gpt

def init_trainer(model_to_train, data):
    train_config = Trainer.get_default_config()
    train_config.learning_rate = LR
    train_config.max_iters = TRAIN_ITERATIONS
    train_config.batch_size = TRAIN_BATCH_SIZE

    def batch_end_callback(train):
        if train.iter_num % 100 == 0:
            print(
                f"iter_dt {train.iter_dt * 1000:.2f}ms; iter {train.iter_num}: train loss "
                f"{train.loss.item():.5f}")

    trainer = Trainer(train_config, model_to_train, data)
    trainer.set_callback('on_batch_end', batch_end_callback)
    return trainer


def data_by_blocks(data, block_size):
    i = 0
    x = []
    y = []
    while i < len(data) - block_size:
        x.append(data[i:i + block_size])
        y.append(data[i + 1:i + block_size + 1])
        i += 1
    return x, y



In [26]:
def clean_string(input_string):
    output_string = ""
    for i in input_string:
        if i.isalnum() or i == " " or i in "!(),.:;-":
            output_string += i.lower()
    output_string = " ".join(output_string.split())
    return output_string



class TrainSet(Dataset):
    def __init__(self, tokens, labels):
        self.tokens = tokens
        self.labels = labels

    def __len__(self):
        return len(self.tokens)

    def __getitem__(self, idx):
        return self.tokens[idx], self.labels[idx]


In [27]:
if __name__ == '__main__':
    with open("alice_in_wonderland.txt") as f:
        dataset = f.read()
    
    dataset = clean_string(dataset)
    e = mingpt.bpe.BPETokenizer()
    tokenized_data = e(dataset)
    vocab_size = tokenized_data.unique().shape[0]
    x, y = data_by_blocks(tokenized_data[0], BLOCK_SIZE)
    x = torch.stack(x)
    y = torch.stack(y)
    dataset = TrainSet(x, y)
    # dataset = TrainSet(x, y)

    model = init_model(vocab_size)
    model_trainer = init_trainer(model, dataset)
    model_trainer.run()



number of parameters: 0.25M


RuntimeError: ignored

In [9]:
model.requires_grad_=False
model.eval()

GPT(
  (transformer): ModuleDict(
    (wte): Embedding(50257, 48)
    (wpe): Embedding(64, 48)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-2): 3 x Block(
        (ln_1): LayerNorm((48,), eps=1e-05, elementwise_affine=True)
        (attn): CausalSelfAttention(
          (c_attn): Linear(in_features=48, out_features=144, bias=True)
          (c_proj): Linear(in_features=48, out_features=48, bias=True)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((48,), eps=1e-05, elementwise_affine=True)
        (mlp): ModuleDict(
          (c_fc): Linear(in_features=48, out_features=192, bias=True)
          (c_proj): Linear(in_features=192, out_features=48, bias=True)
          (act): NewGELU()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((48,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=48, o

In [11]:
 prompt = e("I am")
 prompt = prompt.to("cuda")


In [18]:
cypher = model.generate(prompt, 40)

In [19]:
cypher

tensor([[  40,  716,  284,  307,  257, 1310, 1517,   11,  290,  673,  550,  587,
          287,  257, 1310, 1517,  284,  307,  257, 1310, 1517,  284,  307,  257,
         1310, 1517,   11,  290,  673,  550,  587,  284, 5223,   11,  290,  673,
          550,  587,  287,  257, 1310, 1517]], device='cuda:0')

In [20]:
for i in cypher:
  print(e.decode(i.cpu().squeeze()))

I am to be a little thing, and she had been in a little thing to be a little thing to be a little thing, and she had been to herself, and she had been in a little thing
