In [1]:
import math
import logging

import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F

from mingpt.dataset import CharDataset
from mingpt.utils import set_seed


In [2]:
# Logging: timestamped records at INFO level and above
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(name)s -   %(message)s"
LOG_DATE_FORMAT = "%m/%d/%Y %H:%M:%S"
logging.basicConfig(format=LOG_FORMAT, datefmt=LOG_DATE_FORMAT, level=logging.INFO)

# Reproducibility: fix the seed (42) through mingpt's set_seed helper
set_seed(42)

In [3]:
# Read the input text and wrap it in a pytorch Dataset (CharDataset).
# The context manager closes the file handle as soon as the text has been
# read, rather than leaving the open handle to the garbage collector.
with open('input.txt', 'r') as corpus_file:
    text = corpus_file.read()
train_dataset = CharDataset(text, seq_len=128)

Data has 1115394 chars, 65 unique vocabs total


In [4]:
# Build the GPT model; vocabulary size and sequence length are taken from the dataset
from mingpt.model import GPT
from mingpt.config import GPTCommonConfig

# architecture settings, forwarded to the config as keyword arguments
gpt_arch = dict(num_layer=8, num_head=8, embed_dim=512)
model_conf = GPTCommonConfig(
    train_dataset.vocab_size,
    train_dataset.seq_len,
    **gpt_arch,
)
model = GPT(model_conf)

In [5]:
# Configure the GPT Trainer and run training
from mingpt.trainer import Trainer
from mingpt.config import TrainerConfig

# token counts passed to the config for the learning-rate warmup/decay settings
warmup_token_count = 512*20
final_token_count = 2*len(train_dataset)*model.seq_len

trainer_settings = dict(
    max_epochs=2,
    batch_size=512,
    learning_rate=6e-4,
    lr_decay=True,
    warmup_tokens=warmup_token_count,
    final_tokens=final_token_count,
    num_workers=4,
)
trainer_conf = TrainerConfig(**trainer_settings)

# NOTE(review): the recorded run of this cell ended with
# "TypeError: object of type 'NoneType' has no len()". The only None visible
# here is the third positional argument to Trainer below -- presumably the
# trainer calls len() on it. TODO confirm against mingpt.trainer.Trainer.
trainer = Trainer(model, train_dataset, None, trainer_conf)
trainer.train()

TypeError: object of type 'NoneType' has no len()

In [None]:
# Sample some character-level Shakespeare from the model, starting from a short prompt
from mingpt.utils import sample

context = "O God, O God!"
# map each prompt character to its index, then add a leading dimension of size 1
context_ids = [train_dataset.stoi[ch] for ch in context]
x = torch.tensor(context_ids, dtype=torch.long).unsqueeze(0).to(trainer.device)
# take element 0 of what sample() returns (presumably the single sequence in the batch)
y = sample(model, x, 2000, temperature=1.0, sample=True, top_k=10)[0]
# map the indices back to characters and join them into one string
completion = ''.join(train_dataset.itos[int(idx)] for idx in y)
print(completion)