In [2]:
import torch
print(torch.__version__)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
import torch.nn as nn
from torch.nn import functional as F
block_size = 8
batch_size = 4
max_iters = 10000
#eval_interal = 2500
learning_rate = 3e+4
eval_iters = 250
dropout = 0.2

2.0.1+cu117
cpu


In [18]:
with open("wizardOfOz.txt" ,"r",encoding = "utf-8") as f:
          text = f.read()
chars = sorted(set(text))
print(chars)
vocab_size = len(chars)

['\n', ' ', '!', '"', '#', '&', "'", '(', ')', '*', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\ufeff']


In [6]:
string_to_int = {ch:i for i, ch in enumerate(chars)}
int_to_string = { i:ch for i ,ch in enumerate(chars)}
encode = lambda s:[string_to_int[c] for c in s]
decode = lambda l: ''.join([int_to_string[i] for i in l])

data = torch.tensor(encode(text), dtype = torch.long)
print(data[:100])


tensor([82, 46, 63, 60,  1, 42, 73, 70, 65, 60, 58, 75,  1, 33, 76, 75, 60, 69,
        57, 60, 73, 62,  1, 60, 28, 70, 70, 66,  1, 70, 61,  1, 30, 70, 73, 70,
        75, 63, 80,  1, 56, 69, 59,  1, 75, 63, 60,  1, 49, 64, 81, 56, 73, 59,
         1, 64, 69,  1, 41, 81,  0,  1,  1,  1,  1,  0, 46, 63, 64, 74,  1, 60,
        57, 70, 70, 66,  1, 64, 74,  1, 61, 70, 73,  1, 75, 63, 60,  1, 76, 74,
        60,  1, 70, 61,  1, 56, 69, 80, 70, 69])


In [7]:
n = int(0.8*len(data))
train_data = data[:n]
val_data= data[n:]

def get_batch(split):
    data = train_data if split == 'train' else val_data
    ix = torch.randint(len(data)-block_size,(batch_size,))
    print(ix)
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    x,y = x.to(device),y.to(device)
    return x,y

x,y = get_batch('train')
print('inputs:')
print(x)
print('targets:')
print(y)

tensor([ 21245,  92060, 183692,  24047])
inputs:
tensor([[80,  1, 75, 76, 68, 57, 67, 60],
        [74, 76, 58, 63,  1, 67, 64, 62],
        [ 1, 75, 63, 56, 75,  1, 57, 60],
        [78, 56, 74,  1, 62, 70, 73, 62]])
targets:
tensor([[ 1, 75, 76, 68, 57, 67, 60, 59],
        [76, 58, 63,  1, 67, 64, 62, 63],
        [75, 63, 56, 75,  1, 57, 60, 56],
        [56, 74,  1, 62, 70, 73, 62, 60]])


In [48]:
@torch.no_grad()
def estimate_loss():
    out = {}
    model.eval()
    for split in ['train','val']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            x,y = get_batch(split)
            logits,loss = model(x,y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

In [55]:
class BigramLanguageModel(nn.Module):
    def __init__(self,vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size,vocab_size)
    def forward(self,index,targets=None):
        logits = self.token_embedding_table(index)
        if targets is None:
            loss = None
        else:
            B,T,C = logits.shape
            logits = logits.view(B*T,C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits,targets)
        return logits, loss
    def generate(self,index,max_new_tokens):
        #index is (B,T)array of indices in the current context 
        for _ in range(max_new_tokens):
            #get the predictions
            logits, loss = self.forward(index)
            #focus only on the last timestep
            logits = logits[:,-1,:]
            probs = F.softmax(logits,dim= -1)
            index_next = torch.multinomial(probs,num_samples = 1)
            index = torch.cat((index,index_next),dim =1)
        return index

model = BigramLanguageModel(vocab_size)
m = model.to(device)
context = torch.zeros((1,1),dtype = torch.long,device = device)
generated_chars = decode(m.generate(context,max_new_tokens = 500)[0].tolist())
print(generated_chars)


:'w/KDv98AH;G;'(uHZ_n#1F37OV0ChY]k,L(BVyenV2w Z.7a*W;GE.7L6 zh:
:8sUm[-rWnrfMbt﻿ua
a_,O!KDL2-M6lNE)3H8r6Bk. dgA?dq/KDfnv#]N[q4AfojM[nCGhpf9rFa'[HmdbKv9,1aW/h0QeIAQV/]u2-kk7i3r*VO(.,dzgS[O:ajwxTlo7,sJW-97uQrvo
2H9m_x1Wfv7'[0MjCz7)Zy1nZwa_(GA4(L/gmtM6&;xlf5D#hsn:ZE9vGwyTt﻿iHf""K?q#Xll9hffY]B7,-PkUmx;-k4uEHQB2hn'﻿UP]S5*9'T:DB3﻿1jrgpgwZ?3v#6['b4IxK&F
aSNnnG.ZBc4ICH'rtk]cp7mq*[IMsn'JT:F"sp_,8h-JNI*[-qCOd:,'CLxy.U!"!O!V-BA:x3u/ D3QlTW44ScZ.HF:Ek)xydsJOpZNCu[.pK7SZ-*!DbnAQCryE38#58qN4VOp7'N6/g:-q4ICxS7


In [52]:
optimizer = torch.optim.AdamW(model.parameters(),lr = learning_rate)
for iter in range(max_iters):
    if iter % eval_iters ==0:
        losses = estimate_loss()
        print(f"step:{iter},train loss:{losses['train']:.4f},val loss:{losses['val']:.4f}")
    #sample a batch of data 
    xb,yb = get_batch('train')
    #evaluate the loss
    logits,loss = model.forward(xb,yb)
    optimizer.zero_grad(set_to_none = True)
    loss.backward()
    optimizer.step()
print(loss.item())

tensor([110432,  91363,  85815,  27626])
tensor([ 37633,    394,  75385, 130477])
tensor([ 28260, 147129, 104414, 131424])
tensor([ 75206,  68169,  50574, 149487])
tensor([180835,  61959,  30379, 166776])
tensor([ 75648, 165078,  65423,  89788])
tensor([25123, 38641, 39921, 49161])
tensor([  4434,  91435,  49131, 156827])
tensor([ 72650, 127695,  17441, 128849])
tensor([ 22076,  90935,  66256, 119326])
tensor([168791, 154008,  93526,  63043])
tensor([ 63615, 114872,  68530, 103114])
tensor([184318, 159092,  18039, 115853])
tensor([25313, 64256, 52962, 29173])
tensor([160143,  75327, 106472, 117680])
tensor([137875, 123202, 172737,  73179])
tensor([ 26381, 163385,  68903,   4056])
tensor([  1263,  64253, 127805,  59665])
tensor([ 62068, 149659,  99558, 149258])
tensor([168931,  16573,  97033,  12694])
tensor([ 86095,   8819, 149550, 127153])
tensor([ 50590, 110394, 139074, 101272])
tensor([143135, 139814,  32276,  48861])
tensor([ 28959, 157848, 186087,  56883])
tensor([ 57070, 185062, 

In [57]:
context = torch.zeros((1,1),dtype = torch.long,device= device)
generated_chars = decode(m.generate(context,max_new_tokens=500)[0].tolist())
print(generated_chars)


(2sitAUu!GPFuu?OUAJksT"UqP9k;wWT"sTffrQ qnXhP]Ir9Jd::pERKt!d_qp3N3B9&b'1tLKV"FPa&db5
Aq﻿X!)Z1eloM_fOQ:B.wW﻿"NXhcSf﻿w L﻿Z1NGEoRV0!#4(#6ah2t9Vvn[5n,TZx3I8P.VdzfNDPH#/Hu/dIZw ZFlMeB'G1MsQ-(bUmcv#﻿T!/7cSZtkksT,E#J﻿)xdUdzg9z&K79]0q9Q#(:QG!:!bj?dxqN8a*
e?xl
﻿Kvi3-#(IqlN.)Pk7_IY!X[VE[Jq47mDQhVtbjhFl&DIahOb'rVBN[yrqCj['b-
RO!xT.)p8m5qRAJy.P
qY!;mD.w6aglL31e 7j
oDvi1RAk)hV2w1-#u[nKAJ[/.G W﻿5oa;.BCsSZ1V#V;v a/FAQG"﻿Tt/Y]kk9z/ ]i3﻿!Yc]mtLFHrlE#PH(WFEW﻿Sx/V'8EHr?zg(#Lg_! "P﻿#Xa]N!;NK8m,bJa*!-17tLj'#INV0M-qc
