In [1]:
import sys
import os

# Current working directory of the kernel (normally the notebook's folder).
current_dir = os.getcwd()

# Parent directory (python_dir); presumably where the local `mytorch` package
# imported below lives -- TODO confirm the layout.
project_dir = os.path.dirname(current_dir)
# Guard the append so that re-running this cell does not keep adding the same
# entry to sys.path.
if project_dir not in sys.path:
    sys.path.append(project_dir)


import numpy as np
import cupy as cp
import mytorch as torch
import mytorch.ops as ops
import mytorch.nn as nn
import mytorch.optim as optim
import mytorch.data as data
from mytorch.array_device import *
from mytorch.array_api import array_api
import mytorch.perfomance as pf

In [2]:
# Smoke test: build the decoder and push one random token sequence through a
# forward pass followed by a backward pass.
cp.random.seed(42)  # seed CuPy's global RNG before any sampling happens

# One sequence of 64 token ids drawn from [0, 60).
x = torch.Tensor(
    cp.random.randint(0, 60, size=(1, 64))
)

# `model` is reused by the training and generation cells further down.
model = nn.TransformerDecoder(
    vocab_size=65,
    max_len=64,
    n_embd=256,
    n_head=8,
    n_layer=3,
    dropout=0.1,
    device=gpu(),
)

out = model(x)
# NOTE(review): `total_size` is a class-level attribute of Tensor; what it
# counts is not visible from this notebook.
print(torch.Tensor.total_size)
print(out.shape)
out.backward()

20136960
(1, 64, 65)


In [3]:
# Embedding sanity check: look up a (5, 10) grid of random ids in [0, 10),
# print the weight table, back-propagate, then print the weight gradient.
# NOTE(review): NumPy's RNG is not seeded here, so the sampled ids differ
# between runs.
x = torch.Tensor(
    np.random.randint(0, 10, size=(5, 10))
)
y = nn.Embedding(1000, 512)
z = y(x)

print(y.weight)
z.backward()
print(y.weight.grad)

[[0.13813812 0.4752619  0.81847996 ... 0.21780637 0.9987628  0.13273256]
 [0.4876242  0.63443345 0.5689269  ... 0.49042484 0.29066005 0.83576864]
 [0.8202983  0.64072603 0.08251388 ... 0.81879646 0.8278121  0.73016775]
 ...
 [0.8528449  0.22381213 0.60463625 ... 0.45365176 0.8378511  0.27057022]
 [0.852119   0.29379985 0.06153331 ... 0.740683   0.46846318 0.8050998 ]
 [0.47174126 0.6581016  0.8284623  ... 0.7179612  0.5521711  0.28439957]]
[[6. 6. 6. ... 6. 6. 6.]
 [3. 3. 3. ... 3. 3. 3.]
 [6. 6. 6. ... 6. 6. 6.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [4]:
# Read the Shakespeare text corpus from <grandparent dir>/data/Shakespeare.
father_dir = os.path.dirname(project_dir)
# os.path.join keeps the path portable instead of hard-coding '/' separators.
corpus_path = os.path.join(father_dir, 'data', 'Shakespeare', 'input.txt')
with open(corpus_path, 'r', encoding='utf-8') as f:
    text = f.read()

# All unique characters that occur in the text, in sorted order.
chars = sorted(set(text))
vocab_size = len(chars)

# Character <-> integer id lookup tables.
stoi = {ch: i for i, ch in enumerate(chars)}
itos = {i: ch for i, ch in enumerate(chars)}


def encode(s):
    """Encoder: take a string, return the list of integer ids of its characters.

    Raises KeyError for a character that does not occur in `text`.
    """
    return [stoi[c] for c in s]


def decode(l):
    """Decoder: take a list of integer ids, return the corresponding string.

    Raises KeyError for an id that is not a key of `itos`.
    """
    return ''.join(itos[i] for i in l)


# The whole corpus as one 1-D integer id array on the GPU.
# NOTE(review): this rebinds `data`, which the import cell also uses as the
# alias for `mytorch.data`; the module is unreachable under that name after
# this cell runs.
data = cp.array(encode(text), dtype="int")

In [5]:
# Hyperparameters for the training run.
batch_size = 256  # number of sequences sampled per batch in get_batch()
max_len = 64      # length (in tokens) of every sampled sequence
n_embd = 256      # NOTE(review): not referenced by the visible cells
device = 'gpu'
max_iters = 200   # number of optimisation steps in the training loop

# Optimiser over the parameters of the previously built `model`, and the loss.
optimizer = optim.Adam(
    model.parameters(),
    lr=0.001,
    weight_decay=0.001,
)
criterion = nn.SoftmaxLoss()
# data loading
def get_batch():
    """Sample one random batch of input/target sequences from the global `data`.

    Draws `batch_size` start offsets uniformly from [0, len(data) - max_len).
    Each input row is `max_len` consecutive ids starting at the offset; the
    matching target row is the same window shifted one position to the right.

    Returns:
        (x, y): two tensors wrapping arrays of shape (batch_size, max_len).
    """
    starts = cp.random.randint(0, len(data) - max_len, size=(batch_size,))

    input_rows = []
    target_rows = []
    for start in starts:
        input_rows.append(data[start:start + max_len])
        target_rows.append(data[start + 1:start + max_len + 1])

    x = torch.Tensor(cp.stack(input_rows))
    y = torch.Tensor(cp.stack(target_rows))

    # NOTE(review): the return values of .to() are discarded. If .to() returns
    # a new tensor rather than moving in place, these two calls have no effect
    # -- confirm against mytorch.
    x.to(device)
    y.to(device)
    return x, y

In [None]:
# Training loop: one randomly sampled batch per optimisation step.
# The loop variable is named `step` rather than `iter` so the builtin `iter()`
# is not shadowed in the notebook's global namespace.
for step in range(max_iters):
    # presumably clears gradients left over from the previous step -- TODO confirm
    optimizer.reset_grad()

    # sample a batch of data
    xb, yb = get_batch()

    # evaluate the loss: flatten (B, T, C) logits and (B, T) targets so that
    # every token position becomes one row for the criterion
    logits = model(xb)
    B, T, C = logits.shape
    logits = logits.reshape((B * T, C))
    # one-element tuple, consistent with the tuple shape used for the logits
    yb = yb.reshape((B * T,))
    loss = criterion(logits, yb)
    print(f"step {step}: train loss {loss.cupy():.4f}")

    loss.backward()
    optimizer.step()


In [6]:

def generate(idx, max_new_tokens):
    """Sample tokens autoregressively from the global `model`, one per step.

    Args:
        idx: (B, T) array of token indices forming the current context.
        max_new_tokens: number of sampling steps to perform.

    Yields:
        A (B, 1) array holding the token index sampled for each row at this
        step. The running sequence itself is kept internally and not yielded.
    """
    # NOTE(review): the model was built with dropout=0.1 and no switch to an
    # evaluation mode is visible here -- confirm whether dropout is active
    # while sampling.
    for _ in range(max_new_tokens):
        # crop idx to the last max_len tokens
        idx_cond = idx[:, -max_len:]
        # get the predictions
        logits = model(torch.Tensor(idx_cond))
        # focus only on the last time step
        logits = logits.cupy()[:, -1, :]  # becomes (B, C)
        # apply softmax to get probabilities
        probs = ops.softmax(torch.Tensor(logits), axis=-1)  # (B, C)
        # sample one index per row from its distribution; CuPy is bound as
        # `cp` in this notebook, so use that alias rather than `cupy`
        idx_next = cp.array(
            [cp.random.choice(logits.shape[1], size=1, p=p) for p in probs.cupy()]
        )  # (B, 1)
        # append sampled index to the running sequence
        idx = cp.concatenate((idx, idx_next), axis=-1)  # (B, T+1)

        yield idx_next

In [7]:
import sys
# Generate from the model, starting from a single token with id 0, and stream
# the text to the cell output as it is produced.
context = cp.zeros((1, 1), dtype=int)
gen = generate(context, max_new_tokens=500)
for updated_sequence in gen:
    # Each item holds only the newly sampled token(s); decode row 0 and emit it
    # immediately, flushing so the output appears incrementally.
    print(decode(updated_sequence[0].tolist()), end="", flush=True)

mgggOI$oGHWYKIQ'JITQQdMQgQ-'-aTnaM::aamdaP',dGdpr:iVpxdZ-MWlmQCKmTaTTxap!xaJ'xooax:$O-
dTpxXsJdTgxZ-WADUaHWm&&VaZxasTWcaQTruVxYJEaPpwXxdpDCGTamxK-apComYuydcgpBLwKTTHTuTKxGaV'doa$QDxxTQxWxIxToxJWczTuLTGWwx-xcp,dDTVn-TuconKTYWZ,UNLuaa,-TvWxdxTd
odPaXYsxapLWEgaX-acQuLTxKasVoPam:GGadOxqlGhxHDc TZQJmcpQQxrWwxJdJdaT
pQxTWCTxAYQxhJxXAHxxx-xKmMxTlOxozmDxToaTWJWxjw
'dTgoxxjVTxGDrcOuOLT:
LIrauaCdxxx.xQoaKagxgo:-KaJ$paBbK-WTGdbpaCGYcsw:kMDZrKaxoDQ-dY
rtG
apo-GKJdQQT:TCzxaaQ-xKa$-UaP
DTooQoT!WDoKTxx!mJ txvc