In [1]:
import torch
import tqdm
import pickle
import random
import pandas as pd
import numpy as np
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cuda


In [None]:
augments = []
base_items = []
full_items = []
units = []
reroll_tokens = ['CAN_REROLL', 'CANT_REROLL']
action_tokens = ['ACTION_BUY', 'ACTION_SELL', 'ACTION_MOVE', 'ACTION_ITEM',
                 'ACTION_AUGMENT', 'ACTION_REROLL', 'ACTION_NONE']
other_tokens = ['GO','EOS', 'MASK', 'PAD', 'HERO_BOUNDARY', 'BENCH_BOUNDARY', 'SHOP_BOUNDARY',
                'ITEM_BOUNDARY', 'AUGMENT_BOUNDARY', 'ITEM_SLOT', 'AUGMENT_SLOT', 'HERO_SLOT']
#read in augments
with open('augments.csv', 'r') as f:
    for line in f:
        tokens = line.strip().split(',') #strip new line and commas
        string = ''.join(tokens) #join characters in string
        augments.append(string) #add to list
#read in base items
with open('base_items.csv', 'r') as f:
    for line in f:
        tokens = line.strip().split(',')
        string = ''.join(tokens)
        base_items.append(string)
#read in full items
with open('full_items.csv', 'r') as f:
    for line in f:
        tokens = line.strip().split(',')
        string = ''.join(tokens)
        full_items.append(string)
#read in units
with open('units.csv', 'r') as f:
    for line in f:
        tokens = line.strip().split(',')
        string = ''.join(tokens)
        units.append(string)
#a couple of the augments got skewed in data transfer, change them to the correct name
augments[augments.index('Cower"" Weaklings!')] = 'Cower, Weaklings!'
augments[augments.index('One"" Two"" Five!')] = 'One, Two, Five!'
augments[augments.index('10""000 IQ')] = '10,000 IQ'
augments[augments.index('One Buff"" Two Buff')] = 'One Buff, Two Buff'
tokens = augments + base_items + full_items + units + reroll_tokens + action_tokens + other_tokens #combine all tokens
hero_boundary, bench_boundary, shop_boundary, item_boundary, augment_boundary, reroll, decision = 45, 56, 62, 78, 82, 83, 84 #important positions on the board

In [None]:
#saved off the vocab so the idx would be same, read it in
with open('vocab.pkl', 'rb') as f:
    vocab = pickle.load(f)
with open('inv_vocab.pkl', 'rb') as f:
    inv_vocab = pickle.load(f)
vocab

{'Ziggs': 0,
 'Pair of Fours': 1,
 'Blazing Soul II': 2,
 "Zhonya's Paradox": 3,
 'Illaoi': 4,
 'Cower, Weaklings!': 5,
 "Slammin'+": 6,
 'ReinFOURcement': 7,
 'Adaptive Strikes': 8,
 'Rapidfire Crest': 9,
 'Syndicate Emblem': 10,
 "Nashor's Tooth": 11,
 'Statikk Shiv': 12,
 'ITEM_SLOT': 13,
 'Roll The Dice': 14,
 'Iron Assets': 15,
 'SHOP_BOUNDARY': 16,
 'Giant Slayer': 17,
 'Golden Fleece': 18,
 'Slayer Crown': 19,
 'One For All II': 20,
 'Recurve Bow': 21,
 'Senna': 22,
 "I'm the Carry Now": 23,
 'Prismatic Ticket': 24,
 'CAN_REROLL': 25,
 'Chain Vest': 26,
 'HERO_BOUNDARY': 27,
 'Missed Connections': 28,
 'Lucky Gloves': 29,
 'Bastion Crown': 30,
 'Marksman Circlet': 31,
 'Placebo': 32,
 'Bastion Crest': 33,
 'Cooking Pot': 34,
 'Marksman Crest': 35,
 'Eye For An Eye+': 36,
 'Void Swarm': 37,
 'Slayer Emblem': 38,
 'Jhin': 39,
 'New High Score': 40,
 'Starry Night+': 41,
 'Vi': 42,
 'Dynamo Circlet': 43,
 "Pandora's Items III": 44,
 'Anima Visage': 45,
 'Slayer Crest': 46,
 'Zephyr

In [None]:
def weighted_random_choice(arr1, arr2, weight=0.9): #weighted choice between two arrays to better simulate real board
    from_arr1 = random.choices([True, False], weights=[weight, 1 - weight])[0] #decide what array

    if from_arr1: 
        index = random.randint(0, len(arr1) - 1) #then just chose random from selected array
        return arr1[index]
    else:
        index = random.randint(0, len(arr2) - 1)
        return arr2[index]
    
def make_board():
  board = ['GO']
  board_heros = []
  bench_items = []
  bench_heros = []
  random_board_size = random.randint(1, 11)
  for i in range(11):
      if i < random_board_size:
          board.append(random.choice(units))
          board_heros.append(board[-1])
          random_items = random.randint(0,3)
          for j in range(3):
              if j < random_items:
                  board.append(weighted_random_choice(full_items, base_items, weight=.95))
              else:
                  board.append('ITEM_SLOT')
      elif i == random_board_size:
          open_slot = weighted_random_choice(['PAD'], ['HERO_SLOT'], weight =1)
          if open_slot == 'HERO_SLOT':
              board.append(open_slot)
              board.append('ITEM_SLOT')
              board.append('ITEM_SLOT')
              board.append('ITEM_SLOT')
          else:
              board.append('PAD')
              board.append('PAD')
              board.append('PAD')
              board.append('PAD')
      else:
          board.append('PAD')
          board.append('PAD')
          board.append('PAD')
          board.append('PAD')
  board.append('HERO_BOUNDARY')
  random_bench_size = random.randint(0,10)
  for i in range(10):
      if i < random_bench_size:
          board.append(random.choice(units))
          bench_heros = board[-1]
      else:
          board.append('PAD')
  board.append('BENCH_BOUNDARY')
  for i in range(5):
      board.append(random.choice(units))
  board.append('SHOP_BOUNDARY')
  random_item_size = random.randint(2,15)
  for i in range(15):
      if i < random_item_size:
          board.append(weighted_random_choice(base_items,full_items))
          bench_items.append(board[-1])
      else:
          board.append('ITEM_SLOT')
  board.append('ITEM_BOUNDARY')
  random_augment_size = random.randint(0,3)
  for i in range(3):
      if i < random_augment_size:
          board.append(random.choice(augments))
      else:
          board.append('AUGMENT_SLOT')
  board.append('AUGMENT_BOUNDARY')
  board.append(random.choice(reroll_tokens))
  board.append('ACTION_ITEM')
  board.append(random.choice(bench_items))
  board.append('MASK')
  board.append('PAD')
  board.append('PAD')
  board.append('EOS')
  return board

def make_penalty(board): #two cases depending on first token
  penalty_vector = np.zeros(len(vocab))
  full = True if board[decision+1] in full_items else False
  if full: #if the first token is a full item
    board_units = set([unit for idx,unit in enumerate(board[0:hero_boundary]) if unit in unit and (board[idx+1] == 'ITEM_SLOT' or board[idx+2] == 'ITEM_SLOT' or board[idx+3] == 'ITEM_SLOT')])
    for unit in board_units:
      penalty_vector[vocab[unit]] = 1/len(board_units) #split against board units with a free item slot
  else: #if the first token is a base item
    actual_items = [item for item in board[shop_boundary+1:item_boundary] if item != 'ITEM_SLOT' and item not in full_items]
    actual_items.remove(board[decision+1])
    actual_items = set(actual_items)
    for item in actual_items:
        penalty_vector[vocab[item]] = 1/len(actual_items) #split between remaining base items
  return penalty_vector

During training, I did not add the correct penalty matrix. I spead penalties across any unit on the board, not just ones with open slots. This is most likely why it only got 90% accuracy.

In [None]:
item_dataset_list = []
item_penalties_list = []

for _ in tqdm.tqdm(range(200704)): #make board and penalty pairs
    board = make_board()
    penalty = make_penalty(board)
    item_dataset_list.append(board)
    item_penalties_list.append(penalty)

item_dataset = pd.DataFrame(item_dataset_list, columns=range(90))
item_penalties = pd.DataFrame(item_penalties_list, columns=range(len(vocab)))

100%|██████████| 200704/200704 [00:17<00:00, 11713.16it/s]


In [7]:
item_dataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,80,81,82,83,84,85,86,87,88,89
0,GO,Darius,Hand Of Justice,Anima Squad Emblem,ITEM_SLOT,Vayne,Ionic Spark,Sunlight Cape,Statikk's Favor,PAD,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CANT_REROLL,ACTION_ITEM,Giant's Belt,MASK,PAD,PAD,EOS
1,GO,Jhin,Mogul's Mail,Mogul's Mail,ITEM_SLOT,Braum,ITEM_SLOT,ITEM_SLOT,ITEM_SLOT,Vayne,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CANT_REROLL,ACTION_ITEM,Chain Vest,MASK,PAD,PAD,EOS
2,GO,Vex,Gambler's Blade,Tactician's Cape,Legacy of the Colossus,Jhin,Unstable Treasure Chest,Sparring Gloves,ITEM_SLOT,Poppy,...,Preparation II,AUGMENT_SLOT,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_ITEM,Sparring Gloves,MASK,PAD,PAD,EOS
3,GO,Shaco,ITEM_SLOT,ITEM_SLOT,ITEM_SLOT,Mordekaiser,Warmog's Pride,Blue Buff,ITEM_SLOT,Elise,...,Clear Mind,AUGMENT_SLOT,AUGMENT_BOUNDARY,CANT_REROLL,ACTION_ITEM,Sparring Gloves,MASK,PAD,PAD,EOS
4,GO,Leona,ITEM_SLOT,ITEM_SLOT,ITEM_SLOT,Cho'Gath,Anima Visage,ITEM_SLOT,ITEM_SLOT,Aurora,...,Piercing Lotus I,Item Grab Bag I,AUGMENT_BOUNDARY,CANT_REROLL,ACTION_ITEM,Needlessly Large Rod,MASK,PAD,PAD,EOS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200699,GO,Annie,Horizon Focus,ITEM_SLOT,ITEM_SLOT,Cho'Gath,Willbreaker,ITEM_SLOT,ITEM_SLOT,Viego,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_ITEM,Tear of the Goddess,MASK,PAD,PAD,EOS
200700,GO,Aphelios,ITEM_SLOT,ITEM_SLOT,ITEM_SLOT,Leona,Slayer Emblem,Unstable Treasure Chest,ITEM_SLOT,Annie,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_ITEM,Needlessly Large Rod,MASK,PAD,PAD,EOS
200701,GO,Poppy,ITEM_SLOT,ITEM_SLOT,ITEM_SLOT,Fiddlesticks,Street Demon Emblem,ITEM_SLOT,ITEM_SLOT,Fiddlesticks,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CANT_REROLL,ACTION_ITEM,Sparring Gloves,MASK,PAD,PAD,EOS
200702,GO,Darius,Rabadon's Deathcap,Slayer Emblem,ITEM_SLOT,LeBlanc,ITEM_SLOT,ITEM_SLOT,ITEM_SLOT,PAD,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_ITEM,Chain Vest,MASK,PAD,PAD,EOS


In [8]:
item_penalties

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,536,537,538,539,540,541,542,543,544,545
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
200700,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
200701,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
200702,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
#transformer model
class MaskedTokenTransformer(nn.Module):
    def __init__(self, vocab_size, d_model=512, nhead=8, num_layers=4, dim_feedforward=512, dropout=.2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model) #embedding layer
        self.pos_embedding = nn.Embedding(90, d_model) #positional embedding
        self.dropout = nn.Dropout(dropout) #dropout for overfitting

        encoder_layer = nn.TransformerEncoderLayer( 
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers) #transformer layer

        self.head = nn.Linear(d_model, vocab_size) #fully connected output layer that maps to vocab

    def forward(self, x):
        positions = torch.arange(x.size(1), device=x.device).unsqueeze(0)
        x = self.embedding(x) + self.pos_embedding(positions) #embedding and positions
        x = self.dropout(x) #dropout

        x = self.transformer(x) #run through transformer

        logits = self.head(x) #get output

        return logits

In [None]:
#custom dataset
class MaskedTokenDataset(Dataset):
    def __init__(self, sequences, vocab, penalties):
        self.sequences = sequences #read in sequences as token array
        self.vocab = vocab #vocab from pkl file
        self.penalties = penalties #read in penalties as array

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        ids = self.sequences[idx]
        input_ids = [self.vocab[t] for t in ids] #convert to input ids
        penalty = self.penalties[idx]
        return {'input':torch.tensor(input_ids), 'penalty': torch.tensor(penalty)} #convert to tensors

In [None]:
def custom_loss(outputs, target_indices, expected_classes):
    batch_size = outputs.size(0)

    selected_logits = outputs[torch.arange(batch_size), target_indices]   #get the logits at the target positions
    loss = nn.functional.cross_entropy(selected_logits, expected_classes) #apply cross-entropy loss at these positions
    return loss

In [None]:
epochs =40
batch_size = 2048
sequences = item_dataset.values.tolist()
expected = item_penalties.values.tolist()

data = MaskedTokenDataset(sequences, vocab, expected)
dataloader = DataLoader(data, batch_size=batch_size, drop_last=True)

model = MaskedTokenTransformer(len(vocab)).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=.1e-4)

In [13]:
for epoch in range(epochs):
    total_loss = 0
    tot_penalty = 0
    for inputs in dataloader:
        input_ids = inputs['input'].to(device)
        expected_tensor = inputs['penalty'].to(device)
        optimizer.zero_grad()
        logits = model(input_ids)
        loss= custom_loss(logits, decision+2, expected_tensor)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")

Epoch 1/40 - Loss: 4.3067
Epoch 2/40 - Loss: 3.0152
Epoch 3/40 - Loss: 2.8298
Epoch 4/40 - Loss: 2.7414
Epoch 5/40 - Loss: 2.6574
Epoch 6/40 - Loss: 2.5433
Epoch 7/40 - Loss: 2.3511
Epoch 8/40 - Loss: 2.2198
Epoch 9/40 - Loss: 2.1647
Epoch 10/40 - Loss: 2.1329
Epoch 11/40 - Loss: 2.1110
Epoch 12/40 - Loss: 2.0921
Epoch 13/40 - Loss: 2.0730
Epoch 14/40 - Loss: 2.0396
Epoch 15/40 - Loss: 1.9518
Epoch 16/40 - Loss: 1.8420
Epoch 17/40 - Loss: 1.7779
Epoch 18/40 - Loss: 1.7504
Epoch 19/40 - Loss: 1.7351
Epoch 20/40 - Loss: 1.7251
Epoch 21/40 - Loss: 1.7168
Epoch 22/40 - Loss: 1.7102
Epoch 23/40 - Loss: 1.7042
Epoch 24/40 - Loss: 1.6996
Epoch 25/40 - Loss: 1.6950
Epoch 26/40 - Loss: 1.6906
Epoch 27/40 - Loss: 1.6873
Epoch 28/40 - Loss: 1.6838
Epoch 29/40 - Loss: 1.6810
Epoch 30/40 - Loss: 1.6778
Epoch 31/40 - Loss: 1.6752
Epoch 32/40 - Loss: 1.6725
Epoch 33/40 - Loss: 1.6699
Epoch 34/40 - Loss: 1.6675
Epoch 35/40 - Loss: 1.6651
Epoch 36/40 - Loss: 1.6625
Epoch 37/40 - Loss: 1.6605
Epoch 38/4

In [None]:
def evaluate(model, input_ids, masked_index):
    model.eval()
    with torch.no_grad():
        logits = model(input_ids)  #get logits
        masked_logits = logits[0, masked_index]  #only care about logits at specific position
        for item in base_items:
          print(item, masked_logits[vocab[item]])

        predicted_id = masked_logits.argmax(dim=-1).item()
        print(f"Predicted token ID at index {masked_index}: {inv_vocab[predicted_id]} : {max(masked_logits)}")

    return predicted_id

In [15]:
for i in range(5):
  board = make_board()
  penalty = make_penalty(board)
  print(board[decision+1])
  print(board[shop_boundary+1:item_boundary])
  input_ids = [vocab[t] for t in board]
  id = evaluate(model, torch.tensor(input_ids).unsqueeze(0).to(device), decision+2)

Recurve Bow
["Giant's Belt", 'Tear of the Goddess', 'Negatron Cloak', 'Negatron Cloak', 'B.F. Sword', 'Needlessly Large Rod', "Giant's Belt", 'Chain Vest', "Giant's Belt", "Thief's Gloves", 'Vanguard Emblem', 'Recurve Bow', 'Recurve Bow', 'Sparring Gloves', 'ITEM_SLOT']
B.F. Sword tensor(7.6214, device='cuda:0')
Recurve Bow tensor(7.5521, device='cuda:0')
Needlessly Large Rod tensor(7.4567, device='cuda:0')
Tear of the Goddess tensor(7.5896, device='cuda:0')
Chain Vest tensor(7.7406, device='cuda:0')
Negatron Cloak tensor(7.6682, device='cuda:0')
Giant's Belt tensor(7.5891, device='cuda:0')
Sparring Gloves tensor(7.4819, device='cuda:0')
Predicted token ID at index 86: Chain Vest : 7.740604877471924
Needlessly Large Rod
['Chain Vest', 'Chain Vest', 'Chain Vest', "Sniper's Focus", 'Needlessly Large Rod', 'Sparring Gloves', 'B.F. Sword', "Giant's Belt", 'Needlessly Large Rod', 'Needlessly Large Rod', 'Moonstone Renewer', 'Recurve Bow', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT']
B.F. Sword te

In [None]:
#more training
epochs=20
for epoch in range(epochs):
    total_loss = 0
    tot_penalty = 0
    for inputs in dataloader:
        input_ids = inputs['input'].to(device)
        expected_tensor = inputs['penalty'].to(device)
        optimizer.zero_grad()
        logits = model(input_ids)
        loss= custom_loss(logits, decision+2, expected_tensor)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")

Epoch 1/20 - Loss: 1.6306
Epoch 2/20 - Loss: 1.6273
Epoch 3/20 - Loss: 1.6253
Epoch 4/20 - Loss: 1.6237
Epoch 5/20 - Loss: 1.6221
Epoch 6/20 - Loss: 1.6207
Epoch 7/20 - Loss: 1.6195
Epoch 8/20 - Loss: 1.6186
Epoch 9/20 - Loss: 1.6178
Epoch 10/20 - Loss: 1.6171
Epoch 11/20 - Loss: 1.6166
Epoch 12/20 - Loss: 1.6161
Epoch 13/20 - Loss: 1.6157
Epoch 14/20 - Loss: 1.6153
Epoch 15/20 - Loss: 1.6150
Epoch 16/20 - Loss: 1.6147
Epoch 17/20 - Loss: 1.6145
Epoch 18/20 - Loss: 1.6142
Epoch 19/20 - Loss: 1.6140
Epoch 20/20 - Loss: 1.6139


In [18]:
for i in range(5):
  board = make_board()
  penalty = make_penalty(board)
  print(board[decision+1])
  print(board[shop_boundary+1:item_boundary])
  input_ids = [vocab[t] for t in board]
  id = evaluate(model, torch.tensor(input_ids).unsqueeze(0).to(device), decision+2)

Giant's Belt
["Bulwark's Oath", 'Negatron Cloak', "Giant's Belt", 'Sparring Gloves', "Giant's Belt", 'Chain Vest', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT']
B.F. Sword tensor(2.3185, device='cuda:0')
Recurve Bow tensor(2.2864, device='cuda:0')
Needlessly Large Rod tensor(2.2338, device='cuda:0')
Tear of the Goddess tensor(2.0830, device='cuda:0')
Chain Vest tensor(9.0834, device='cuda:0')
Negatron Cloak tensor(9.1446, device='cuda:0')
Giant's Belt tensor(9.1197, device='cuda:0')
Sparring Gloves tensor(9.0855, device='cuda:0')
Predicted token ID at index 86: Negatron Cloak : 9.144562721252441
Hand Of Justice
['B.F. Sword', "Giant's Belt", 'Hand Of Justice', 'Negatron Cloak', 'Negatron Cloak', "Giant's Belt", 'Sparring Gloves', 'Sparring Gloves', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT']
B.F. Sword tensor(0.3585, device='cuda:0')
Recurve Bow tensor(-0.0699, device='

In [20]:
torch.save(model.state_dict(), "item2_model.pth")