In [None]:
import torch
import tqdm
import pickle
import random
import pandas as pd
import numpy as np
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Using device:', device)

Using device: cuda


In [None]:
def _read_token_file(path):
    """Return one token per line of ``path``.

    Every line is stripped of surrounding whitespace and all commas in it are
    removed (split on ',' and re-joined with nothing), exactly as the four
    hand-written loops this helper replaces did.
    """
    with open(path, 'r') as f:
        return [''.join(line.strip().split(',')) for line in f]


# Fixed control tokens that are not read from the CSV files.
reroll_tokens = ['CAN_REROLL', 'CANT_REROLL']
action_tokens = ['ACTION_BUY', 'ACTION_SELL', 'ACTION_MOVE', 'ACTION_ITEM',
                 'ACTION_AUGMENT', 'ACTION_REROLL', 'ACTION_NONE']
other_tokens = ['GO','EOS', 'MASK', 'PAD', 'HERO_BOUNDARY', 'BENCH_BOUNDARY', 'SHOP_BOUNDARY',
                'ITEM_BOUNDARY', 'AUGMENT_BOUNDARY', 'ITEM_SLOT', 'AUGMENT_SLOT', 'HERO_SLOT']

# Game entities, one name per line in each file.
augments = _read_token_file('augments.csv')
base_items = _read_token_file('base_items.csv')
full_items = _read_token_file('full_items.csv')
units = _read_token_file('units.csv')

# A couple of the augments got skewed in data transfer; change them to the correct name.
# list.index raises ValueError if a skewed name is missing, so a changed CSV fails loudly here.
_skewed_augment_names = {
    'Cower"" Weaklings!': 'Cower, Weaklings!',
    'One"" Two"" Five!': 'One, Two, Five!',
    '10""000 IQ': '10,000 IQ',
    'One Buff"" Two Buff': 'One Buff, Two Buff',
}
for _skewed, _correct in _skewed_augment_names.items():
    augments[augments.index(_skewed)] = _correct

# Combine all tokens into a single list.
tokens = augments + base_items + full_items + units + reroll_tokens + action_tokens + other_tokens

# Important positions in the 90-token sequence produced by make_board():
# the five boundary tokens, the reroll flag, and the action token. The three
# augment options follow `decision`, then the MASK token at decision + 4.
hero_boundary, bench_boundary, shop_boundary, item_boundary, augment_boundary, reroll, decision = 45, 56, 62, 78, 82, 83, 84

In [None]:
# The vocabulary was pickled once so that token indices stay identical between runs.
# NOTE: pickle.load can execute arbitrary code; only load these files from a trusted source.
with open('vocab.pkl', 'rb') as vocab_file:
    vocab = pickle.load(vocab_file)  # token string -> integer id
with open('inv_vocab.pkl', 'rb') as inv_vocab_file:
    inv_vocab = pickle.load(inv_vocab_file)  # integer id -> token string (indexed by id in evaluate)
vocab

{'Ziggs': 0,
 'Pair of Fours': 1,
 'Blazing Soul II': 2,
 "Zhonya's Paradox": 3,
 'Illaoi': 4,
 'Cower, Weaklings!': 5,
 "Slammin'+": 6,
 'ReinFOURcement': 7,
 'Adaptive Strikes': 8,
 'Rapidfire Crest': 9,
 'Syndicate Emblem': 10,
 "Nashor's Tooth": 11,
 'Statikk Shiv': 12,
 'ITEM_SLOT': 13,
 'Roll The Dice': 14,
 'Iron Assets': 15,
 'SHOP_BOUNDARY': 16,
 'Giant Slayer': 17,
 'Golden Fleece': 18,
 'Slayer Crown': 19,
 'One For All II': 20,
 'Recurve Bow': 21,
 'Senna': 22,
 "I'm the Carry Now": 23,
 'Prismatic Ticket': 24,
 'CAN_REROLL': 25,
 'Chain Vest': 26,
 'HERO_BOUNDARY': 27,
 'Missed Connections': 28,
 'Lucky Gloves': 29,
 'Bastion Crown': 30,
 'Marksman Circlet': 31,
 'Placebo': 32,
 'Bastion Crest': 33,
 'Cooking Pot': 34,
 'Marksman Crest': 35,
 'Eye For An Eye+': 36,
 'Void Swarm': 37,
 'Slayer Emblem': 38,
 'Jhin': 39,
 'New High Score': 40,
 'Starry Night+': 41,
 'Vi': 42,
 'Dynamo Circlet': 43,
 "Pandora's Items III": 44,
 'Anima Visage': 45,
 'Slayer Crest': 46,
 'Zephyr

In [None]:
def weighted_random_choice(arr1, arr2, weight=0.9):
    """Pick a random element from one of two sequences.

    ``arr1`` is selected with probability ``weight`` and ``arr2`` with
    probability ``1 - weight``; the element is then drawn uniformly from the
    selected sequence. Used to bias generated boards toward one item pool.
    """
    # First draw: which sequence to sample from.
    use_first = random.choices([True, False], weights=[weight, 1 - weight])[0]
    source = arr1 if use_first else arr2
    # Second draw: uniform position inside the selected sequence.
    return source[random.randint(0, len(source) - 1)]
    
def make_board():
    """Generate one random 90-token board for the augment-selection task.

    Sequence layout (index: content):
        0        'GO'
        1-44     11 board slots x 4 tokens (unit followed by 3 item positions)
        45       'HERO_BOUNDARY'
        46-55    10 bench positions (unit or 'PAD')
        56       'BENCH_BOUNDARY'
        57-61    5 shop units
        62       'SHOP_BOUNDARY'
        63-77    15 item-bench positions (item or 'ITEM_SLOT')
        78       'ITEM_BOUNDARY'
        79-81    3 owned-augment positions (augment or 'AUGMENT_SLOT')
        82       'AUGMENT_BOUNDARY'
        83       reroll flag
        84       'ACTION_AUGMENT'
        85-87    the three augment options offered
        88       'MASK' (position the model must fill in)
        89       'EOS'

    Reads the module-level lists ``units``, ``full_items``, ``base_items``,
    ``augments`` and ``reroll_tokens``.

    Returns:
        list[str]: the board as a list of token strings.
    """
    board = ['GO']

    # Board: a random number of filled slots, then padding up to 11 slots.
    random_board_size = random.randint(1, 11)
    for i in range(11):
        if i < random_board_size:
            board.append(random.choice(units))  # the unit itself
            random_items = random.randint(0, 3)  # how many of its 3 item positions are filled
            for _ in range(random_items):
                # equipped items are overwhelmingly completed items
                board.append(weighted_random_choice(full_items, base_items, weight=.95))
            board.extend(['ITEM_SLOT'] * (3 - random_items))  # remaining positions are open
        elif i == random_board_size:
            # First empty slot after the last unit.
            # NOTE(review): with weight=1 this always yields 'PAD', so the
            # 'HERO_SLOT' branch below is currently never taken. The call is
            # kept so the sequence of random draws is unchanged.
            open_slot = weighted_random_choice(['PAD'], ['HERO_SLOT'], weight=1)
            if open_slot == 'HERO_SLOT':
                board.extend(['HERO_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT'])
            else:
                board.extend(['PAD'] * 4)
        else:
            board.extend(['PAD'] * 4)
    board.append('HERO_BOUNDARY')

    # Bench: random units followed by padding up to 10 positions.
    random_bench_size = random.randint(0, 10)
    for _ in range(random_bench_size):
        board.append(random.choice(units))
    board.extend(['PAD'] * (10 - random_bench_size))
    board.append('BENCH_BOUNDARY')

    # Shop: always five units on offer.
    for _ in range(5):
        board.append(random.choice(units))
    board.append('SHOP_BOUNDARY')

    # Item bench: unequipped items are more likely to be base items than full items.
    random_item_size = random.randint(2, 15)
    for _ in range(random_item_size):
        board.append(weighted_random_choice(base_items, full_items))
    board.extend(['ITEM_SLOT'] * (15 - random_item_size))
    board.append('ITEM_BOUNDARY')

    # Augments already owned.
    random_augment_size = random.randint(0, 3)
    for _ in range(random_augment_size):
        board.append(random.choice(augments))
    board.extend(['AUGMENT_SLOT'] * (3 - random_augment_size))
    board.append('AUGMENT_BOUNDARY')

    board.append(random.choice(reroll_tokens))  # whether or not there is enough gold to reroll
    board.append('ACTION_AUGMENT')  # this model only handles the augment action
    for _ in range(3):
        # Options are drawn independently, so the same augment can appear more than once.
        board.append(random.choice(augments))
    board.append('MASK')  # decision position
    board.append('EOS')
    return board

def make_penalty(board):
    """Build the target distribution over the vocabulary for one board.

    The correct answer must be one of the three augment options stored right
    after the action token (positions ``decision + 1`` .. ``decision + 3``), so
    each option receives 1/3 of the probability mass.

    The mass is accumulated rather than assigned: make_board() draws the three
    options independently, so the same augment can appear twice or three
    times. Assigning would then leave a target that sums to 2/3 or 1/3, which
    is not a valid distribution for the KL-divergence loss.

    Args:
        board: token list as produced by make_board().

    Returns:
        numpy.ndarray of length ``len(vocab)`` that sums to 1.
    """
    penalty_vector = np.zeros(len(vocab))
    for offset in (1, 2, 3):
        penalty_vector[vocab[board[decision + offset]]] += 1/3
    return penalty_vector

In [None]:
augment_dataset_list, augment_penalties_list = [], []

# Generate (board, target-distribution) pairs; rows are collected in plain
# lists first and turned into DataFrames once at the end.
for _ in tqdm.tqdm(range(200704)):
    board = make_board()
    augment_dataset_list.append(board)
    penalty = make_penalty(board)
    augment_penalties_list.append(penalty)

# One column per sequence position (90) / per vocabulary entry.
augment_dataset = pd.DataFrame(data=augment_dataset_list, columns=range(90))
augment_penalties = pd.DataFrame(data=augment_penalties_list, columns=range(len(vocab)))

100%|██████████| 200704/200704 [00:13<00:00, 15232.28it/s]


In [None]:
# Preview the generated boards: one row per sample, one column per sequence position (0-89).
augment_dataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,80,81,82,83,84,85,86,87,88,89
0,GO,Urgot,Obsidian Cleaver,ITEM_SLOT,ITEM_SLOT,PAD,PAD,PAD,PAD,PAD,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_AUGMENT,Golden Fleece,Salvage Bin+,Cluttered Mind,MASK,EOS
1,GO,Draven,Sparring Gloves,Infinity Edge,Giant's Belt,Garen,Urf-Angel's Staff,Dynamo Emblem,ITEM_SLOT,Garen,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CANT_REROLL,ACTION_AUGMENT,Adaptive Strikes,Wand Overflow,Street Demon Circlet,MASK,EOS
2,GO,Varus,Deathblade,ITEM_SLOT,ITEM_SLOT,Urgot,Slayer Emblem,Nashor's Tooth,Rascal's Gloves,Kindred,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CANT_REROLL,ACTION_AUGMENT,I'm the Carry Now,Titanic Titan,Marksman Crest,MASK,EOS
3,GO,Galio,Lich Bane,ITEM_SLOT,ITEM_SLOT,Samira,ITEM_SLOT,ITEM_SLOT,ITEM_SLOT,Jax,...,Spoils of War II,Long Distance Pals,AUGMENT_BOUNDARY,CANT_REROLL,ACTION_AUGMENT,Scavenger,Keep Your Friends Close,Bastion Crest,MASK,EOS
4,GO,Naafiri,Hand Of Justice,Prowler's Claw,Spear of Hirana,Kobuko,Talisman Of Ascension,Obsidian Cleaver,ITEM_SLOT,Kog'Maw,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CANT_REROLL,ACTION_AUGMENT,Mentorship II,Healing Orbs I,Find Your Center,MASK,EOS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200699,GO,Varus,Obsidian Cleaver,Anima Squad Emblem,ITEM_SLOT,Fiddlesticks,Brink of Dawn,Golden Ox Emblem,ITEM_SLOT,Vex,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_AUGMENT,Reactive Shell,Build a Bud!,Cutpurse,MASK,EOS
200700,GO,Yuumi,Suspicious Trench Coat,Horizon Focus,Quicksilver,Urgot,Titan's Resolve,BoomBot Emblem,ITEM_SLOT,Rengar,...,Support Mining+,Bastion Circlet,AUGMENT_BOUNDARY,CANT_REROLL,ACTION_AUGMENT,"10,000 IQ",Keep Your Friends Close,BoomBot Crest,MASK,EOS
200701,GO,Gragas,Rapid Firecannon,ITEM_SLOT,ITEM_SLOT,Cho'Gath,Gold Collector,ITEM_SLOT,ITEM_SLOT,Annie,...,Prizefighter,AUGMENT_SLOT,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_AUGMENT,Flexible,Tower Defense,Transference,MASK,EOS
200702,GO,Vayne,Warmog's Armor,ITEM_SLOT,ITEM_SLOT,Fiddlesticks,Strategist Emblem,ITEM_SLOT,ITEM_SLOT,Kog'Maw,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CANT_REROLL,ACTION_AUGMENT,Malicious Monetization,Eye For An Eye,Cybernetic Bulk III,MASK,EOS


In [None]:
# Preview the target vectors: one row per sample, one column per vocabulary index.
augment_penalties

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,536,537,538,539,540,541,542,543,544,545
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
200700,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
200701,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
200702,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
#custom dataset
class MaskedTokenDataset(Dataset):
    """Dataset that pairs each token sequence with its target vector.

    Token strings are converted to ids lazily, on item access.
    """

    def __init__(self, sequences, vocab, penalties):
        # sequences: indexable collection of token-string sequences
        # vocab:     mapping from token string to integer id
        # penalties: indexable collection of target vectors, aligned with sequences
        self.sequences, self.vocab, self.penalties = sequences, vocab, penalties

    def __len__(self):
        # Number of samples is driven by the sequences.
        return len(self.sequences)

    def __getitem__(self, idx):
        # Look up every token of the requested sequence in the vocabulary.
        token_ids = list(map(self.vocab.__getitem__, self.sequences[idx]))
        target = self.penalties[idx]
        return {'input': torch.tensor(token_ids), 'penalty': torch.tensor(target)}


In [None]:
#transformer model
class MaskedTokenTransformer(nn.Module):
    """Transformer encoder that predicts a vocabulary distribution at every position.

    Args:
        vocab_size: number of distinct tokens; also the size of the output layer.
        d_model: embedding / hidden width.
        nhead: number of attention heads per encoder layer.
        num_layers: number of stacked encoder layers.
        dim_feedforward: width of the feed-forward sub-layer.
        dropout: dropout probability applied to the summed token + position
            embeddings. It is NOT forwarded to the encoder layers, which keep
            nn.TransformerEncoderLayer's own default.
        max_len: longest sequence the learned positional table supports.
            Defaults to 90, the board length, so existing checkpoints and
            callers are unaffected.
    """

    def __init__(self, vocab_size, d_model=512, nhead=8, num_layers=4, dim_feedforward=512, dropout=.2, max_len=90):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model) #token embedding
        self.pos_embedding = nn.Embedding(max_len, d_model) #learned positional embedding
        self.dropout = nn.Dropout(dropout) #dropout against overfitting

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True #inputs are (batch, seq, feature)
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers) #encoder stack

        self.head = nn.Linear(d_model, vocab_size) #maps hidden states back to vocab logits

    def forward(self, x):
        """Return logits of shape (batch, seq, vocab_size) for token ids ``x`` of shape (batch, seq).

        Raises:
            IndexError: if the sequence is longer than the positional table.
        """
        seq_len = x.size(1)
        if seq_len > self.pos_embedding.num_embeddings:
            # Fail with a readable message instead of an opaque embedding lookup error.
            raise IndexError(
                f"sequence length {seq_len} exceeds the positional table size "
                f"{self.pos_embedding.num_embeddings}"
            )
        positions = torch.arange(seq_len, device=x.device).unsqueeze(0) #(1, seq), broadcast over batch
        x = self.embedding(x) + self.pos_embedding(positions) #token + position
        x = self.dropout(x)

        x = self.transformer(x) #contextualize every position

        logits = self.head(x) #per-position vocab logits

        return logits

In [None]:
def custom_loss(outputs, target_indices, expected_classes):
    """KL-divergence loss evaluated only at the target position of each sample.

    Args:
        outputs: model logits, indexed as [sample, position, vocab entry].
        target_indices: position(s) to score; indexed together with the sample index.
        expected_classes: target probabilities per sample over the vocab dimension.

    Returns:
        Scalar tensor: KL divergence with 'batchmean' reduction.
    """
    sample_rows = torch.arange(outputs.size(0))
    # Keep only the logits at the requested position for every sample.
    logits_at_target = outputs[sample_rows, target_indices]
    # kl_div expects log-probabilities as its input.
    log_probs = nn.functional.log_softmax(logits_at_target, dim=-1)
    return nn.functional.kl_div(log_probs, expected_classes, reduction='batchmean')

In [None]:
# Training hyperparameters.
epochs = 15
batch_size = 2048

# Convert both DataFrames to plain nested lists for the dataset wrapper.
sequences = augment_dataset.values.tolist()
expected = augment_penalties.values.tolist()

data = MaskedTokenDataset(sequences=sequences, vocab=vocab, penalties=expected)
# drop_last discards a trailing batch smaller than batch_size.
dataloader = DataLoader(dataset=data, batch_size=batch_size, drop_last=True)

model = MaskedTokenTransformer(vocab_size=len(vocab)).to(device)
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-4)

In [None]:
for epoch in range(epochs):
    # evaluate() switches the model to eval mode; switch back explicitly so
    # dropout is active even when this cell is re-run after an evaluation.
    model.train()
    total_loss = 0
    for inputs in dataloader:
        input_ids = inputs['input'].to(device)
        expected_tensor = inputs['penalty'].to(device)
        optimizer.zero_grad()
        logits = model(input_ids)
        # The MASK token sits four positions after the action token.
        loss = custom_loss(logits, decision+4, expected_tensor)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(dataloader)  # mean loss per batch
    print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")

Epoch 1/15 - Loss: 4.5721
Epoch 2/15 - Loss: 1.6463
Epoch 3/15 - Loss: 0.6142
Epoch 4/15 - Loss: 0.4512
Epoch 5/15 - Loss: 0.3817
Epoch 6/15 - Loss: 0.3387
Epoch 7/15 - Loss: 0.3068
Epoch 8/15 - Loss: 0.2824
Epoch 9/15 - Loss: 0.2626
Epoch 10/15 - Loss: 0.2463
Epoch 11/15 - Loss: 0.2319
Epoch 12/15 - Loss: 0.2194
Epoch 13/15 - Loss: 0.2092
Epoch 14/15 - Loss: 0.1992
Epoch 15/15 - Loss: 0.1905


In [None]:
def evaluate(model, input_ids, masked_index):
    """Pick the best of the three offered augments for a single board and print diagnostics.

    Args:
        model: network returning logits indexed as [sample, position, vocab entry].
        input_ids: tensor of token ids holding one board (only row 0 is used).
        masked_index: position whose logits are inspected (the MASK token).

    Returns:
        int: vocabulary id of the option with the highest logit. On ties the
        earliest option wins.

    Side effects:
        Puts ``model`` in eval mode and prints the three option logits, the
        prediction, and any non-option token whose logit exceeds the weakest
        option (ideally none).
    """
    model.eval()
    with torch.no_grad():
        logits = model(input_ids)  #get logits
        masked_logits = logits[0, masked_index]  #only care about logits at the masked position

        # Token ids of the three options and the logit the model gives each.
        option_ids = [input_ids[0][decision + offset].item() for offset in (1, 2, 3)]
        option_logits = [masked_logits[token_id].item() for token_id in option_ids]

        # Restrict the choice to the offered options instead of a global argmax;
        # max() returns the first maximum, so earlier options win ties.
        best = max(range(len(option_ids)), key=lambda k: option_logits[k])
        predicted_id = option_ids[best]

        for token_id, logit in zip(option_ids, option_logits):
            print(f"Token ID {token_id:>5} | Logit: {logit:.8f} | Token: {inv_vocab[token_id]}") #print for verify
        print(f"Predicted token ID at index {masked_index}: {inv_vocab[predicted_id]}")

        # Sanity check: list tokens that are NOT options but still score above
        # the weakest option. The threshold is the lowest option *logit* (the
        # previous code compared logits against the lowest option token id).
        threshold = min(option_logits)
        above_threshold = (masked_logits > threshold).nonzero(as_tuple=True)[0]
        for idx in above_threshold:
            token_id = idx.item()
            if token_id in option_ids:
                continue  # the options themselves were already printed above
            logit_value = masked_logits[idx].item()
            print(f"Token ID {token_id:>5} | Logit: {logit_value:.8f} | Token: {inv_vocab[token_id]}")

    return predicted_id

In [None]:
# Spot-check the trained model on five freshly generated boards.
for i in range(5):
    board = make_board()
    print(board[decision:-1])  # action token, the three options, and MASK
    input_ids = [vocab[t] for t in board]
    # Add a batch dimension of 1; the MASK token sits at decision + 4.
    # The result is stored as predicted_id rather than id, to avoid shadowing the builtin.
    predicted_id = evaluate(model, torch.tensor(input_ids).unsqueeze(0).to(device), decision+4)

['ACTION_AUGMENT', 'Golden Fleece', 'Speed Kills', 'Hard Commit', 'MASK']
Token ID    18 | Logit: 10.24877930 | Token: Golden Fleece
Token ID   124 | Logit: 10.65174389 | Token: Speed Kills
Token ID   130 | Logit: 10.60978413 | Token: Hard Commit
Predicted token ID at index 88: Speed Kills
['ACTION_AUGMENT', 'Manaflow I', 'Void Swarm', 'Raining Gold+', 'MASK']
Token ID    93 | Logit: 10.13298512 | Token: Manaflow I
Token ID    37 | Logit: 11.00205421 | Token: Void Swarm
Token ID    77 | Logit: 9.45095539 | Token: Raining Gold+
Predicted token ID at index 88: Void Swarm
['ACTION_AUGMENT', 'Bastion Crest', 'Transistor', 'Adaptive Strikes', 'MASK']
Token ID    33 | Logit: 10.01325130 | Token: Bastion Crest
Token ID   540 | Logit: 9.45168400 | Token: Transistor
Token ID     8 | Logit: 8.77118397 | Token: Adaptive Strikes
Predicted token ID at index 88: Bastion Crest
Token ID     8 | Logit: 8.77118397 | Token: Adaptive Strikes
Token ID    33 | Logit: 10.01325130 | Token: Bastion Crest
Token

In [None]:
# Save only the model's state_dict (its parameters), not the full module object.
torch.save(model.state_dict(), "augment_model.pth")