In [None]:
import torch
import tqdm
import pickle
import random
import pandas as pd
import numpy as np
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Using device:', device)

Using device: cuda


In [None]:
def _read_names(path):
    """Read one name per line from ``path`` with every comma removed.

    Each line is stripped of surrounding whitespace and then all ``,``
    characters are dropped, so a name that itself contains a comma comes back
    without it (the affected augment names are repaired below).
    """
    with open(path, 'r') as f:
        return [line.strip().replace(',', '') for line in f]


# Special tokens that describe game state rather than a game entity.
reroll_tokens = ['CAN_REROLL', 'CANT_REROLL']
action_tokens = ['ACTION_BUY', 'ACTION_SELL', 'ACTION_MOVE', 'ACTION_ITEM',
                 'ACTION_AUGMENT', 'ACTION_REROLL', 'ACTION_NONE']
other_tokens = ['GO','EOS', 'MASK', 'PAD', 'HERO_BOUNDARY', 'BENCH_BOUNDARY', 'SHOP_BOUNDARY',
                'ITEM_BOUNDARY', 'AUGMENT_BOUNDARY', 'ITEM_SLOT', 'AUGMENT_SLOT', 'HERO_SLOT']

augments = _read_names('augments.csv')
base_items = _read_names('base_items.csv')
full_items = _read_names('full_items.csv')
units = _read_names('units.csv')

# These augment names come out of the loader in a mangled form (comma gone,
# doubled quotes left behind); restore the real names. ``list.index`` raises
# ValueError if a mangled name is missing, which flags an unexpected CSV.
_MANGLED_AUGMENTS = {
    'Cower"" Weaklings!': 'Cower, Weaklings!',
    'One"" Two"" Five!': 'One, Two, Five!',
    '10""000 IQ': '10,000 IQ',
    'One Buff"" Two Buff': 'One Buff, Two Buff',
}
for mangled_name, real_name in _MANGLED_AUGMENTS.items():
    augments[augments.index(mangled_name)] = real_name

tokens = augments + base_items + full_items + units + reroll_tokens + action_tokens + other_tokens

# Fixed positions inside the 90-token sequence built by make_board():
# the five boundary tokens, the reroll token and the action token
# (the MASK token sits at decision + 1).
hero_boundary, bench_boundary, shop_boundary, item_boundary, augment_boundary, reroll, decision= 45, 56, 62, 78, 82, 83, 84

In [None]:
# Token -> id mapping and its inverse (id -> token), both stored as pickles.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open('vocab.pkl', 'rb') as vocab_file:
    vocab = pickle.load(vocab_file)
with open('inv_vocab.pkl', 'rb') as inv_vocab_file:
    inv_vocab = pickle.load(inv_vocab_file)
vocab

{'Ziggs': 0,
 'Pair of Fours': 1,
 'Blazing Soul II': 2,
 "Zhonya's Paradox": 3,
 'Illaoi': 4,
 'Cower, Weaklings!': 5,
 "Slammin'+": 6,
 'ReinFOURcement': 7,
 'Adaptive Strikes': 8,
 'Rapidfire Crest': 9,
 'Syndicate Emblem': 10,
 "Nashor's Tooth": 11,
 'Statikk Shiv': 12,
 'ITEM_SLOT': 13,
 'Roll The Dice': 14,
 'Iron Assets': 15,
 'SHOP_BOUNDARY': 16,
 'Giant Slayer': 17,
 'Golden Fleece': 18,
 'Slayer Crown': 19,
 'One For All II': 20,
 'Recurve Bow': 21,
 'Senna': 22,
 "I'm the Carry Now": 23,
 'Prismatic Ticket': 24,
 'CAN_REROLL': 25,
 'Chain Vest': 26,
 'HERO_BOUNDARY': 27,
 'Missed Connections': 28,
 'Lucky Gloves': 29,
 'Bastion Crown': 30,
 'Marksman Circlet': 31,
 'Placebo': 32,
 'Bastion Crest': 33,
 'Cooking Pot': 34,
 'Marksman Crest': 35,
 'Eye For An Eye+': 36,
 'Void Swarm': 37,
 'Slayer Emblem': 38,
 'Jhin': 39,
 'New High Score': 40,
 'Starry Night+': 41,
 'Vi': 42,
 'Dynamo Circlet': 43,
 "Pandora's Items III": 44,
 'Anima Visage': 45,
 'Slayer Crest': 46,
 'Zephyr

In [None]:
def weighted_random_choice(arr1, arr2, weight=0.9):
    """
    Returns a random element drawn from arr1 or arr2, favouring arr1.

    Args:
    - arr1: Sequence that is picked with probability `weight`.
    - arr2: Sequence that is picked with probability `1 - weight`.
    - weight: Probability of drawing from arr1 (default 0.9).

    Returns:
    - One element, chosen uniformly at random from the selected sequence.

    Raises:
    - ValueError: if the selected sequence is empty.
    """
    # First decide which sequence to draw from...
    from_arr1 = random.choices([True, False], weights=[weight, 1 - weight])[0]
    chosen = arr1 if from_arr1 else arr2

    if len(chosen) == 0:
        raise ValueError("weighted_random_choice: the selected sequence is empty")

    # ...then pick a uniformly random position inside it.
    return chosen[random.randint(0, len(chosen) - 1)]

In [None]:
def make_board():
    """Build one random 90-token board sequence ending in a masked sell action.

    Layout (token index in brackets):
      [0]      'GO'
      [1-44]   11 board slots of 4 tokens each (unit + 3 item slots, or 4 x 'PAD')
      [45]     'HERO_BOUNDARY'
      [46-55]  10 bench slots (unit or 'PAD')
      [56]     'BENCH_BOUNDARY'
      [57-61]  5 shop slots (unit or 'PAD')
      [62]     'SHOP_BOUNDARY'
      [63-77]  15 item slots (item or 'ITEM_SLOT')
      [78]     'ITEM_BOUNDARY'
      [79-81]  3 augment slots (augment or 'AUGMENT_SLOT')
      [82]     'AUGMENT_BOUNDARY'
      [83]     one of ``reroll_tokens``
      [84]     'ACTION_SELL'
      [85]     'MASK'
      [86-88]  'PAD' x 3
      [89]     'EOS'

    Returns:
        list[str]: the token sequence.
    """
    board = ['GO']

    # --- Board: 11 slots, the first `random_board_size` hold a unit. ---
    random_board_size = random.randint(1, 11)
    for i in range(11):
        if i < random_board_size:
            board.append(random.choice(units))
            random_items = random.randint(0, 3)
            for _ in range(random_items):
                board.append(weighted_random_choice(full_items, base_items, weight=.95))
            board.extend(['ITEM_SLOT'] * (3 - random_items))
        elif i == random_board_size:
            # With weight=1 the first list is always selected, so this slot is
            # currently always padding; the HERO_SLOT branch only becomes
            # reachable if the weight is lowered.
            open_slot = weighted_random_choice(['PAD'], ['HERO_SLOT'], weight=1)
            if open_slot == 'HERO_SLOT':
                board.extend(['HERO_SLOT'] + ['ITEM_SLOT'] * 3)
            else:
                board.extend(['PAD'] * 4)
        else:
            board.extend(['PAD'] * 4)
    board.append('HERO_BOUNDARY')

    # --- Bench: 10 slots, at least one unit, padded at the end. ---
    random_bench_size = random.randint(1, 10)
    for _ in range(random_bench_size):
        board.append(random.choice(units))
    board.extend(['PAD'] * (10 - random_bench_size))
    board.append('BENCH_BOUNDARY')

    # --- Shop: 5 slots, each a unit or (less often) padding. ---
    for _ in range(5):
        board.append(weighted_random_choice(units, ['PAD']))
    board.append('SHOP_BOUNDARY')

    # --- Item bench: 15 slots, mostly base items, rest left empty. ---
    random_item_size = random.randint(2, 15)
    for _ in range(random_item_size):
        board.append(weighted_random_choice(base_items, full_items))
    board.extend(['ITEM_SLOT'] * (15 - random_item_size))
    board.append('ITEM_BOUNDARY')

    # --- Augments: 3 slots. ---
    random_augment_size = random.randint(0, 3)
    for _ in range(random_augment_size):
        board.append(random.choice(augments))
    board.extend(['AUGMENT_SLOT'] * (3 - random_augment_size))
    board.append('AUGMENT_BOUNDARY')

    # --- Reroll flag, then the action with its masked argument. ---
    board.append(random.choice(reroll_tokens))
    board.extend(['ACTION_SELL', 'MASK', 'PAD', 'PAD', 'PAD', 'EOS'])
    return board

def make_penalty(board):
  """Return a vocab-sized vector spreading weight 1 over the distinct bench units.

  The tokens strictly between ``hero_boundary`` and ``bench_boundary`` (the
  bench region of a board built by ``make_board``) are collected, ignoring
  'PAD'. Every distinct unit found gets ``1 / n_distinct`` at its vocabulary
  id; all other entries stay 0.
  """
  bench_region = board[hero_boundary + 1:bench_boundary]
  distinct_units = {token for token in bench_region if token != 'PAD'}

  penalty_vector = np.zeros(len(vocab))
  for token in distinct_units:
    penalty_vector[vocab[token]] = 1 / len(distinct_units)
  return penalty_vector

In [None]:
# Generate one sample board and display it to sanity-check the token layout.
board = make_board()
board

['GO',
 'Shaco',
 'ITEM_SLOT',
 'ITEM_SLOT',
 'ITEM_SLOT',
 'Kindred',
 'Spear of Shojin',
 'Adaptive Helm',
 'ITEM_SLOT',
 'Zed',
 "Tactician's Shield",
 'ITEM_SLOT',
 'ITEM_SLOT',
 'Vex',
 "Runaan's Hurricane",
 'Royal Crownshield',
 'ITEM_SLOT',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'PAD',
 'HERO_BOUNDARY',
 'Vex',
 'Vex',
 'Urgot',
 'Vi',
 'Rengar',
 "Kog'Maw",
 'Garen',
 'Senna',
 'Galio',
 'Illaoi',
 'BENCH_BOUNDARY',
 "Cho'Gath",
 'Shaco',
 'Zac',
 'Ziggs',
 'Xayah',
 'SHOP_BOUNDARY',
 'Recurve Bow',
 "Giant's Belt",
 'Negatron Cloak',
 'Chain Vest',
 'Chain Vest',
 'Negatron Cloak',
 'Negatron Cloak',
 'Needlessly Large Rod',
 "Giant's Belt",
 'ITEM_SLOT',
 'ITEM_SLOT',
 'ITEM_SLOT',
 'ITEM_SLOT',
 'ITEM_SLOT',
 'ITEM_SLOT',
 'ITEM_BOUNDARY',
 'Golden Fleece+',
 'Transistor',
 'AUGMENT_SLOT',
 'AUGMENT

In [None]:
# Show the penalty weight assigned to every token in the bench region of the
# sample board (a unit that appears twice is printed twice with the same weight).
penalty = make_penalty(board)
bench_region = board[hero_boundary+1:bench_boundary]
for unit in bench_region:
  weight_for_unit = penalty[vocab[unit]]
  print(unit, weight_for_unit)

Vex 0.1111111111111111
Vex 0.1111111111111111
Urgot 0.1111111111111111
Vi 0.1111111111111111
Rengar 0.1111111111111111
Kog'Maw 0.1111111111111111
Garen 0.1111111111111111
Senna 0.1111111111111111
Galio 0.1111111111111111
Illaoi 0.1111111111111111


In [None]:
# Accumulate plain Python rows first and build the DataFrames once at the end.
sell_dataset_list, sell_penalties_list = [], []

# 200704 samples = 98 * 2048.
for _ in tqdm.tqdm(range(200704)):
    board = make_board()
    sell_dataset_list.append(board)
    penalty = make_penalty(board)
    sell_penalties_list.append(penalty)

# One row per sample: 90 token columns / one column per vocabulary id.
sell_dataset = pd.DataFrame(sell_dataset_list, columns=range(90))
sell_penalties = pd.DataFrame(sell_penalties_list, columns=range(len(vocab)))

100%|██████████| 200704/200704 [00:15<00:00, 12734.95it/s]


In [None]:
# Display the generated boards: one row per sample, one column per token position.
sell_dataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,80,81,82,83,84,85,86,87,88,89
0,GO,Samira,ITEM_SLOT,ITEM_SLOT,ITEM_SLOT,Shyvana,Strategist Emblem,ITEM_SLOT,ITEM_SLOT,Cho'Gath,...,Vanguard Crest,AUGMENT_SLOT,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_SELL,MASK,PAD,PAD,PAD,EOS
1,GO,Sylas,ITEM_SLOT,ITEM_SLOT,ITEM_SLOT,Annie,Unending Despair,Zenith Edge,ITEM_SLOT,Ziggs,...,Wandering Trainer I,Tagging Spree,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_SELL,MASK,PAD,PAD,PAD,EOS
2,GO,Veigar,ITEM_SLOT,ITEM_SLOT,ITEM_SLOT,Shaco,Banshee's Veil,ITEM_SLOT,ITEM_SLOT,Galio,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_SELL,MASK,PAD,PAD,PAD,EOS
3,GO,Sejuani,Guardbreaker,ITEM_SLOT,ITEM_SLOT,Aurora,Gold Collector,Guinsoo's Reckoning,ITEM_SLOT,Miss Fortune,...,Tomb Raider I,Epoch,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_SELL,MASK,PAD,PAD,PAD,EOS
4,GO,Ekko,ITEM_SLOT,ITEM_SLOT,ITEM_SLOT,PAD,PAD,PAD,PAD,PAD,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CANT_REROLL,ACTION_SELL,MASK,PAD,PAD,PAD,EOS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200699,GO,Vayne,Morellonomicon,ITEM_SLOT,ITEM_SLOT,Brand,Street Demon Emblem,Infinity Edge,Gambler's Blade,Dr. Mundo,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_SELL,MASK,PAD,PAD,PAD,EOS
200700,GO,Rengar,Needlessly Big Gem,ITEM_SLOT,ITEM_SLOT,Kindred,Rascal's Gloves,ITEM_SLOT,ITEM_SLOT,Nidalee,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_SELL,MASK,PAD,PAD,PAD,EOS
200701,GO,Galio,Quicksilver,ITEM_SLOT,ITEM_SLOT,Twisted Fate,Needlessly Large Rod,Anima Squad Emblem,ITEM_SLOT,Brand,...,AUGMENT_SLOT,AUGMENT_SLOT,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_SELL,MASK,PAD,PAD,PAD,EOS
200702,GO,Jax,Zhonya's Paradox,Virtue of the Martyr,ITEM_SLOT,Aphelios,Adaptive Helm,Zhonya's Paradox,ITEM_SLOT,Twisted Fate,...,Dummy With A Gun,AUGMENT_SLOT,AUGMENT_BOUNDARY,CAN_REROLL,ACTION_SELL,MASK,PAD,PAD,PAD,EOS


In [None]:
# Display the penalty targets: one row per sample, one column per vocabulary id.
sell_penalties

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,536,537,538,539,540,541,542,543,544,545
0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.333333,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200699,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
200700,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0
200701,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
200702,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0


In [None]:
class MaskedTokenDataset(Dataset):
    """Map-style dataset pairing each token sequence with its penalty vector.

    Tokens are translated to integer ids through ``vocab`` lazily, one sample
    at a time, when that sample is indexed.
    """

    def __init__(self, sequences, vocab, penalties):
        # Stored by reference; nothing is copied or converted up front.
        self.sequences, self.vocab, self.penalties = sequences, vocab, penalties

    def __len__(self):
        """Number of samples, i.e. the number of stored sequences."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return ``{'input': token-id tensor, 'penalty': penalty tensor}`` for sample ``idx``."""
        token_ids = torch.tensor([self.vocab[token] for token in self.sequences[idx]])
        target = torch.tensor(self.penalties[idx])
        return {'input': token_ids, 'penalty': target}

def convert_test(sequence, vocab):
    """Translate a token sequence into a 1-D tensor of vocabulary ids."""
    return torch.tensor([vocab[token] for token in sequence])

In [None]:
class MaskedTokenTransformer(nn.Module):
    """Transformer encoder that emits vocabulary logits for every position.

    Args:
        vocab_size: Number of distinct token ids; also the width of the logits.
        d_model: Embedding / hidden size.
        nhead: Number of attention heads in each encoder layer.
        num_layers: Number of stacked encoder layers.
        dim_feedforward: Hidden size of each layer's feed-forward sub-block.
        dropout: Dropout probability applied to the summed token + position
            embeddings. It is not forwarded to the encoder layers, which keep
            ``nn.TransformerEncoderLayer``'s own default.
        max_len: Number of learned position embeddings, i.e. the longest
            sequence the model accepts (default 90).
    """

    def __init__(self, vocab_size, d_model=512, nhead=8, num_layers=4, dim_feedforward=512, dropout=.2,
                 max_len=90):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_embedding = nn.Embedding(max_len, d_model)
        self.dropout = nn.Dropout(dropout)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Single output head projecting every position onto the vocabulary.
        self.head = nn.Linear(d_model, vocab_size)

    def forward(self, x):
        """Map token ids of shape (batch, seq_len) to logits of shape (batch, seq_len, vocab_size).

        Raises:
            IndexError: if ``seq_len`` exceeds the number of position embeddings.
        """
        seq_len = x.size(1)
        if seq_len > self.pos_embedding.num_embeddings:
            # Fail with a readable message instead of an out-of-range lookup
            # inside the embedding table.
            raise IndexError(
                f"sequence length {seq_len} exceeds max_len "
                f"{self.pos_embedding.num_embeddings}"
            )

        # Learned absolute positions, broadcast over the batch dimension.
        positions = torch.arange(seq_len, device=x.device).unsqueeze(0)
        x = self.embedding(x) + self.pos_embedding(positions)
        x = self.dropout(x)

        x = self.transformer(x)

        logits = self.head(x)

        return logits

In [None]:
def custom_loss(outputs, target_indices, expected_classes):
    """
    Cross-entropy between the logits at one position per sequence and a target.

    outputs: Tensor of shape (batch_size, seq_len, vocab_size) — the model logits
    target_indices: position to supervise in each sequence; either a single int
        (the same position for every sequence) or an integer tensor of shape (batch_size,)
    expected_classes: any target accepted by nn.functional.cross_entropy — class IDs
        of shape (batch_size,), or per-class probabilities of shape (batch_size, vocab_size)

    Returns a scalar tensor: the loss averaged over the batch.
    """
    batch_size = outputs.size(0)

    # Build the row index on the same device as the logits.
    batch_rows = torch.arange(batch_size, device=outputs.device)
    if torch.is_tensor(target_indices):
        target_indices = target_indices.to(outputs.device)

    # Get the logits at the target positions
    selected_logits = outputs[batch_rows, target_indices]  # shape: (batch_size, vocab_size)
    # Apply cross-entropy loss at these positions
    return nn.functional.cross_entropy(selected_logits, expected_classes)

In [None]:
# Training hyper-parameters.
epochs = 15
batch_size = 2048

# Plain Python lists: one token list and one penalty vector per sample.
sequences = sell_dataset.to_numpy().tolist()
expected = sell_penalties.to_numpy().tolist()

# drop_last=True keeps every batch at exactly `batch_size` samples.
data = MaskedTokenDataset(sequences=sequences, vocab=vocab, penalties=expected)
dataloader = DataLoader(data, batch_size=batch_size, drop_last=True)

model = MaskedTokenTransformer(vocab_size=len(vocab)).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

In [None]:
# Put the model in training mode explicitly: evaluate() switches it to eval
# mode, so re-running this cell afterwards would otherwise train with dropout
# disabled.
model.train()
for epoch in range(epochs):
    total_loss = 0
    for inputs in dataloader:
        input_ids = inputs['input'].to(device)
        expected_tensor = inputs['penalty'].to(device)
        optimizer.zero_grad()
        logits = model(input_ids)
        # Supervise only the MASK position, which directly follows the action token.
        loss = custom_loss(logits, decision+1, expected_tensor)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")

Epoch 1/15 - Loss: 3.1619
Epoch 2/15 - Loss: 1.7627
Epoch 3/15 - Loss: 1.5702
Epoch 4/15 - Loss: 1.5223
Epoch 5/15 - Loss: 1.5058
Epoch 6/15 - Loss: 1.4971
Epoch 7/15 - Loss: 1.4922
Epoch 8/15 - Loss: 1.4890
Epoch 9/15 - Loss: 1.4868
Epoch 10/15 - Loss: 1.4852
Epoch 11/15 - Loss: 1.4842
Epoch 12/15 - Loss: 1.4838
Epoch 13/15 - Loss: 1.4826
Epoch 14/15 - Loss: 1.4821
Epoch 15/15 - Loss: 1.4818


In [None]:
def evaluate(model, input_ids, masked_index, threshold=6):
    """
    Predict the token at one position and print every candidate above a logit threshold.

    model: the network to query; it is switched to eval mode and left there
    input_ids: Tensor of shape (1, seq_len) — single input sequence with a [MASK] token
    masked_index: int — the position of the masked token in the sequence
    threshold: candidates whose logit is strictly greater than this are printed (default 6)

    Returns the vocabulary id with the highest logit at `masked_index`.
    """
    model.eval()
    with torch.no_grad():
        logits = model(input_ids)  # shape: (1, seq_len, vocab_size)
        masked_logits = logits[0, masked_index]  # shape: (vocab_size,)
        candidate_ids = (masked_logits > threshold).nonzero(as_tuple=True)[0].tolist()
        predicted_id = masked_logits.argmax(dim=-1).item()
        print(f"Predicted token ID at index {masked_index}: {inv_vocab[predicted_id]}")
        for token_id in candidate_ids:
            logit_value = masked_logits[token_id].item()
            print(f"Token ID {token_id:>5} | Logit: {logit_value:.8f} | Token: {inv_vocab[token_id]}")

    return predicted_id

In [None]:
# Spot-check the trained model on a few fresh boards: print the bench region,
# then the model's prediction for the MASK position.
for _ in range(5):
  board = make_board()
  print(board[hero_boundary+1:bench_boundary])
  input_ids = convert_test(board, vocab).unsqueeze(0).to(device)
  # Named predicted_id rather than `id` so the builtin is not shadowed.
  predicted_id = evaluate(model, input_ids, decision+1)

['Galio', 'Ziggs', 'Graves', 'Zyra', 'Gragas', "Kog'Maw", 'PAD', 'PAD', 'PAD', 'PAD']
Predicted token ID at index 85: Graves
Token ID     0 | Logit: 11.51461506 | Token: Ziggs
Token ID   112 | Logit: 11.58910465 | Token: Graves
Token ID   133 | Logit: 11.43199062 | Token: Gragas
Token ID   147 | Logit: 11.50903130 | Token: Zyra
Token ID   302 | Logit: 11.56119823 | Token: Kog'Maw
Token ID   430 | Logit: 11.40425396 | Token: Galio
['Elise', 'Xayah', "Cho'Gath", 'Zeri', 'Darius', 'Ziggs', 'Dr. Mundo', 'Brand', 'Seraphine', 'Vex']
Predicted token ID at index 85: Brand
Token ID     0 | Logit: 9.08441257 | Token: Ziggs
Token ID   110 | Logit: 9.16055965 | Token: Cho'Gath
Token ID   193 | Logit: 9.00421333 | Token: Xayah
Token ID   234 | Logit: 9.18725109 | Token: Elise
Token ID   252 | Logit: 9.24287033 | Token: Brand
Token ID   375 | Logit: 9.08117294 | Token: Seraphine
Token ID   415 | Logit: 9.16068649 | Token: Darius
Token ID   456 | Logit: 9.05627918 | Token: Vex
Token ID   502 | Logit

In [None]:
# Persist only the learned parameters (the state_dict), not the model object itself.
torch.save(model.state_dict(), "sell_model.pth")