In [None]:
import torch
import tqdm
import pickle
import random
import pandas as pd
import numpy as np
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

Using device: cuda


In [None]:
def _read_token_file(path):
    """Return one token per line of `path`, with every comma removed from the line.

    Each line is stripped of surrounding whitespace first. Blank lines are kept
    and yield an empty-string token.
    """
    with open(path, 'r') as f:
        return [line.strip().replace(',', '') for line in f]


# Game vocabulary, one list per token family.
augments = _read_token_file('augments.csv')
base_items = _read_token_file('base_items.csv')
full_items = _read_token_file('full_items.csv')
units = _read_token_file('units.csv')

# Special tokens that do not come from the CSV files.
reroll_tokens = ['CAN_REROLL', 'CANT_REROLL']
action_tokens = ['ACTION_BUY', 'ACTION_SELL', 'ACTION_MOVE', 'ACTION_ITEM',
                 'ACTION_AUGMENT', 'ACTION_REROLL', 'ACTION_NONE']
other_tokens = ['GO','EOS', 'MASK', 'PAD', 'HERO_BOUNDARY', 'BENCH_BOUNDARY', 'SHOP_BOUNDARY',
                'ITEM_BOUNDARY', 'AUGMENT_BOUNDARY', 'ITEM_SLOT', 'AUGMENT_SLOT', 'HERO_SLOT']

# Augment names that really contain commas are mangled by the comma removal
# above: they come out with `""` where the name has `,`. Restore the proper
# spelling. NOTE(review): presumably a CSV quoting artefact; confirm against
# augments.csv. `list.index` raises ValueError if a mangled name is missing.
_augment_name_fixes = {
    'Cower"" Weaklings!': 'Cower, Weaklings!',
    'One"" Two"" Five!': 'One, Two, Five!',
    '10""000 IQ': '10,000 IQ',
    'One Buff"" Two Buff': 'One Buff, Two Buff',
}
for _mangled, _proper in _augment_name_fixes.items():
    augments[augments.index(_mangled)] = _proper

tokens = augments + base_items + full_items + units + reroll_tokens + action_tokens + other_tokens

In [None]:
# Load the token -> id mapping and its inverse (id -> token) from disk.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# vocab files you produced yourself.
with open('vocab.pkl', 'rb') as vocab_file, open('inv_vocab.pkl', 'rb') as inv_vocab_file:
    vocab = pickle.load(vocab_file)
    inv_vocab = pickle.load(inv_vocab_file)
vocab

{'Ziggs': 0,
 'Pair of Fours': 1,
 'Blazing Soul II': 2,
 "Zhonya's Paradox": 3,
 'Illaoi': 4,
 'Cower, Weaklings!': 5,
 "Slammin'+": 6,
 'ReinFOURcement': 7,
 'Adaptive Strikes': 8,
 'Rapidfire Crest': 9,
 'Syndicate Emblem': 10,
 "Nashor's Tooth": 11,
 'Statikk Shiv': 12,
 'ITEM_SLOT': 13,
 'Roll The Dice': 14,
 'Iron Assets': 15,
 'SHOP_BOUNDARY': 16,
 'Giant Slayer': 17,
 'Golden Fleece': 18,
 'Slayer Crown': 19,
 'One For All II': 20,
 'Recurve Bow': 21,
 'Senna': 22,
 "I'm the Carry Now": 23,
 'Prismatic Ticket': 24,
 'CAN_REROLL': 25,
 'Chain Vest': 26,
 'HERO_BOUNDARY': 27,
 'Missed Connections': 28,
 'Lucky Gloves': 29,
 'Bastion Crown': 30,
 'Marksman Circlet': 31,
 'Placebo': 32,
 'Bastion Crest': 33,
 'Cooking Pot': 34,
 'Marksman Crest': 35,
 'Eye For An Eye+': 36,
 'Void Swarm': 37,
 'Slayer Emblem': 38,
 'Jhin': 39,
 'New High Score': 40,
 'Starry Night+': 41,
 'Vi': 42,
 'Dynamo Circlet': 43,
 "Pandora's Items III": 44,
 'Anima Visage': 45,
 'Slayer Crest': 46,
 'Zephyr

In [None]:
# Fixed token positions inside the 90-token sequence built by make_board().
hero_boundary = 45      # index of 'HERO_BOUNDARY' (after 'GO' + 11 hero groups of 4 tokens)
bench_boundary = 56     # index of 'BENCH_BOUNDARY' (after 10 bench slots)
shop_boundary = 62      # index of 'SHOP_BOUNDARY' (after 5 shop units)
item_boundary = 78      # index of 'ITEM_BOUNDARY' (after 15 item slots)
augment_boundary = 82   # index of 'AUGMENT_BOUNDARY' (after 3 augment slots)
reroll = 83             # index of the CAN_REROLL / CANT_REROLL flag
decision = 84           # index of the 'MASK' token the model is trained to fill


In [None]:
def weighted_random_choice(arr1, arr2, weight=0.9):
    """
    Pick one random element, favouring arr1 over arr2.

    Args:
    - arr1: First sequence, chosen as the source with probability `weight`.
    - arr2: Second sequence, chosen as the source with probability `1 - weight`.
    - weight: Probability of drawing from arr1 (default 0.9).

    Returns:
    - A uniformly chosen element of the selected sequence (the element itself,
      not its index).

    Raises:
    - ValueError: if the selected sequence is empty (raised by random.randint).
    """
    # First decide which sequence supplies the element...
    from_arr1 = random.choices([True, False], weights=[weight, 1 - weight])[0]
    source = arr1 if from_arr1 else arr2

    # ...then pick uniformly inside it.
    index = random.randint(0, len(source) - 1)
    return source[index]

def make_board():
  """
  Generate one random game state as a flat list of 90 string tokens.

  Layout (index ranges are inclusive):
  - 0      : 'GO'
  - 1-44   : 11 board groups of 4 tokens (hero + 3 item slots). Groups past
             the random board size are 'PAD'; the first unused group is an
             open 'HERO_SLOT' group with probability 0.4.
  - 45     : 'HERO_BOUNDARY'
  - 46-55  : 10 bench slots (unit names, then 'PAD')
  - 56     : 'BENCH_BOUNDARY'
  - 57-61  : 5 shop units
  - 62     : 'SHOP_BOUNDARY'
  - 63-77  : 15 inventory slots (items, then 'ITEM_SLOT')
  - 78     : 'ITEM_BOUNDARY'
  - 79-81  : 3 augment slots (augments, then 'AUGMENT_SLOT')
  - 82     : 'AUGMENT_BOUNDARY'
  - 83     : 'CAN_REROLL' or 'CANT_REROLL'
  - 84     : 'MASK' (the position the model predicts)
  - 85-88  : 'PAD'
  - 89     : 'EOS'

  Reads the module-level `units`, `full_items`, `base_items`, `augments` and
  `reroll_tokens` lists and draws from the global `random` generator.
  """
  board = ['GO']

  # Board: between 1 and 11 heroes, each with 0-3 equipped items.
  random_board_size = random.randint(1, 11)
  for i in range(11):
    if i < random_board_size:
      board.append(random.choice(units))
      random_items = random.randint(0, 3)
      for j in range(3):
        if j < random_items:
          # Equipped items are almost always completed items.
          board.append(weighted_random_choice(full_items, base_items, weight=.95))
        else:
          board.append('ITEM_SLOT')
    elif i == random_board_size:
      # The group right after the last hero is an open slot 40% of the time.
      open_slot = weighted_random_choice(['PAD'], ['HERO_SLOT'], weight=.6)
      if open_slot == 'HERO_SLOT':
        board.extend(['HERO_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT'])
      else:
        board.extend(['PAD'] * 4)
    else:
      board.extend(['PAD'] * 4)
  board.append('HERO_BOUNDARY')

  # Bench: 0-10 units, padded to 10 slots.
  random_bench_size = random.randint(0, 10)
  for i in range(10):
    if i < random_bench_size:
      board.append(random.choice(units))
    else:
      board.append('PAD')
  board.append('BENCH_BOUNDARY')

  # Shop: always 5 units on offer.
  for i in range(5):
    board.append(random.choice(units))
  board.append('SHOP_BOUNDARY')

  # Inventory: 0-15 items, mostly components (default weight 0.9 for base items).
  random_item_size = random.randint(0, 15)
  for i in range(15):
    if i < random_item_size:
      board.append(weighted_random_choice(base_items, full_items))
    else:
      board.append('ITEM_SLOT')
  board.append('ITEM_BOUNDARY')

  # Augments: 0-3 chosen, padded to 3 slots.
  random_augment_size = random.randint(0, 3)
  for i in range(3):
    if i < random_augment_size:
      board.append(random.choice(augments))
    else:
      board.append('AUGMENT_SLOT')
  board.append('AUGMENT_BOUNDARY')

  # Reroll flag, the masked decision token, then padding and end-of-sequence.
  board.append(random.choice(reroll_tokens))
  board.append('MASK')
  board.extend(['PAD'] * 4)
  board.append("EOS")
  return board

def make_penalty(board):
  """
  Build the action distribution for a board laid out like make_board()'s output.

  Returns a numpy vector of length len(vocab) that is zero everywhere except
  on ACTION_* token ids. The training cells use it as the soft target for the
  'MASK' position.

  Rules, in priority order:
  - an open 'HERO_SLOT' forces ACTION_MOVE when the bench holds a unit,
    otherwise ACTION_BUY;
  - otherwise a completed item in the inventory forces ACTION_ITEM;
  - otherwise the mass is shared equally between the still-legal actions, with
    ACTION_ITEM counted twice when more than one item is held.
  """
  target = np.zeros(len(vocab))
  inventory = [tok for tok in board[shop_boundary+1:item_boundary] if tok != 'ITEM_SLOT']
  bench_occupied = any(tok != 'PAD' for tok in board[hero_boundary+1:bench_boundary])

  # Forced decisions put all the mass on a single action.
  forced = None
  if 'HERO_SLOT' in board:
    forced = 'ACTION_MOVE' if bench_occupied else 'ACTION_BUY'
  elif any(tok in full_items for tok in inventory):
    forced = 'ACTION_ITEM'
  if forced is not None:
    target[vocab[forced]] = 1
    return target

  # Otherwise collect the actions the board state rules out.
  blocked = set()
  if board[reroll] == 'CANT_REROLL':
    blocked.add('ACTION_REROLL')
  if len(inventory) == 0:
    blocked.add('ACTION_ITEM')
  if not bench_occupied:
    blocked.update(('ACTION_MOVE', 'ACTION_SELL'))
  candidates = [
    action
    for action in ('ACTION_BUY', 'ACTION_SELL', 'ACTION_MOVE', 'ACTION_NONE', 'ACTION_ITEM', 'ACTION_REROLL')
    if action not in blocked
  ]

  # Holding several items gives ACTION_ITEM one extra share.
  extra_item_share = 1 if len(inventory) > 1 else 0
  share = 1/(len(candidates) + extra_item_share)
  for action in candidates:
    if extra_item_share and action == 'ACTION_ITEM':
      target[vocab[action]] = 2* share
    else:
      target[vocab[action]] = share
  return target

In [None]:
def build_test_board(open_hero_slot=False, bench=('Alistar',), items=('B.F. Sword',), reroll_token='CAN_REROLL'):
    """Build a 90-token board in make_board()'s layout for checking make_penalty().

    The two fielded heroes, the shop and the augments are fixed. Only the parts
    make_penalty() looks at vary:
    - open_hero_slot: add an empty 'HERO_SLOT' group after the fielded heroes.
    - bench: units on the bench (the remaining bench slots are 'PAD').
    - items: inventory items (the remaining slots are 'ITEM_SLOT').
    - reroll_token: 'CAN_REROLL' or 'CANT_REROLL'.
    """
    board = ['GO',
             'Varus', 'Golden Ox Emblem', 'Executioner Emblem', 'ITEM_SLOT',
             'Brand', 'Demonslayer', 'Chalice of Power', 'Dynamo Emblem']
    if open_hero_slot:
        board += ['HERO_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT']
    board += ['PAD'] * (hero_boundary - len(board))
    board.append('HERO_BOUNDARY')

    bench_slots = bench_boundary - hero_boundary - 1
    board += list(bench) + ['PAD'] * (bench_slots - len(bench))
    board.append('BENCH_BOUNDARY')

    board += ['Aurora', 'Kindred', "Cho'Gath", 'Senna', 'Samira', 'SHOP_BOUNDARY']

    item_slots = item_boundary - shop_boundary - 1
    board += list(items) + ['ITEM_SLOT'] * (item_slots - len(items))
    board.append('ITEM_BOUNDARY')

    board += ['Support Mining', 'Max Cap', 'AUGMENT_SLOT', 'AUGMENT_BOUNDARY',
              reroll_token, 'MASK', 'PAD', 'PAD', 'PAD', 'PAD', 'EOS']
    assert len(board) == 90, len(board)
    return board


# (board options, expected mass on BUY, MOVE, SELL, ITEM, REROLL, NONE)
penalty_cases = [
    # bench unit, one component, reroll allowed: all six actions are equally likely
    (dict(), (1/6, 1/6, 1/6, 1/6, 1/6, 1/6)),
    # open hero slot with a bench unit: must MOVE
    (dict(open_hero_slot=True), (0, 1, 0, 0, 0, 0)),
    # open hero slot with an empty bench: must BUY
    (dict(open_hero_slot=True, bench=()), (1, 0, 0, 0, 0, 0)),
    # reroll not allowed: REROLL drops out
    (dict(reroll_token='CANT_REROLL'), (0.2, 0.2, 0.2, 0.2, 0, 0.2)),
    # no items and no reroll
    (dict(items=(), reroll_token='CANT_REROLL'), (0.25, 0.25, 0.25, 0, 0, 0.25)),
    # empty bench, no items, no reroll: only BUY / NONE remain
    (dict(bench=(), items=(), reroll_token='CANT_REROLL'), (0.5, 0, 0, 0, 0, 0.5)),
    # a completed item in the inventory: must ITEM
    (dict(items=('Fishbones',), reroll_token='CANT_REROLL'), (0, 0, 0, 1, 0, 0)),
    # two components: ITEM gets a double share
    (dict(items=('B.F. Sword', 'B.F. Sword')), (1/7, 1/7, 1/7, 2/7, 1/7, 1/7)),
    # two components, empty bench, no reroll
    (dict(bench=(), items=('B.F. Sword', 'B.F. Sword'), reroll_token='CANT_REROLL'), (0.25, 0, 0, 0.5, 0, 0.25)),
]

for case_options, expected_mass in penalty_cases:
    test_board = build_test_board(**case_options)
    penalty = make_penalty(test_board)
    print(penalty[vocab['ACTION_BUY']],penalty[vocab['ACTION_MOVE']] ,penalty[vocab['ACTION_SELL']],penalty[vocab['ACTION_ITEM']],penalty[vocab['ACTION_REROLL']], penalty[vocab['ACTION_NONE']])
    observed_mass = [penalty[vocab[action]] for action in
                     ('ACTION_BUY', 'ACTION_MOVE', 'ACTION_SELL', 'ACTION_ITEM', 'ACTION_REROLL', 'ACTION_NONE')]
    assert np.allclose(observed_mass, expected_mass), (case_options, observed_mass)

0.16666666666666666 0.16666666666666666 0.16666666666666666 0.16666666666666666 0.16666666666666666 0.16666666666666666
0.0 1.0 0.0 0.0 0.0 0.0
1.0 0.0 0.0 0.0 0.0 0.0
0.2 0.2 0.2 0.2 0.0 0.2
0.25 0.25 0.25 0.0 0.0 0.25
0.5 0.0 0.0 0.0 0.0 0.5
0.0 0.0 0.0 1.0 0.0 0.0
0.14285714285714285 0.14285714285714285 0.14285714285714285 0.2857142857142857 0.14285714285714285 0.14285714285714285
0.25 0.0 0.0 0.5 0.0 0.25


In [None]:
# Synthetic training set: random boards paired with their action distribution.
sample_count = 100352  # 49 * 2048, i.e. a whole number of 2048-sized batches
dataset_list, penalties_list = [], []

for _ in tqdm.tqdm(range(sample_count)):
    board = make_board()
    penalty = make_penalty(board)
    dataset_list += [board]
    penalties_list += [penalty]

# Build each DataFrame once from the collected rows: one column per token
# position (90) and one column per vocabulary entry.
dataset = pd.DataFrame(data=dataset_list, columns=range(90))
penalties = pd.DataFrame(data=penalties_list, columns=range(len(vocab)))

100%|██████████| 100352/100352 [00:07<00:00, 13621.47it/s]


In [None]:
class MaskedTokenDataset(Dataset):
    """Pairs each token sequence with its target vector.

    sequences: indexable collection of token-string sequences
    vocab: mapping from token string to integer id
    penalties: indexable collection of per-sequence target vectors, aligned
        with `sequences` (plain lists or numpy arrays)
    """

    def __init__(self, sequences, vocab, penalties):
        self.sequences = sequences
        self.vocab = vocab
        self.penalties = penalties

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return {'input': int64 token ids, 'penalty': float32 target vector}."""
        input_ids = [self.vocab[token] for token in self.sequences[idx]]
        # Fix the dtypes explicitly so the result does not depend on the input
        # container: a numpy float64 vector would otherwise become a float64
        # tensor and no longer match the model's float32 logits.
        return {
            'input': torch.tensor(input_ids, dtype=torch.long),
            'penalty': torch.tensor(self.penalties[idx], dtype=torch.float32),
        }

def convert_test(sequence, vocab):
    """Look up every token of `sequence` in `vocab` and return the ids as a 1-D tensor."""
    ids = []
    for token in sequence:
        ids.append(vocab[token])
    return torch.tensor(ids)

In [None]:
class MaskedTokenTransformer(nn.Module):
    """Transformer encoder that emits vocabulary logits at every position.

    Token and learned positional embeddings are summed, passed through dropout
    and a stack of `nn.TransformerEncoderLayer`s, then projected back to the
    vocabulary by a single linear head.

    Args:
        vocab_size: number of distinct token ids (embedding rows and logits).
        d_model: embedding / hidden width.
        nhead: attention heads per encoder layer.
        num_layers: number of encoder layers.
        dim_feedforward: width of each layer's feed-forward block.
        dropout: dropout probability applied to the summed embeddings.
            NOTE(review): it is not forwarded to the encoder layers, which
            therefore keep nn.TransformerEncoderLayer's own default. Confirm
            this is intended.
        max_len: longest sequence supported by the positional table
            (default 90, the board length used in this notebook).
    """

    def __init__(self, vocab_size, d_model=512, nhead=8, num_layers=4, dim_feedforward=512, dropout=.2, max_len=90):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_embedding = nn.Embedding(max_len, d_model)
        self.dropout = nn.Dropout(dropout)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Single output head: projects every position to vocabulary logits.
        self.head = nn.Linear(d_model, vocab_size)

    def forward(self, x):
        """Map token ids of shape (batch, seq_len) to logits of shape (batch, seq_len, vocab_size).

        Raises:
            ValueError: if seq_len exceeds the positional table size.
        """
        seq_len = x.size(1)
        max_len = self.pos_embedding.num_embeddings
        if seq_len > max_len:
            # Fail with a readable message instead of an out-of-range embedding lookup.
            raise ValueError(f"sequence length {seq_len} exceeds max_len {max_len}")

        positions = torch.arange(seq_len, device=x.device).unsqueeze(0)
        x = self.embedding(x) + self.pos_embedding(positions)
        x = self.dropout(x)

        x = self.transformer(x)

        logits = self.head(x)

        return logits

In [None]:
def custom_loss(outputs, target_indices, expected_classes):
    """
    Cross-entropy computed at one supervised position per sequence.

    outputs: Tensor of shape (batch_size, seq_len, vocab_size) — model logits
    target_indices: int, or Tensor of shape (batch_size,) — the position to
        supervise in each sequence; an int applies the same position to every
        sequence (this is how the training cells call it)
    expected_classes: either a Tensor of shape (batch_size,) holding class IDs,
        or a Tensor of shape (batch_size, vocab_size) holding class
        probabilities (the form the training cells pass)

    Returns the scalar mean loss over the batch.
    """
    batch_size = outputs.size(0)

    # Build the batch index on the logits' device so indexing stays on-device.
    batch_indices = torch.arange(batch_size, device=outputs.device)

    # Logits at the supervised position of each sequence: (batch_size, vocab_size)
    selected_logits = outputs[batch_indices, target_indices]

    # cross_entropy accepts class ids as well as class-probability targets.
    loss = nn.functional.cross_entropy(selected_logits, expected_classes)
    return loss

In [None]:
# Training hyper-parameters.
epochs = 8
batch_size = 2048

# Turn the DataFrames back into plain nested lists: token strings per board
# and one target vector per board.
sequences = dataset.to_numpy().tolist()
expected = penalties.to_numpy().tolist()

# Batches are taken in generation order; the last partial batch is dropped.
data = MaskedTokenDataset(sequences=sequences, vocab=vocab, penalties=expected)
dataloader = DataLoader(dataset=data, batch_size=batch_size, drop_last=True)

model = MaskedTokenTransformer(vocab_size=len(vocab)).to(device)
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-4)

In [None]:
# Train for `epochs` passes over `dataloader`, supervising only the token at
# position `decision` (the 'MASK' slot).
for epoch in range(epochs):
    # Put the model in training mode every epoch so dropout stays active even
    # if this cell is re-run after an inference cell switched it to eval mode.
    model.train()
    total_loss = 0
    for inputs in dataloader:
        input_ids = inputs['input'].to(device)
        expected_tensor = inputs['penalty'].to(device)
        optimizer.zero_grad()
        logits= model(input_ids)
        loss= custom_loss(logits, decision, expected_tensor)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")

Epoch 1/8 - Loss: 2.0360
Epoch 2/8 - Loss: 1.1674
Epoch 3/8 - Loss: 0.8614
Epoch 4/8 - Loss: 0.7619
Epoch 5/8 - Loss: 0.6511
Epoch 6/8 - Loss: 0.5705
Epoch 7/8 - Loss: 0.5459
Epoch 8/8 - Loss: 0.5377


In [None]:
def evaluate(model, input_ids, masked_index, id_to_token=None):
    """
    Print every positive-logit candidate at one position and return the top prediction.

    model: module mapping (1, seq_len) token ids to (1, seq_len, vocab_size) logits
    input_ids: Tensor of shape (1, seq_len) — single input sequence with a [MASK] token
    masked_index: int — the position of the masked token in the sequence
    id_to_token: optional mapping from token id to token string, used for the
        printed report; defaults to the notebook-level `inv_vocab`

    Returns the id (int) of the highest-logit token at `masked_index`.

    The model runs in eval mode without gradients. Its previous train/eval
    mode is restored afterwards, so a later training cell still trains with
    dropout enabled.
    """
    if id_to_token is None:
        id_to_token = inv_vocab

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            logits= model(input_ids)  # shape: (1, seq_len, vocab_size)
            masked_logits = logits[0, masked_index]  # shape: (vocab_size,)
            positive_indices = (masked_logits > 0).nonzero(as_tuple=True)[0]  # 1D tensor of indices

            for idx in positive_indices:
                token_id = idx.item()
                logit_value = masked_logits[idx].item()
                print(f"Token ID {token_id:>5} | Logit: {logit_value:.8f} | Token: {id_to_token[token_id]}")

            predicted_id = masked_logits.argmax(dim=-1).item()
            print(f"Predicted token ID at index {masked_index}: {id_to_token[predicted_id]}")
    finally:
        # Restore the mode the caller had, even if the forward pass raised.
        model.train(was_training)

    return predicted_id


In [None]:
# Spot-check the model on ten fresh random boards: print each board, then the
# candidates evaluate() reports for the 'MASK' position.
for _ in range(10):
  board = make_board()
  print(board)
  input_ids = [vocab[t] for t in board]
  # Stored as `predicted_id` so the builtin `id` is not shadowed.
  predicted_id = evaluate(model, torch.tensor(input_ids).unsqueeze(0).to(device), decision)

['GO', 'Garen', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'Jinx', 'Chain Vest', 'ITEM_SLOT', 'ITEM_SLOT', 'Renekton', "Blacksmith's Gloves", "Giant's Belt", 'Crownguard', 'Shyvana', 'Ionic Spark', "Warmog's Pride", 'ITEM_SLOT', 'Mordekaiser', "Nashor's Tooth", 'ITEM_SLOT', 'ITEM_SLOT', 'Galio', 'Bramble Vest', 'Shroud of Stillness', 'ITEM_SLOT', 'Zyra', 'Bastion Emblem', "Rascal's Gloves", 'ITEM_SLOT', 'Zac', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'Alistar', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'Rengar', 'Tear of the Goddess', 'Edge of Night', 'ITEM_SLOT', 'HERO_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'HERO_BOUNDARY', 'Aurora', 'Zyra', 'Aurora', 'Shyvana', 'Yuumi', 'Leona', 'Yuumi', 'Yuumi', 'LeBlanc', 'Morgana', 'BENCH_BOUNDARY', 'Vi', 'Zac', 'Braum', 'Senna', 'Twisted Fate', 'SHOP_BOUNDARY', 'Tear of the Goddess', 'Tear of the Goddess', 'Needlessly Large Rod', "Giant's Belt", 'Recurve Bow', "Giant's Belt", 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SL

In [None]:
# Continue training the same model and optimizer for four more epochs.
epochs = 4
for epoch in range(epochs):
    # An inference cell may have left the model in eval mode. Switch back to
    # training mode so dropout is active for these extra epochs.
    model.train()
    total_loss = 0
    for inputs in dataloader:
        input_ids = inputs['input'].to(device)
        expected_tensor = inputs['penalty'].to(device)
        optimizer.zero_grad()
        logits= model(input_ids)
        loss= custom_loss(logits, decision, expected_tensor)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")

Epoch 1/4 - Loss: 0.5240
Epoch 2/4 - Loss: 0.5219
Epoch 3/4 - Loss: 0.5213
Epoch 4/4 - Loss: 0.5210


In [None]:
# Repeat the spot-check after the extra epochs: ten fresh random boards, each
# printed and followed by evaluate()'s report for the 'MASK' position.
for _ in range(10):
  board = make_board()
  print(board)
  input_ids = [vocab[t] for t in board]
  # Stored as `predicted_id` so the builtin `id` is not shadowed.
  predicted_id = evaluate(model, torch.tensor(input_ids).unsqueeze(0).to(device), decision)

['GO', 'Vayne', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'Alistar', "Sterak's Megashield", 'Locket of the Iron Solari', 'ITEM_SLOT', 'Miss Fortune', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'HERO_BOUNDARY', 'Galio', 'Garen', 'Zyra', 'Graves', 'Dr. Mundo', 'Kobuko', 'Draven', 'PAD', 'PAD', 'PAD', 'BENCH_BOUNDARY', 'Ziggs', 'Viego', 'Zeri', 'Brand', 'Morgana', 'SHOP_BOUNDARY', 'Needlessly Large Rod', 'Sparring Gloves', 'Needlessly Large Rod', 'Sparring Gloves', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_SLOT', 'ITEM_BOUNDARY', 'Going Long', 'AUGMENT_SLOT', 'AUGMENT_SLOT', 'AUGMENT_BOUNDARY', 'CANT_REROLL', 'MASK', 'PAD', 'PAD', 'PAD', 'PAD', 'EOS']
Token ID   271 | Logit: 7.564491

In [None]:
# Save only the model's state_dict to "mask_model.pth" (not the pickled module
# object). Reloading needs a MaskedTokenTransformer built with the same
# constructor arguments, followed by load_state_dict().
torch.save(model.state_dict(), "mask_model.pth")