In [1]:
from utils import load_data
trainset = load_data("trainset.pkl")
testset = load_data("testset.pkl")
print(len(trainset))
print(len(testset))

480000
160000


In [4]:
import numpy as np
from utils import State

class FeatureExtractor:
    FEATURES = 13

    def extract_features(self, state: State):
        if state.fill_num == 2:
            state = self._inverse_state(state)

        status = state.local_board_status
        features = []
        for x in range(3):
            for y in range(3):
                board = state.board[x, y]
                get_value = [0, 1, -1]
                features.append([
                    self._check_actionable(state.prev_local_action, status, x, y),
                    1 if status[x, y] == 1 else 0,
                    1 if status[x, y] == 2 else 0,
                    1 if status[x, y] == 3 else 0,
                    get_value[board[0, 0]],
                    get_value[board[0, 1]],
                    get_value[board[0, 2]],
                    get_value[board[1, 0]],
                    get_value[board[1, 1]],
                    get_value[board[1, 2]],
                    get_value[board[2, 0]],
                    get_value[board[2, 1]],
                    get_value[board[2, 2]],
                ])
        return features

    def _inverse_state(self, state):
        inversed_fill_num = 2 if state.fill_num == 1 else 1
        board = state.board
        inversed_board = np.where(board == 1, 2, np.where(board == 2, 1, board))

        return State(board=inversed_board, fill_num=inversed_fill_num, prev_local_action=state.prev_local_action)

    def _check_actionable(self, prev_action, status, x, y):
        if status[x, y] != 0:
            return 0
        if prev_action is None:
            return 1
        if status[prev_action[0], prev_action[1]] != 0:
            return 1
        return 1 if x == prev_action[0] and y == prev_action[1] else 0

In [5]:
feature_extractor = FeatureExtractor()
X_train = []
y_train = []

for i, (state, value) in enumerate(trainset):
    X_train.append(feature_extractor.extract_features(state))
    y_train.append(value)
    if i % 10000 == 9999:
        print(f"Processed {i + 1} data in trainset")

X_test = []
y_test = []
for i, (state, value) in enumerate(testset):
    X_test.append(feature_extractor.extract_features(state))
    y_test.append(value)
    if i % 10000 == 9999:
        print(f"Processed {i + 1} data in testset")

Processed 10000 data in trainset
Processed 20000 data in trainset
Processed 30000 data in trainset
Processed 40000 data in trainset
Processed 50000 data in trainset
Processed 60000 data in trainset
Processed 70000 data in trainset
Processed 80000 data in trainset
Processed 90000 data in trainset
Processed 100000 data in trainset
Processed 110000 data in trainset
Processed 120000 data in trainset
Processed 130000 data in trainset
Processed 140000 data in trainset
Processed 150000 data in trainset
Processed 160000 data in trainset
Processed 170000 data in trainset
Processed 180000 data in trainset
Processed 190000 data in trainset
Processed 200000 data in trainset
Processed 210000 data in trainset
Processed 220000 data in trainset
Processed 230000 data in trainset
Processed 240000 data in trainset
Processed 250000 data in trainset
Processed 260000 data in trainset
Processed 270000 data in trainset
Processed 280000 data in trainset
Processed 290000 data in trainset
Processed 300000 data i

In [7]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1).to(device)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1).to(device)

print("X_train.shape:", X_train.shape)
print("X_test.shape:", X_test.shape)
print("y_train.shape:", y_train.shape)
print("y_test.shape:", y_test.shape)

X_train.shape: torch.Size([480000, 9, 13])
X_test.shape: torch.Size([160000, 9, 13])
y_train.shape: torch.Size([480000, 1])
y_test.shape: torch.Size([160000, 1])


  X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
  X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
  y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1).to(device)
  y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1).to(device)


In [None]:
import torch.nn as nn
import torch.nn.functional as F

class Heuristic(nn.Module):
    class LocalNN(nn.Module):
        def __init__(self):
            super(Heuristic.LocalNN, self).__init__()
            self.fc1 = nn.Linear(13, 32)
            self.fc2 = nn.Linear(32, 32)
            self.fc3 = nn.Linear(32, 32)
            self.fc4 = nn.Linear(32, 1)
        
        def forward(self, x):
            x = F.gelu(self.fc1(x))
            x = F.gelu(self.fc2(x))
            x = F.gelu(self.fc3(x))
            x = F.tanh(self.fc4(x))
            return x

    def __init__(self):
        super(Heuristic, self).__init__()
        self.localNN = Heuristic.LocalNN()
        self.fc1 = nn.Linear(9, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 32)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, x):
        x = self.localNN(x)
        x = F.gelu(self.fc1(x.squeeze(-1)))
        x = F.gelu(self.fc2(x))
        x = F.gelu(self.fc3(x))
        x = F.tanh(self.fc4(x))
        return x

In [None]:
import math
from torch import optim
from torch.utils.data import DataLoader, TensorDataset

model = Heuristic().to(device)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
dataset = TensorDataset(X_train, y_train)
dataloader = DataLoader(dataset, batch_size=math.ceil(len(y_train)/1000), shuffle=True)

epochs = 10

for epoch in range(epochs):
    total_loss = 0.0
    for i, (X_batch, y_batch) in enumerate(dataloader):
        X_batch.to(device)
        y_batch.to(device)
        
        optimizer.zero_grad()
        pred = model(X_batch)
        loss = loss_fn(pred, y_batch)
        total_loss += loss.item()
        loss.backward()
        optimizer.step()

        if i % 100 == 99:
            print(f"Epoch [{epoch + 1}/{epochs}], Step [{i + 1}/{len(dataloader)}], Loss: {loss.item():.4f}")

    average_loss = total_loss / len(dataloader)
    print(f"Epoch [{epoch + 1}/{epochs}], Average loss: {average_loss:.4f}")

Epoch [1/10], Step [100/1000], Loss: 0.3012
Epoch [1/10], Step [200/1000], Loss: 0.2442
Epoch [1/10], Step [300/1000], Loss: 0.2060
Epoch [1/10], Step [400/1000], Loss: 0.1630
Epoch [1/10], Step [500/1000], Loss: 0.1435
Epoch [1/10], Step [600/1000], Loss: 0.1549
Epoch [1/10], Step [700/1000], Loss: 0.1568
Epoch [1/10], Step [800/1000], Loss: 0.1441
Epoch [1/10], Step [900/1000], Loss: 0.1175
Epoch [1/10], Step [1000/1000], Loss: 0.1370
Epoch [1/10], Average loss: 0.1997
Epoch [2/10], Step [100/1000], Loss: 0.1337
Epoch [2/10], Step [200/1000], Loss: 0.1257
Epoch [2/10], Step [300/1000], Loss: 0.1512
Epoch [2/10], Step [400/1000], Loss: 0.1045
Epoch [2/10], Step [500/1000], Loss: 0.1366
Epoch [2/10], Step [600/1000], Loss: 0.1238
Epoch [2/10], Step [700/1000], Loss: 0.1193
Epoch [2/10], Step [800/1000], Loss: 0.1399
Epoch [2/10], Step [900/1000], Loss: 0.0969
Epoch [2/10], Step [1000/1000], Loss: 0.1174
Epoch [2/10], Average loss: 0.1256
Epoch [3/10], Step [100/1000], Loss: 0.1257
Epoc

In [34]:
torch.set_printoptions(precision=10, threshold=torch.inf)
model.eval()
with torch.no_grad():
    pred = model(X_test)
    error = loss_fn(pred, y_test).item()
print("Test error:", error)

Test error: 0.10132867097854614


In [28]:
trained_weights = model.state_dict()
with open("heuristic.txt", "w") as f:
    for key, value in trained_weights.items():
        value = value.detach().cpu().numpy().tolist()
        f.write(f"{key.upper()}: {value}\n")