In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical
import numpy as np
import time
import random

In [2]:
file_name = 'kallah_neyron_prelu.pt'

In [3]:
def model_init(name):
    DL = 14
    Prs = 78

    model = 0

    if os.path.exists(name):
        model = torch.load(name)
        print("model loaded: ", name)
    else:
        model = nn.Sequential(
            nn.Linear(DL,Prs),
            nn.PReLU(),
            nn.Linear(Prs, 6),
            nn.Softmax()
        )
        print("model init")

    return model

In [4]:
def space(board):
    return torch.tensor(board[0:])

In [5]:
model = model_init(file_name)
optimizer = torch.optim.Adam(model.parameters())

model init


In [6]:
def bolvan(game):
    return random.choice(game.get_valid_moves())

In [7]:
class Kalah:
    def __init__(self, num_holes=6, num_seeds=6.):
        self.num_holes = num_holes
        self.num_seeds = num_seeds

        self.board = [float(num_seeds) for _ in range(num_holes * 2 + 2)]  # Заполняем лунки

        self.kalah1_index = self.num_holes  # Индекс корзины первого игрока
        self.kalah2_index = self.num_holes + 1  # Индекс корзины второго игрока

        self.board[self.kalah1_index] = 0.
        self.board[self.kalah2_index] = 0.

        self.current_player = 0  # Текущий игрок

        self.diff1 = self.num_holes + 2  # Разница между противоположными лунками относительно прямого порядка
        self.diff2 = self.num_holes  # Разница между противоположными лунками относительно обратного порядка

        self.move_number = 0

    def step(self, action):
        hole = action
        player = self.current_player
        boardCopy = self.board.copy()

        # Если в выбранной лунке 0 камней, заканчиваем игру и отдаем все камни противоположному игроку
        if boardCopy[hole] == 0:
            for i in range(len(boardCopy)):
                boardCopy[i] = 0.
            boardCopy[self.num_holes + 1] = 2 * self.num_holes * self.num_seeds
            return [boardCopy, player]
        
        seeds = boardCopy[hole]  # Запоминаем количество в выбранной лунке

        boardCopy[hole] = 0.  # Обнуляем выбранную лунку

        # Запускаем распределение камней
        while seeds > 0:
            hole = hole + 1 if hole >= 0 else hole - 1

            # Если прошли свою корзину, переходим на другую сторону
            if hole == self.num_holes + 1:
                hole = -1
            if hole == -1 * self.num_holes - 2:
                hole = 0

            # Если распределение дошло до корзины соперника, пропускаем её
            if hole == -1 * self.num_holes - 1:
                continue

            boardCopy[hole] += 1.  # Увеличиваем кол-во камней в лунке
            seeds -= 1.  # Уменьшаем кол-во камней в выбранной лунке

        # Если последний камень оказался в корзине, выходим без смены хода
        if hole == self.num_holes:
            return [boardCopy, player]

        # Если последний камень попал в пустую лунку принадлежащую ему и противоположная лунка соперника не пуста, то этот камень и все камни из противоположной лунки соперника игрок переносит себе в корзину
        if boardCopy[hole] == 1 and hole >= 0 and boardCopy[hole + self.diff1] > 0:
              boardCopy[self.kalah1_index] += boardCopy[hole + self.diff1] + 1.
              boardCopy[hole] = 0.
              boardCopy[hole + self.diff1] = 0.

        return [self.flip(boardCopy), 1 - player]

    def flip(self, board):
        board_new = [0 for _ in range(len(board))]

        for i in range(self.num_holes):
            board_new[i] = board[-1-i]
            board_new[-1-i] = board[i]

        board_new[self.kalah1_index] = board[self.kalah2_index]
        board_new[self.kalah2_index] = board[self.kalah1_index]
        return board_new
    
    def do_step(self, action):
        tmp = self.step(action)
        self.board = tmp[0]
        self.current_player = tmp[1]
        self.move_number += 1

    def game_over(self):
        return sum(self.board[:self.num_holes]) == 0 or sum(self.board[self.diff1:]) == 0 or self.board[self.kalah2_index] > (self.num_holes * self.num_seeds) or self.board[self.kalah1_index] > (self.num_holes * self.num_seeds)

    def get_winner(self):
        if not self.game_over():
            return None
        return self.current_player if self.board[self.kalah1_index] > self.board[self.kalah2_index] else 1 - self.current_player

    def get_value(self, action):
        boardC = self.board
        return boardC[action]

    def get_state(self):
        return [self.board, self.current_player]
    
    def get_valid_moves(self):
        boardC = self.board
        moves = []
        for i in range(self.num_holes):
            if boardC[i] != 0:
                moves.append(i)
        return moves

    def print_board(self):
        boardC = self.board
        if self.current_player == 1: boardC = self.flip(boardC)
        int_array1 = list(map(int, boardC[:self.num_holes]))
        int_kalah1 = int(boardC[self.num_holes])
        int_array2 = list(map(int, boardC[self.num_holes + 2:]))
        int_kalah2 = int(boardC[-1 * self.num_holes - 1])
        print("=========== Move {}; Player {} ===========".format(self.move_number, self.current_player+1))
        print("player 2:", int_kalah2, int_array2)
        print("player 1:  ", int_array1, int_kalah1)

    def play_game(self):
        move = 0
        while not self.game_over():
            print("=========== Move ", move, " ===========")
            print("player 2:", self.board[-1 * self.num_holes - 1], self.board[self.num_holes + 2:])
            print("player 1:  ", self.board[:self.num_holes], self.board[self.num_holes])
            hole = int(input("Player {}'s turn. Enter hole number(0-5): ".format(self.current_player + 1)))
            while True:
                if hole >= 0 and hole <= 5: break
                hole = int(input("Player {}'s turn. Enter hole number(0-5): ".format(self.current_player + 1)))
            self.do_step(hole)
            move += 1
        self.print_board()
        print("Game over. Player {} wins!".format(self.get_winner() + 1))

    def playBolvanVsBolvan(self):
        while not self.game_over():
            self.print_board()
            hole = bolvan(self)
            print("=========== Action {}; value {} ===========\n".format(hole, int(self.get_value(hole))))
            self.do_step(hole)
        self.print_board()
        print("Game over. Bolvan {} wins!".format(self.get_winner() + 1))

In [8]:
# game = Kalah()
# game.playBolvanVsBolvan()

In [9]:
def train(gamma = 0.3, alpha = 0.3, max_ep = 99999999):
    time_1 = time.time()
    zero_flag = False
    count_errors = 1000
    wins_neural = 0
    reward_for_zero = -50
    inner_time = time.time()
    episode = 0

    while count_errors != 0:
        episode += 1
        if episode % 1000 == 0:
          inner_time = time.time() - inner_time
          print("Neural wins per 1000 plays {}: errors per 1000 plays {} time {}".format(wins_neural, count_errors, inner_time))
          count_errors = 0
          wins_neural = 0
          inner_time = time.time()
          torch.save(model, file_name)
        
        tr = []
        game = Kalah()
        while not game.game_over():
            state = game.get_state()
            cur_board = state[0]
            cur_player = state[1]
            SPACE = space(cur_board)

            if cur_player == 0:
                probs = model(SPACE)
                m = Categorical(probs)
                action = m.sample()
                if game.get_value(action) != 0:
                    reward = 1
                    #reward = game.board[game.num_holes] - game.board[-1 * game.num_holes - 1]
                    tr.append((SPACE.clone(), action.clone(), reward))
                else:
                    reward = reward_for_zero
                    tr.append((SPACE.clone(), action.clone(), reward))
                    count_errors += 1
            else:
                action = bolvan(game)

            # print("Game {}".format(episode))
            # game.print_board()
            # print("Action {}\n".format(action))

            game.do_step(action)

        if game.get_winner() == 0:
            wins_neural += 1

        loss = 0.

        T = len(tr)
        for t in range(T):
            R = 0.
            for i in range(t,T):
                R += (gamma**(i - t))*tr[i][2]

            loss += -alpha*R*Categorical(model(tr[t][0])).log_prob(tr[t][1])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(time.time() - time_1)

In [10]:
train()

  input = module(input)


Neural wins per 1000 plays 80: errors per 1000 plays 1891 time 19.85286855697632
19.888694047927856


In [11]:
def test_neural_bolvan(model):
    counter_error = 0
    neural_wins = 0
    for i in range(1, 100001):
        game = Kalah()
        move = 0
        while not game.game_over():
            SPACE = space(game.board)
            if game.current_player == 0:
                probs = model(SPACE)
                m = Categorical(probs)
                action = m.sample()
                if game.get_value(action) == 0:
                    counter_error += 1
            else:
                action = bolvan(game)

            # print("Game {}".format(i))
            # game.print_board()
            # print("Action {}\n".format(action))
            game.do_step(action)
            move += 1
        
        if game.get_winner() == 0:
            neural_wins += 1

        if i % 1000 == 0:
            print("Neural wins: {};Errors: {}".format(neural_wins, counter_error))
            counter_error = 0
            neural_wins = 0

In [None]:
test_neural_bolvan(model)

In [None]:
print(model)