# Lab 3: Policy Search
## Task
Write agents able to play Nim, with an arbitrary number of rows and an upper bound  on the number of objects that can be removed in a turn (a.k.a., subtraction game).

The player taking the last object wins.

- Task3.1: An agent using fixed rules based on nim-sum (i.e., an expert system)

- Task3.2: An agent using evolved rules

In [51]:
import logging
from collections import namedtuple
import random
from typing import Callable
from copy import deepcopy
from itertools import accumulate
from operator import xor

logging.getLogger().setLevel(logging.INFO)

In [52]:
Nimply = namedtuple("Nimply", "row, num_objects")

In [53]:
class Nim:
    # Initialize a board for NIM game with k rows 
    # (the rows have odds increasing numbers starting from 1)
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    @property
    def k(self) -> int:
        return self._k

    # Remove from a choosen row a selected number of objects
    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

In [54]:
def nim_sum(state: Nim) -> int:
    *_, result = accumulate(state.rows, xor)
    return result

def cook_status(state: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = [
        (r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1) if state.k is None or o <= state.k
    ]
    cooked["active_rows_number"] = sum(o > 0 for o in state.rows)
    cooked["shortest_row"] = min((x for x in enumerate(state.rows) if x[1] > 0), key=lambda y: y[1])[0]
    cooked["longest_row"] = max((x for x in enumerate(state.rows)), key=lambda y: y[1])[0]
    cooked["nim_sum"] = nim_sum(state)
    cooked["active_obj_number"] = sum(c for r, c in enumerate(state.rows))

    brute_force = list()
    for m in cooked["possible_moves"]:
        tmp = deepcopy(state)
        tmp.nimming(m)
        brute_force.append((m, nim_sum(tmp)))
    cooked["brute_force"] = brute_force

    return cooked

## Strategies

In [55]:
def pure_random(state: Nim) -> Nimply:
    # enumerate(state.rows) --> return the number of the row (r) and the number of objects in that row (c)
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    return Nimply(row, num_objects)

In [56]:
def dumb_strategy(state: Nim) -> Nimply:
    data = cook_status(state)
    row = data["longest_row"]
    num_objects = 1
    return Nimply(row, num_objects)

In [57]:
def good_strategy(state: Nim) -> Nimply:
    data = cook_status(state)
    row = data["longest_row"]
    num_objects = state.rows[data["longest_row"]]
    return Nimply(row, num_objects)

In [58]:
def optimal_startegy(state: Nim) -> Nimply:
    data = cook_status(state)
    return next((bf for bf in data["brute_force"] if bf[1] == 0), random.choice(data["brute_force"]))[0]

In [59]:
def make_strategy1(genome: dict) -> Callable:
    def evolvable(state: Nim) -> Nimply:
        data = cook_status(state)

        if random.random() > genome["p"]:
            ply = Nimply(data["shortest_row"], 1)
        else: 
            ply = Nimply(data["longest_row"], 1)

        return ply
    return evolvable

In [60]:
def make_strategy2(genome: dict) -> Callable:
    def evolvable(state: Nim) -> Nimply:
        data = cook_status(state)

        if data["active_rows_number"] % 2 == 0 and random.random() > genome["p"]:
            ply = Nimply(data["longest_row"], 1)
        else: 
            ply = Nimply(data["longest_row"], state.rows[data["longest_row"]])

        return ply
    return evolvable

In [61]:
def make_strategy3(genome: dict) -> Callable:
    def evolvable(state: Nim) -> Nimply:
        data = cook_status(state)

        if data["active_rows_number"] > 5 and random.random() > genome["p"]:
            ply = Nimply(data["longest_row"], state.rows[data["longest_row"]])
        else: 
            ply = Nimply(data["longest_row"], 1)

        return ply
    return evolvable

In [62]:
def make_strategy4(genome: dict) -> Callable:
    def evolvable(state: Nim) -> Nimply:
        data = cook_status(state)

        if data["active_obj_number"] %2 == 0 and random.random() > genome["p"]:
            if state.rows[data["longest_row"]] > 1:
                ply = Nimply(data["longest_row"], 2)
            else:
                ply = Nimply(data["longest_row"], 1)
        else:
            ply = Nimply(data["longest_row"], 1)

        return ply
    return evolvable

In [63]:
def make_strategy5(genome: dict) -> Callable:
    def evolvable(state: Nim) -> Nimply:
        data = cook_status(state)

        if data["active_rows_number"] == 1 and random.random() > genome["p"]:
            ply = Nimply(data["longest_row"], state.rows[data["longest_row"]])
        else:
            ply = Nimply(data["longest_row"], state.rows[data["longest_row"]]-1)

        return ply
    return evolvable

## Evaluation

In [64]:
NUM_MATCHES = 20
NIM_SIZE = 10

def evaluate(strategy: Callable, opponent: Callable) -> float:    
    players = (strategy, opponent)
    won = 0

    for m in range(NUM_MATCHES):
        nim = Nim(NIM_SIZE)
        player = random.choice([0,1])
        while nim:
            ply = players[player](nim)
            nim.nimming(ply)
            player = 1 - player
        if player == 1:
            won += 1
    return won / NUM_MATCHES

## Evolution

In [71]:
Individual = namedtuple("Individual", ["strategy", "genome", "fitness"])

In [72]:
def calculate_fitness(strategy):
    # Evaluate out strategy 

    e1 = evaluate(strategy, pure_random)
    e2 = evaluate(strategy, dumb_strategy)
    e3 = evaluate(strategy, good_strategy)
    e4 = evaluate(strategy, optimal_startegy)
    
    return (e1 + e2 + e3 + e4) / 4

In [73]:
def tournament(population, tournament_size=2):
    # Take randomly 2 individuals from the  population and take the one with the higher fitness
    return max(random.choices(population, k=tournament_size), key=lambda i: i.fitness)

In [74]:
def mutation(strategy, genome, m1):
    # We randomly modify our strategy by modifing its probability
    new_genome = genome

    if random.random() > m1:
        if genome["p"] > 0:
            new_genome["p"] = genome["p"] - 0.05
        else:
            new_genome["p"] = genome["p"]
    else:
        new_genome["p"] = genome["p"] + 0.05
    

    if strategy == "strategy1":
        strat = make_strategy1(new_genome)
    elif strategy == "strategy2":
        strat = make_strategy2(new_genome)
    elif strategy == "strategy3":
        strat = make_strategy3(new_genome)
    elif strategy == "strategy4":
        strat = make_strategy4(new_genome)
    elif strategy == "strategy5":
        strat = make_strategy5(new_genome)


    return strat, new_genome

## Evolved Strategy

In [79]:
NUM_GENERATIONS = 15
OFFSPRING_SIZE = 7

m1 = 0.5

def evolution(population):

    fitness_log = [(0, i.fitness) for i in population]

    for g in range(NUM_GENERATIONS):
        offspring = list()
        for i in range(OFFSPRING_SIZE):

            p = tournament(population)

            new_strat, new_g = mutation(p.strategy, p.genome, m1)

            f = calculate_fitness(new_strat)

            fitness_log.append((g + 1, f))

            offspring.append(Individual(p.strategy, new_g, f))

        population += offspring

        population = sorted(population, key=lambda i: i.fitness, reverse=True)

    return population

## Generate Population

In [87]:
population = list()

genome = {"p": 0.5}

for i in range(3):
    #genome = {"p": random.random()}
    strategy = make_strategy1(genome)
    population.append(Individual( "strategy1", genome, calculate_fitness(strategy) ))

for i in range(3):
    #genome = {"p": random.random()}
    strategy = make_strategy2(genome)
    population.append(Individual( "strategy2", genome, calculate_fitness(strategy) ))

for i in range(3):
    #genome = {"p": random.random()}
    strategy = make_strategy3(genome)
    population.append(Individual( "strategy3", genome, calculate_fitness(strategy) ))

for i in range(3):
    #genome = {"p": random.random()}
    strategy = make_strategy4(genome)
    population.append(Individual( "strategy4", genome, calculate_fitness(strategy) ))

for i in range(3):
    #genome = {"p": random.random()}
    strategy = make_strategy5(genome)
    population.append(Individual( "strategy5", genome, calculate_fitness(strategy) ))


## Start EVOLVING Algorithm

In [88]:
final_pop = evolution(population=population)

print(final_pop[0])

Individual(strategy='strategy5', genome={'p': 0.30000000000000004}, fitness=1.0)


## Oversimplified match

In [85]:
logging.getLogger().setLevel(logging.DEBUG)

strategy = (optimal_startegy, make_strategy5({"p": 0.247}))

nim = Nim(11)
logging.debug(f"status: Initial board  -> {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    nim.nimming(ply)
    logging.debug(f"status: After player {player} -> {nim}")
    player = 1 - player
winner = 1 - player
logging.info(f"status: Player {winner} won!")

DEBUG:root:status: Initial board  -> <1 3 5 7 9 11 13 15 17 19 21>
DEBUG:root:status: After player 0 -> <1 3 5 7 9 11 13 15 6 19 21>
DEBUG:root:status: After player 1 -> <1 3 5 7 9 11 13 15 6 19 1>
DEBUG:root:status: After player 0 -> <1 3 5 7 9 11 13 15 6 7 1>
DEBUG:root:status: After player 1 -> <1 3 5 7 9 11 13 1 6 7 1>
DEBUG:root:status: After player 0 -> <1 3 5 7 7 11 13 1 6 7 1>
DEBUG:root:status: After player 1 -> <1 3 5 7 7 11 1 1 6 7 1>
DEBUG:root:status: After player 0 -> <1 3 5 7 7 7 1 1 6 7 1>
DEBUG:root:status: After player 1 -> <1 3 5 1 7 7 1 1 6 7 1>
DEBUG:root:status: After player 0 -> <1 3 3 1 7 7 1 1 6 7 1>
DEBUG:root:status: After player 1 -> <1 3 3 1 1 7 1 1 6 7 1>
DEBUG:root:status: After player 0 -> <1 3 3 1 1 1 1 1 6 7 1>
DEBUG:root:status: After player 1 -> <1 3 3 1 1 1 1 1 6 1 1>
DEBUG:root:status: After player 0 -> <1 3 3 1 1 1 1 1 0 1 1>
DEBUG:root:status: After player 1 -> <1 1 3 1 1 1 1 1 0 1 1>
DEBUG:root:status: After player 0 -> <1 1 1 1 1 1 1 1 0 1 1>
D