Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using rules evolved with an Evolution Strategy (ES)

## Instructions

* Create the directory `lab2` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [871]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from dataclasses import dataclass 
from copy import deepcopy


## The *Nim* and *Nimply* classes

In [872]:
Nimply = namedtuple("Nimply", "row, num_objects")


In [873]:
class Nim:
    """Board of a Nim game.

    Row ``i`` (0-based) starts with ``2 * i + 1`` objects. ``k`` is an
    optional upper bound on how many objects a single move may remove;
    ``None`` means "no bound".
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        self._k = k
        self._rows = [2 * index + 1 for index in range(num_rows)]

    def __bool__(self):
        # The board is truthy for as long as at least one object is left.
        return 0 < sum(self._rows)

    def __str__(self):
        return "<{}>".format(" ".join(map(str, self._rows)))

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row sizes."""
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        """Apply the move ``ply`` (a ``(row, num_objects)`` pair) in place.

        Only the upper bounds are asserted: the move may not take more than
        the row holds, nor more than ``k`` when a bound was given.
        """
        row, num_objects = ply
        assert num_objects <= self._rows[row]
        assert self._k is None or self._k >= num_objects
        self._rows[row] -= num_objects


## Sample (and silly) strategies

In [874]:
def pure_random(state: Nim) -> Nimply:
    """Pick a non-empty row at random, then a random amount to take from it.

    The amount is drawn between 1 and the full size of the chosen row; only
    ``state.rows`` is consulted, so any ``k`` bound of the board is not
    taken into account here.
    """
    non_empty = [index for index, count in enumerate(state.rows) if count > 0]
    chosen_row = random.choice(non_empty)
    taken = random.randint(1, state.rows[chosen_row])
    return Nimply(chosen_row, taken)


In [875]:
def gabriele(state: Nim) -> Nimply:
    """Empty the first (lowest-index) row that still holds objects.

    Raises ``ValueError`` when every row is already empty.
    """
    rows = state.rows
    # Lowest index among the non-empty rows; taking the whole row is the
    # "maximum possible number" for that row.
    lowest = min(index for index, count in enumerate(rows) if count > 0)
    return Nimply(lowest, rows[lowest])


In [876]:
def adaptive(state: Nim) -> Nimply:
    """Placeholder for a strategy driven by tunable parameters.

    NOTE: unfinished. The body only builds a local parameter dictionary and
    then falls off the end, so the call currently returns ``None`` and not
    a move.
    """
    genome = dict(love_small=0.5)
    return None


In [877]:
import numpy as np

# Returns the nim-sum of the rows (bitwise XOR of their binary representations)
def nim_sum(state: Nim) -> int:
    """Return the nim-sum of the board, i.e. the bitwise XOR of all row sizes.

    The XOR is folded directly over the integers, so the result does not
    depend on a fixed bit width and an empty board yields 0.

    Args:
        state: any object exposing the row sizes as ``state.rows``.

    Returns:
        The XOR of every value in ``state.rows`` (0 when there are no rows).
    """
    total = 0
    for count in state.rows:
        total ^= int(count)
    return total

# Create a dictionary that contains all possible (valid) moves
def analize(raw: Nim) -> dict:
    """Map every move available on ``raw`` to the nim-sum it leaves behind.

    Each move is tried on a deep copy, so ``raw`` itself is left untouched.
    Moves are enumerated from the row sizes alone (1 up to the whole row).

    Returns:
        ``{"possible_moves": {Nimply: nim-sum after that move}}``.
    """
    outcomes = {}
    for row, count in enumerate(raw.rows):
        for taken in range(1, count + 1):
            move = Nimply(row, taken)
            board = deepcopy(raw)
            board.nimming(move)
            outcomes[move] = nim_sum(board)
    return {"possible_moves": outcomes}

# Returns a move picked at random among those preferred by the nim-sum analysis (any legal move if none is preferred)
def optimal(state: Nim) -> Nimply:
    """Play the nim-sum strategy for the misère game (taking the last object loses).

    A move is considered winning when it leaves the opponent in a losing
    position:

    * if some row still holds two or more objects after the move, the
      position left behind must have nim-sum 0;
    * if only single-object rows (or nothing) remain after the move, an odd
      number of them must be left, so that the opponent is the one forced
      to take the last object.

    One winning move is picked at random; when there is none, any legal move
    is picked at random instead. The analysis enumerates moves from the row
    sizes only, so a ``k`` bound on the board is not considered.

    Raises:
        IndexError: if the board has no legal move at all.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    rows = state.rows
    winning_moves = []
    for ply, ns in analysis["possible_moves"].items():
        move_row, taken = ply
        # Size of the largest row once this move has been played.
        biggest_left = max(
            count - taken if index == move_row else count
            for index, count in enumerate(rows)
        )
        if biggest_left <= 1:
            # Only 0/1 rows remain: their nim-sum is the parity of the number
            # of single-object rows, and we want that number to be odd.
            is_winning = ns == 1
        else:
            is_winning = ns == 0
        if is_winning:
            winning_moves.append(ply)
    if not winning_moves:
        # Already in a losing position: every move is equally (un)helpful.
        winning_moves = list(analysis["possible_moves"])
    return random.choice(winning_moves)


In [878]:
def get_number_of_rows(state:Nim):
    """Count the rows of ``state`` that still hold at least one object."""
    return sum(1 for count in state.rows if count)

def human(state:Nim) ->Nimply:
    """Ask the user for a move on standard input until a legal one is given.

    A move is accepted only when the row index is within the board and the
    number of objects is between 1 and the current size of that row.
    Non-numeric input, negative row indices and non-positive amounts are
    rejected and the user is prompted again. Any ``k`` bound of the board is
    not checked here.

    Returns:
        The chosen move as ``Nimply(row, num_objects)``.
    """
    while True:
        print(f"{state} nim sum: {nim_sum(state)} and not zero columns: {get_number_of_rows(state)}")
        try:
            print("Write the row")
            r = int(input())
            print("Write the number of elements to take")
            o = int(input())
        except ValueError:
            # Anything that is not an integer: explain and ask again.
            print("Please enter whole numbers only")
            continue

        rows = state.rows
        # Lower bounds matter: a negative row would index from the end and a
        # non-positive amount would be a "pass" or would add objects back.
        if 0 <= r < len(rows) and 1 <= o <= rows[r]:
            return Nimply(r, o)
        print("Invalid move, try again")



def vinzgorithm(state: Nim) -> Nimply:
    """Hand-written heuristic based on how many rows are still non-empty.

    * Two or more non-empty rows: remove the largest row entirely (the first
      one when several share the maximum).
    * Otherwise: leave exactly one object in a row holding more than one; if
      no such row exists, pick any legal move at random.
    """
    current_nim_sum = nim_sum(state)  # only reported in the log line below
    rows = state.rows

    if get_number_of_rows(state) >= 2:
        largest = max(range(len(rows)), key=lambda index: rows[index])
        return Nimply(int(largest), int(rows[largest]))

    candidates = [
        Nimply(index, count - 1) for index, count in enumerate(rows) if count > 1
    ]
    logging.info(f"len: {len(candidates)} nin_sum: {current_nim_sum}")
    if not candidates:
        candidates = [
            Nimply(index, taken)
            for index, count in enumerate(rows)
            for taken in range(1, count + 1)
        ]
    return random.choice(candidates)

    
def play(strategy1, strategy2, orig_nim, times):
    """Play ``times`` matches between two strategies and report win ratios.

    Every match starts from a deep copy of ``orig_nim`` with ``strategy1``
    (player 0) moving first. The player who removes the last object loses,
    i.e. the winner is the player who would have had to move next.

    Returns:
        ``(ratio won by player 0, ratio won by player 1)``.
    """
    contenders = (strategy1, strategy2)
    first_player_wins = 0
    for _ in range(times):
        board = deepcopy(orig_nim)
        logging.info(f"init : {board}")
        turn = 0
        while board:
            move = contenders[turn](board)
            logging.info(f"ply: player {turn} plays {move}")
            board.nimming(move)
            logging.info(f"status: {board}")
            turn ^= 1
        # `turn` now designates the player who did NOT take the last object.
        logging.info(f"status: Player {turn} won!")
        if turn == 0:
            first_player_wins += 1
    ratio = first_player_wins / times
    return ratio, 1 - ratio
    

In [879]:
def enum_state(rows:tuple):
    """List every board reachable from ``rows`` by only removing objects.

    Rows are processed left to right: for each row, every state collected so
    far is expanded with all the smaller sizes of that row (largest first).
    The number of states found is printed before the list is returned; the
    first entry is ``rows`` itself.
    """
    states = [rows]
    for index in range(len(rows)):
        # The comprehension is fully built from the current list before it is
        # appended, so states added for this row are not expanded again.
        states += [
            tuple(known[:index]) + (left,) + tuple(known[index + 1:])
            for known in states
            for left in range(known[index] - 1, -1, -1)
        ]

    print(len(states))
    return states



## Oversimplified match

In [880]:
# Number of rows of the board used throughout the evolutionary experiments.
SIZE=3

# Reference starting board; matches are played on deep copies of it.
default_state=Nim(SIZE)

# Every board reachable from the starting one; used as the key set of each
# individual's genome. Note that enum_state() also prints how many it found.
possible_states=enum_state(default_state.rows)

class Individual:
    """A candidate player encoded as a complete lookup table of moves.

    ``genome`` maps every board in the module-level ``possible_states`` to
    the move this individual plays there. A new individual gets a random
    legal move for each board; boards with no legal move (everything empty)
    are mapped to the placeholder ``Nimply(0, 0)``.
    """

    # Declared for documentation only: the dictionary itself is created per
    # instance in __init__, so that individuals never share (and overwrite)
    # each other's genes.
    genome: dict

    def __init__(self):
        self.genome = {}
        for t in possible_states:
            moves = [Nimply(r, o) for r, c in enumerate(t) for o in range(1, c + 1)]
            self.genome[t] = random.choice(moves) if moves else Nimply(0, 0)

    def strategy(self, state: Nim):
        """Return the move stored in the genome for the current rows of ``state``."""
        return self.genome[state.rows]


def crossover(individual1:Individual, individual2:Individual):
    """Uniform crossover: build two children from the genes of two parents.

    For every board in ``possible_states`` a coin is flipped; one child gets
    the first parent's move and the other child the second parent's move,
    or the other way round.

    Returns:
        The two children as a pair.
    """
    child_a, child_b = Individual(), Individual()
    for state in possible_states:
        gene_1 = individual1.genome[state]
        gene_2 = individual2.genome[state]
        if random.choice([True, False]):
            child_a.genome[state], child_b.genome[state] = gene_1, gene_2
        else:
            child_b.genome[state], child_a.genome[state] = gene_1, gene_2
    return child_a, child_b

def compare(individual1, individual2):
    """Play one match between two individuals and return the winner.

    The match starts from a copy of ``default_state`` with ``individual1``
    moving first; each move is read straight from the mover's genome. The
    individual who does not take the last object is returned.
    """
    board = deepcopy(default_state)

    # Both contenders must be plain Individual instances.
    assert type(individual1) is Individual
    assert type(individual2) is Individual
    contenders = (individual1, individual2)

    turn = 0
    while board:
        board.nimming(contenders[turn].genome[board.rows])
        turn = 1 - turn
    return contenders[turn]


def eval(individual1, test):
    """Tell whether ``individual1`` beats the reference strategy ``test``.

    One match is played from a copy of ``default_state``; ``test`` moves
    first and ``individual1`` second. Returns ``True`` when ``individual1``
    is not the one who takes the last object.

    NOTE: this function shadows the built-in ``eval`` within the module.
    """
    board = deepcopy(default_state)

    movers = [test, individual1.strategy]
    to_move = 0
    while board:
        board.nimming(movers[to_move](board))
        to_move ^= 1
    return to_move == 1

def evolve(power_of_two:int):
    """Knock-out tournament with crossover between rounds.

    Starts from ``2 ** power_of_two`` random individuals. In each round the
    individuals are paired and only the winner of each pair (see
    ``compare``) survives; winners are then paired again and each pair is
    replaced by its two children (see ``crossover``). The population
    therefore halves at every round, and the last winner is returned.

    With ``power_of_two == 0`` the single individual is returned as is,
    since there is nobody to play against.
    """
    individuals = [Individual() for _ in range(2 ** power_of_two)]

    while len(individuals) > 1:
        winners = [
            compare(individuals[i], individuals[i + 1])
            for i in range(0, len(individuals), 2)
        ]

        if len(winners) == 1:
            return winners[0]

        individuals = []
        for j in range(0, len(winners), 2):
            individuals.extend(crossover(winners[j], winners[j + 1]))

    # Only reached when the initial population holds a single individual.
    return individuals[0]


def evolve2(power_of_two:int):
    """Evolve individuals by selecting those that beat the ``optimal`` strategy.

    Starts from ``2 ** power_of_two`` random individuals. In each round only
    the individuals at even positions are evaluated (see ``eval``) and those
    that win survive. Survivors are paired and each pair is replaced by its
    two children (see ``crossover``); an unpaired last survivor is carried
    over unchanged. Because at most half of the population is ever
    evaluated, the population shrinks at every round.

    Returns:
        The only survivor as soon as exactly one is left, or the first
        individual of the current population when nobody survives a round.
    """
    population = [Individual() for _ in range(2 ** power_of_two)]

    while True:
        survivors = [
            candidate for candidate in population[::2] if eval(candidate, optimal)
        ]

        if len(survivors) == 1:
            return survivors[0]
        if not survivors:
            return population[0]

        offspring = []
        for start in range(0, len(survivors), 2):
            pair = survivors[start:start + 2]
            if len(pair) == 2:
                offspring.extend(crossover(pair[0], pair[1]))
            else:
                offspring.append(pair[0])

        population = offspring


    

48


In [881]:
# Driver cell: evolve a player, then measure it against `optimal`.
# The root logger is set to INFO, but logging.disable(logging.INFO) then
# suppresses every message of severity INFO and below for all loggers.
logging.getLogger().setLevel(logging.INFO)
logging.disable(logging.INFO)
# NOTE(review): `strategy` is not used anywhere in the visible code.
strategy = (vinzgorithm, optimal)

nim = Nim(SIZE)

# NOTE(review): evolve(21) builds 2**21 random individuals before the first
# round; the output recorded below this cell is a KeyboardInterrupt, so this
# size is presumably too large to finish in practice -- consider lowering it.
last_man_standing=evolve(21)

# 100 matches with `optimal` moving first and the evolved player second.
w0, w1=play(optimal, last_man_standing.strategy, nim, 100)

# enum_state(nim)
print(f"Player 0 won: {w0*100}% of times Player 1 won: {w1*100}% of times")


KeyboardInterrupt: 