In [None]:
import gym
import minihack
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as display
from tqdm import tqdm
import random
import math

# Seed every RNG this notebook draws from so that runs are reproducible:
# NumPy picks the crossover cut points, while the stdlib `random` module
# picks which actions get mutated and what they are replaced with.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

from utils import *
from gen import test, random_search, true_random_search, true_random_nsteps


In [None]:
# Build the custom MiniHack navigation environment from the level described
# in "complex_maze.des", requesting both the "chars" and "pixel" observations.
env = gym.make(
    "MiniHack-Navigation-Custom-v0",
    observation_keys=("chars", "pixel"),
    des_file = "complex_maze.des",
    max_episode_steps=10000,  # upper bound on the number of steps per episode
)
# Keep the initial observation: later cells read its 'chars' and 'pixel' entries.
state = env.reset()
env.render()

In [None]:
# Show the 'pixel' entry of the initial observation as an image.
plt.imshow(state['pixel'])

In [None]:
# Unpack the initial observation into the globals used by the rest of the notebook.
game_map = state['chars']  # indexed by position when checking for walls
game = state['pixel']      # used as the background image of the replay animation
# Locations returned by the utils helpers for the player and for the target;
# both are indexed with [0] and [1] by the fitness function further down.
start = get_player_location(game_map)
target = get_target_location(game_map)

In [None]:
# Unit displacement tuples: each one changes a single component by +/-1.
# NOTE(review): which component is the row and which is the column is not
# visible in this notebook — confirm against the utils helpers.
a = (0, -1)
b = (0, 1)
c = (1, 0)
d = (-1, 0)

# The four displacements, in the order a, b, c, d.
ACTIONS = [a, b, c, d]

def modify_action(t1, t2):
    """Return the pairwise sum of the first two components of t1 and t2."""
    first = t1[0] + t2[0]
    second = t1[1] + t2[1]
    return (first, second)

def is_crossoverable(action1, action2):
    """Tell whether two 2-component tuples may be joined at a crossover point.

    The pair is rejected only when both components differ and at least one
    of the two differences is larger than 1; every other pair is accepted.
    """
    gap_first = abs(action1[0] - action2[0])
    gap_second = abs(action1[1] - action2[1])
    both_differ = gap_first > 0 and gap_second > 0
    far_apart = gap_first > 1 or gap_second > 1
    return not (both_differ and far_apart)

def crossover_path(path1, path2):
    """Splice two paths at a random compatible point and trim at the first wall.

    A cut index ``i`` is drawn uniformly among the indices ``1 <= i < n``
    (``n`` being the length of the shorter path) for which
    ``is_crossoverable(path1[i - 1], path2[i])`` holds. The child is
    ``path1[:i] + path2[i:]``; it is then scanned from index 1 onwards and
    cut just before the first position whose ``game_map`` cell is a wall.

    Args:
        path1: sequence of positions providing the head of the child.
        path2: sequence of positions providing the tail of the child.

    Returns:
        A new sequence of positions. When no compatible cut index exists
        (which includes either path holding fewer than two positions) a
        shallow copy of ``path1`` is returned instead of retrying forever.
    """
    shortest = min(len(path1), len(path2))
    # Enumerate the compatible cut points up front: drawing among them is
    # equivalent to rejection sampling but cannot loop forever.
    cut_points = [
        i for i in range(1, shortest)
        if is_crossoverable(path1[i - 1], path2[i])
    ]
    if not cut_points:
        return path1[:]
    i = cut_points[np.random.randint(len(cut_points))]

    child = path1[:i] + path2[i:]
    # TODO: implement controls on move validity beyond the wall check below.
    for idx in range(1, len(child)):
        if is_wall(game_map[child[idx]]):
            # Keep everything strictly before the wall, whichever parent the
            # wall position came from.
            return child[:idx]
    return child

def crossover(actions1, actions2):
    """Single-point crossover of two action sequences.

    A cut index ``i`` is drawn uniformly from ``1 .. n - 1`` where ``n`` is
    the length of the shorter sequence, and the child takes ``actions1[:i]``
    followed by ``actions2[i:]``.

    Args:
        actions1: sequence providing the head of the child.
        actions2: sequence providing the tail of the child.

    Returns:
        A new sequence; neither input is modified. When the shorter sequence
        holds fewer than two actions the cut index is fixed at 1.
    """
    shortest = min(len(actions1), len(actions2))
    # np.random.randint(low, high) requires low < high, so the upper bound is
    # floored at 2; for sequences of length >= 2 the draw is unchanged.
    i = np.random.randint(1, max(shortest, 2))
    return actions1[:i] + actions2[i:]



def mutate_path(path, mutation_rate=0.05):
    """Mutate a path by re-drawing a random subset of its actions.

    The path is converted to actions with ``actions_from_path(path[0],
    path[1:])``. ``floor(len(actions) * mutation_rate)`` distinct action
    indices are sampled (index 0 is never selected), and each selected action
    is replaced by a random element of ``get_valid_actions(game_map,
    path[idx])``. The mutated actions are finally converted back to a path.

    Args:
        path: non-empty sequence of positions; ``path[0]`` is used as the
            origin for both conversions.
        mutation_rate: fraction of the actions to re-draw. Values yielding
            more indices than are available are clamped.

    Returns:
        Whatever ``path_from_actions`` builds from the mutated actions.
    """
    origin = path[0]
    actions = actions_from_path(origin, path[1:])

    # The first action is deliberately left out of the candidate indices.
    candidates = range(1, len(actions))
    n_mutations = math.floor(len(actions) * mutation_rate)
    n_mutations = max(0, min(len(candidates), n_mutations))

    # TODO? implement controls on move validity: the valid moves are looked
    # up on the positions of the *unmutated* path.
    for idx in random.sample(candidates, k=n_mutations):
        valid_moves = get_valid_actions(game_map, path[idx])
        if len(valid_moves) > 0:  # nothing to pick from: keep the old action
            actions[idx] = random.choice(valid_moves)

    # NOTE(review): the evolution loop in this notebook calls
    # path_from_actions(game_map, start, actions); this call uses the same
    # three-argument form — confirm against the helper's definition.
    return path_from_actions(game_map, origin, actions)

def mutate(actions, mutation_rate=0.05):
    """Randomly re-draw a fraction of an action list, in place.

    ``floor(len(actions) * mutation_rate)`` distinct positions are sampled
    and each one is overwritten with a random choice among 0, 1, 2 and 3
    (the new value may coincide with the old one).

    Args:
        actions: mutable sequence of actions; it is modified in place.
        mutation_rate: fraction of positions to re-draw. The resulting count
            is clamped to ``0 .. len(actions)``.

    Returns:
        The same ``actions`` object that was passed in.
    """
    n_mutations = math.floor(len(actions) * mutation_rate)
    # Clamp so that rates outside [0, 1] cannot make random.sample raise.
    n_mutations = max(0, min(len(actions), n_mutations))
    for idx in random.sample(range(len(actions)), k=n_mutations):
        actions[idx] = random.choice([0, 1, 2, 3])
    return actions

In [None]:
# Display the location that get_player_location returned for the initial observation.
start

In [None]:
# --- Genetic-algorithm configuration ---------------------------------------
MAX_GENERATIONS = 100
MAX_INDIVIDUALS = 100
# Fraction of each child's actions that mutate() re-draws.
MUTATION_RATE = 0.1

# Per-generation history: the best fitness and the path that achieved it.
best_scores = []
best_paths = []


def fitness_function(path):
    """Manhattan distance from the last position of `path` to `target` (lower is better)."""
    return abs(path[-1][0] - target[0]) + abs(path[-1][1] - target[1])


state = env.reset()

# create a list of individuals, starting with random moves (illegal actions filtered out)
print("> Creating initial population...")
individuals = [true_random_nsteps(game_map, start, target) for _ in range(MAX_INDIVIDUALS)]
best_fitness = np.inf

print("> Evolving...")
# Keep a handle on the progress bar so its postfix can be refreshed at every
# generation (a `postfix=` f-string would be evaluated only once, up front).
progress = tqdm(range(MAX_GENERATIONS))
for generation in progress:
    fitnesses = [fitness_function(individual) for individual in individuals]
    # NOTE(review): every other call site in this notebook passes the path
    # without its first position (path[1:]) to actions_from_path, so the same
    # convention is used here — confirm that paths always begin at `start`.
    ind_actions = [actions_from_path(start, ind[1:]) for ind in individuals]

    # Lists of (individual, fitness) and (actions, fitness) pairs. Both are
    # sorted with the same key and list.sort is stable, so they stay aligned.
    population = list(zip(individuals, fitnesses))
    actions = list(zip(ind_actions, fitnesses))
    population.sort(key=lambda x: x[1])
    actions.sort(key=lambda x: x[1])

    # Record the best individual of the generation that was just evaluated.
    best_fitness = population[0][1]
    best_scores.append(best_fitness)
    best_paths.append(population[0][0])
    progress.set_postfix_str(f'best score: {best_fitness:.2f}')

    # take 2 best individuals -> maybe can be replaced with probability distribution based on fitness
    # also roulette wheel selection.
    child1, child2 = actions[0][0], actions[1][0]

    # Every member of the next generation is a mutated crossover of those two.
    offspring = [crossover(child1, child2) for _ in range(MAX_INDIVIDUALS)]
    offspring = [mutate(child, mutation_rate=MUTATION_RATE) for child in offspring]
    ind_actions = offspring
    individuals = [path_from_actions(game_map, start, child) for child in offspring]
    

In [None]:
# Sanity check on the recorded best paths: print each path, then print "ciau"
# once for every position of that path whose game_map cell is_wall() flags.
for path in best_paths:
    print(path)
    for x,y in path:
        if is_wall(game_map[x,y]):
            print("ciau")



#print(target)

In [None]:
# Disabled debug snippet: it would print the fitness of every
# (individual, fitness) pair in `population`.
# NOTE: the triple-quoted block below is a string expression, not a comment;
# as the last expression of the cell, Jupyter echoes it as the cell output.
"""for individual, fitness in population:
    print(f"Fitness: {fitness:.2f}")"""


In [None]:
# Replay the recorded best paths in the environment, one generation at a time.
# The frames are cropped with the slice 300:975 along their second axis.
image = plt.imshow(game[:, 300:975])
# NOTE(review): the [:-10] slice skips the last 10 recorded generations —
# confirm that this is intended.
for generation, path in enumerate(best_paths[:-10]):
    plt.title(f"Generation {generation}")
    # Use cell-local names so the global `start` (read by later cells) and the
    # loop variable `path` are not overwritten.
    path_start, path_steps = path[0], path[1:]
    actions = actions_from_path(path_start, path_steps)
    env.reset()
    for action in actions:
        s, _, _, _ = env.step(action)
        # Update the image *before* displaying it, so that the frame shown is
        # the one produced by the step that was just taken.
        image.set_data(s['pixel'][:, 300:975])
        display.display(plt.gcf())
        display.clear_output(wait=True)
    

In [None]:
# Draw a single path with true_random_nsteps and display it.
path = true_random_nsteps(game_map, start, target)
path

In [None]:
# Convert the path to actions, leaving out its first element (path[1:]),
# and display the result.
actions = actions_from_path(start, path[1:])
actions