In [1]:
import hanabi_multiagent_framework as hmf
from hanabi_multiagent_framework.utils import make_hanabi_env_config
import numpy as np
from numpy import ndarray
from hanabi_agents.rule_based import ParallelRulebasedAgent
import hanabi_agents.rule_based.predefined_rules as rules
from hanabi_learning_environment import pyhanabi, rl_env
from hanabi_agents.rule_based.Diversity import Diversity
from hanabi_agents.rule_based.ruleset import Ruleset 
from evolve import Evolution
import timeit
import statistics
import dill as pickle
import math

## Intra-Agent Entropy Code

In [189]:
def Intra_Agent_Entropy(my_rules):
    """Compute the Shannon entropy of each agent's rule-name distribution.

    For every agent (one row of ``my_rules``) the rules are grouped by their
    ``__name__``; the relative frequency of each name is treated as a
    probability and the entropy ``sum(p * log(1 / p))`` is returned in nats.
    Names that an agent never uses contribute nothing (``p * log(p) -> 0``),
    so no global list of all rule names is required.

    Parameters
    ----------
    my_rules : array-like, shape (n_agents, n_rules)
        Each element must expose a ``__name__`` attribute (e.g. a function).

    Returns
    -------
    numpy.ndarray, shape (n_agents,)
        Entropy per agent; ``0.0`` for an agent whose rules all share one
        name or whose rule list is empty.
    """
    entropies = []

    for rule_list in np.asarray(my_rules, dtype=object):
        names = [rule.__name__ for rule in rule_list]
        if not names:
            # An empty rule list carries no information.
            entropies.append(0.0)
            continue

        # Occurrence count of every distinct rule name used by this agent.
        _, counts = np.unique(names, return_counts=True)
        probs = counts / counts.sum()

        # Written as p * log(1/p) so each term is non-negative (avoids "-0.0").
        entropies.append(float(np.sum(probs * np.log(1.0 / probs))))

    return np.array(entropies)

In [207]:
population_size, n_rules = 5, 20

# Draw one random rule selection per agent and stack them into a single array.
sampled_rule_lists = []
for _ in range(population_size):
    sampled_rule_lists.append(rules.random_rules(n_rules))
my_rules = np.array(sampled_rule_lists)

# NOTE(review): the entropy call is left disabled, exactly as in the original cell.
# diversity = Intra_Agent_Entropy(my_rules)

In [226]:
# Sorted, de-duplicated names of every rule that appears anywhere in the population.
rule_names = np.unique(
    [rule.__name__ for agent_rules in my_rules for rule in agent_rules]
)

(12,)

array(['discard_oldest_first', 'discard_randomly', 'hail_mary',
       'osawa_discard', 'play_probably_safe_treshold',
       'play_probably_useless_treshold', 'tell_anyone_useful_card',
       'tell_anyone_useless_card', 'tell_dispensable',
       'tell_most_information', 'tell_playable_card_outer',
       'tell_randomly'], dtype='<U30')

In [212]:
# NOTE(review): the only assignment to `diversity` visible in this notebook is
# commented out, so this display presumably fails with NameError on a fresh
# kernel — confirm and restore the assignment before re-running.
diversity

array([2, 3, 0, 1, 4])

In [215]:
# Show the sorted, unique rule names collected from the population.
rule_names

array(['discard_oldest_first', 'discard_randomly', 'hail_mary',
       'osawa_discard', 'play_probably_safe_treshold',
       'play_probably_useless_treshold', 'tell_anyone_useful_card',
       'tell_anyone_useless_card', 'tell_dispensable',
       'tell_most_information', 'tell_playable_card_outer',
       'tell_randomly'], dtype='<U30')

## Name Distance

In [9]:
population_size, n_rules = 5, 20

# random_rules returns a pair; only its first element (the rule list) is kept.
agent_rule_arrays = []
for _ in range(population_size):
    sampled_rules, _ = rules.random_rules(n_rules)
    agent_rule_arrays.append(np.array(sampled_rules))

# One row per agent, one column per rule slot.
my_rules = np.array(agent_rule_arrays)

In [10]:
# Sanity check: expect (population_size, n_rules), i.e. one row of rules per agent.
my_rules.shape

(5, 20)

In [11]:
# Pick two agents (rows 0 and 3 of the population) to compare.
A, B = (np.array(my_rules[agent_idx]) for agent_idx in (0, 3))

In [12]:
rulebase = np.array(rules.big_ruleset)

# Distinct rule names in first-seen order; dict keys preserve insertion order,
# so this matches an "append if not already present" loop.
axes = np.array(list(dict.fromkeys(rule.__name__ for rule in rulebase)))
axes

array(['play_probably_safe_treshold', 'play_probably_useless_treshold',
       'tell_dispensable', 'hail_mary', 'tell_playable_card_outer',
       'tell_randomly', 'tell_anyone_useful_card',
       'tell_anyone_useless_card', 'tell_most_information',
       'discard_randomly', 'osawa_discard', 'discard_oldest_first'],
      dtype='<U30')

In [30]:
# Count how often each rule name in `axes` occurs in agent A and in agent B.
A_ = np.zeros(len(axes))
B_ = np.zeros(len(axes))

for counts, rule_list in ((A_, A), (B_, B)):
    for rule in rule_list:
        # Boolean mask selects the axis whose name matches this rule
        # (no match -> empty selection, nothing is incremented).
        counts[axes == rule.__name__] += 1

In [31]:
# Per-name occurrence counts for agent A, in the order given by `axes`.
A_

array([11.,  3.,  3.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,  1.,  0.])

In [32]:
# Per-name occurrence counts for agent B, in the order given by `axes`.
B_

array([7., 4., 4., 0., 1., 0., 1., 0., 1., 1., 1., 0.])

In [34]:
# Euclidean (L2) distance between the two rule-name count vectors.
dist = np.linalg.norm(A_ - B_)

4.47213595499958

In [13]:
# Library implementation of the name distance, to compare against the manual `dist`.
# NOTE(review): the recorded outputs of the two cells differ, but their execution
# counts suggest they came from different runs with different random rules —
# TODO re-run in order and confirm both values agree.
Diversity.Name_Distance(A, B, rulebase)

3.1622776601683795

## Checking the Evaluation procedure

In [2]:
population_size = 5
n_rules = 20
n_parallel = 50
n_players = 2

# Every evaluation run returns n_parallel scores, which are split into
# population_size equal chunks below; derive the chunk size instead of
# hard-coding it so the cell keeps working when either setting changes.
if n_parallel % population_size != 0:
    raise ValueError("n_parallel must be divisible by population_size")
games_per_pair = n_parallel // population_size

# Initialize the environment
env_conf = make_hanabi_env_config('Hanabi-Full', n_players)
env = hmf.HanabiParallelEnvironment(env_conf, n_parallel)

# Build the population: random_rules returns a pair, only the rule list is kept.
my_rules = []
for _ in range(population_size):
    rule_list, _ = rules.random_rules(n_rules)
    my_rules.append(np.array(rule_list))
my_rules = np.array(my_rules)

scores = []
individual_scores = np.zeros((population_size, population_size, games_per_pair))

for k in range(population_size):

    # First player always uses ruleset k; the second player is given the
    # rulesets of the whole population.
    agents = [
        ParallelRulebasedAgent([my_rules[k]], n_parallel),
        ParallelRulebasedAgent(my_rules, n_parallel),
    ]

    # Run one evaluation over all parallel games with this pair of agents.
    parallel_session = hmf.HanabiParallelSession(env, agents)
    result = np.array(parallel_session.run_eval(dest=None, print_intermediate=False))
    print(f"Shape of Result ----> {result.shape} ")

    # Split the flat score vector into one chunk per population member.
    # NOTE(review): assumes consecutive chunks of games correspond to
    # consecutive partner rulesets — confirm against ParallelRulebasedAgent.
    individual_scores[k] = np.array(np.split(result, population_size))

    # Average the games within each chunk -> one score per partner.
    scores.append(individual_scores[k].mean(axis=1))

scores = np.array(scores)

Shape of Result ----> (50,) 
Shape of Result ----> (50,) 
Shape of Result ----> (50,) 
Shape of Result ----> (50,) 
Shape of Result ----> (50,) 


In [3]:
# Expect (population_size, population_size): row k holds the chunk-wise mean scores of run k.
scores.shape

(5, 5)

In [4]:
# Average over the per-game axis, leaving one mean score per (run, chunk) pair.
x = individual_scores.mean(axis=2)

In [5]:
# Mean scores accumulated inside the evaluation loop; should equal `x`, which is
# recomputed from `individual_scores`.
scores

array([[ 0. ,  0. ,  0. ,  7.3,  2.6],
       [ 1.7,  0. ,  0. ,  2.3,  0. ],
       [ 0. ,  0. ,  1.6,  7.3,  2.6],
       [ 6. ,  1.1,  2.5, 11.5,  9.2],
       [ 3.6,  0. ,  2.6,  2.3,  1.5]])

In [6]:
# Mean scores recomputed from `individual_scores`; should equal `scores`.
x

array([[ 0. ,  0. ,  0. ,  7.3,  2.6],
       [ 1.7,  0. ,  0. ,  2.3,  0. ],
       [ 0. ,  0. ,  1.6,  7.3,  2.6],
       [ 6. ,  1.1,  2.5, 11.5,  9.2],
       [ 3.6,  0. ,  2.6,  2.3,  1.5]])

In [64]:
# Row-wise mean of `x`: one overall average score per agent.
# NOTE(review): the recorded output does not match the `x` displayed above,
# so it presumably comes from a different execution — re-run to refresh.
np.mean(x, axis = 1)

array([0.58, 2.18, 2.84, 0.68, 0.5 ])

In [67]:
# Hand check of a single row mean.
# NOTE(review): the literal values are presumably copied from an earlier run's `x`.
np.mean([0. , 7.3, 1.4, 1.6, 0.6]) 

2.1799999999999997

In [76]:
alpha, beta = 0.01, 0.2

# performances[i, j] = beta * (score of pair (i, j) + alpha * name distance of i and j)
performances = np.zeros(x.shape[:2])

for i, j in np.ndindex(*x.shape[:2]):
    name_distance = Diversity.Name_Distance(my_rules[i], my_rules[j], rulebase)
    performances[i, j] = beta * (x[i, j] + alpha * name_distance)

    
        


In [77]:
# Collapse the pairwise matrix to one value per agent (row-wise mean).
# This rebinds `performances` from 2-D to 1-D, so the cell can only be run once
# per freshly built matrix.
performances = performances.mean(axis=1)

performances

array([0.12145005, 0.44247924, 0.57272117, 0.14107313, 0.1047392 ])

In [78]:
# Agent indices ordered from lowest to highest performance (argsort is ascending).
np.argsort(performances)

array([4, 0, 3, 1, 2])

In [7]:
# Fitness of each agent = mean of its two best pairwise scores.
# Each row is sorted ascending, then the last two COLUMNS of every row are kept;
# slicing with [-2:] alone would select the last two rows (agents) instead.
fitness = np.mean(np.sort(x, axis=1)[:, -2:], axis=1)

In [21]:
# Sorted copy of `x`: every row in ascending order, `x` itself is left untouched.
y = x.copy()
y.sort(axis=1)

In [22]:
# Each row of the score matrix sorted in ascending order.
y

array([[ 0. ,  0. ,  0. ,  2.6,  7.3],
       [ 0. ,  0. ,  0. ,  1.7,  2.3],
       [ 0. ,  0. ,  1.6,  2.6,  7.3],
       [ 1.1,  2.5,  6. ,  9.2, 11.5],
       [ 0. ,  1.5,  2.3,  2.6,  3.6]])

In [23]:
# Keep the last two entries of every sorted row, i.e. each agent's two highest scores.
z = list(y[:, -2:])

In [24]:
# Display the two highest scores per agent (a list of length-2 arrays).
# The original statement `z = np.` was incomplete and could not be parsed.
z

[array([2.6, 7.3]),
 array([1.7, 2.3]),
 array([2.6, 7.3]),
 array([ 9.2, 11.5]),
 array([2.6, 3.6])]

In [25]:
# Average each agent's two best scores; rebinds `z` from a list of pairs to a 1-D array.
z = np.asarray(z).mean(axis=1)

In [26]:
# Mean of the two highest pairwise scores for each agent.
z

array([ 4.95,  2.  ,  4.95, 10.35,  3.1 ])