Commit

Refactor
ahainaut committed Apr 19, 2020
1 parent b94cd65 commit 91afb1d
Showing 10 changed files with 261 additions and 68 deletions.
11 changes: 11 additions & 0 deletions games/abstract_game.py
@@ -87,6 +87,17 @@ def human_to_action(self):
            choice = input("Enter another action : ")
        return int(choice)

    @abstractmethod
    def expert_agent(self):
        """
        Hard-coded agent that MuZero faces to assess its progress in multiplayer games.
        It doesn't influence training.

        Returns:
            Action as an integer to take in the current game state.
        """
        pass

    @abstractmethod
    def action_to_string(self, action_number):
        """
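Since expert_agent is now part of the abstract interface, every game has to provide it (the single-player games below just stub it out with pass). A minimal sketch of a concrete implementation, assuming the class in games/abstract_game.py is named AbstractGame and exposes the legal_actions() method the bundled games define; the MyGame name is hypothetical, for illustration only:

```python
import numpy

from games.abstract_game import AbstractGame  # the class shown in the hunk above


class MyGame(AbstractGame):  # hypothetical game, for illustration only
    def expert_agent(self):
        # Weakest possible "expert": a uniformly random legal action.
        return int(numpy.random.choice(self.legal_actions()))
```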
14 changes: 14 additions & 0 deletions games/breakout.py
@@ -25,6 +25,10 @@ def __init__(self):
        self.players = [i for i in range(1)]  # List of players. You should only edit the length
        self.stacked_observations = 2  # Number of previous observations and previous actions to add to the current observation

        ### Evaluate
        self.muzero_player = 0  # Turn MuZero begins to play (0: MuZero plays first, 1: MuZero plays second)
        self.opponent = None  # Hard-coded agent that MuZero faces to assess its progress in multiplayer games. It doesn't influence training. None, "random" or "expert" if implemented in the Game class



        ### Self-Play
@@ -206,6 +210,16 @@ def human_to_action(self):
"""
pass

def expert_agent(self):
"""
Hard coded agent that MuZero faces to assess his progress in multiplayer games.
It doesn't influence training
Returns:
Action as an integer to take in the current game state
"""
pass

    def action_to_string(self, action_number):
        """
        Convert an action number to a string representing the action.
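The new opponent setting is what evaluation keys on. A minimal sketch of how such a flag might be dispatched when the evaluation opponent must move; select_opponent_action is a hypothetical helper, not part of this commit:

```python
import numpy


def select_opponent_action(game, opponent):
    """Hypothetical dispatcher: choose the evaluation opponent's move."""
    if opponent == "random":
        return int(numpy.random.choice(game.legal_actions()))
    if opponent == "expert":
        return game.expert_agent()  # per-game heuristic, see connect4/tictactoe below
    raise ValueError('opponent must be None, "random" or "expert"')
```

With opponent = None, as here for Breakout, MuZero simply plays alone and no such dispatch is needed.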
14 changes: 14 additions & 0 deletions games/cartpole.py
@@ -20,6 +20,10 @@ def __init__(self):
        self.players = [i for i in range(1)]  # List of players. You should only edit the length
        self.stacked_observations = 0  # Number of previous observations and previous actions to add to the current observation

        ### Evaluate
        self.muzero_player = 0  # Turn MuZero begins to play (0: MuZero plays first, 1: MuZero plays second)
        self.opponent = None  # Hard-coded agent that MuZero faces to assess its progress in multiplayer games. It doesn't influence training. None, "random" or "expert" if implemented in the Game class



        ### Self-Play
@@ -194,6 +198,16 @@ def human_to_action(self):
"""
pass

def expert_agent(self):
"""
Hard coded agent that MuZero faces to assess his progress in multiplayer games.
It doesn't influence training
Returns:
Action as an integer to take in the current game state
"""
pass

    def action_to_string(self, action_number):
        """
        Convert an action number to a string representing the action.
52 changes: 52 additions & 0 deletions games/connect4.py
@@ -20,6 +20,10 @@ def __init__(self):
        self.players = [i for i in range(2)]  # List of players. You should only edit the length
        self.stacked_observations = 0  # Number of previous observations and previous actions to add to the current observation

        ### Evaluate
        self.muzero_player = 0  # Turn MuZero begins to play (0: MuZero plays first, 1: MuZero plays second)
        self.opponent = "expert"  # Hard-coded agent that MuZero faces to assess its progress in multiplayer games. It doesn't influence training. None, "random" or "expert" if implemented in the Game class



        ### Self-Play
@@ -193,6 +197,16 @@ def human_to_action(self):
            choice = input("Enter another column : ")
        return int(choice)

    def expert_agent(self):
        """
        Hard-coded agent that MuZero faces to assess its progress in multiplayer games.
        It doesn't influence training.

        Returns:
            Action as an integer to take in the current game state.
        """
        return self.env.expert_action()

    def action_to_string(self, action_number):
        """
        Convert an action number to a string representing the action.
@@ -296,5 +310,43 @@ def is_finished(self):

        return False

    def expert_action(self):
        """
        Win if possible, otherwise block: scan every 4x4 sub-board for a line of
        three with a playable empty square; fall back to a random legal move.
        """
        board = self.board
        action = numpy.random.choice(self.legal_actions())
        for k in range(3):
            for l in range(4):
                sub_board = board[k:k+4, l:l+4]
                # Horizontal and vertical checks
                for i in range(4):
                    if abs(sum(sub_board[i, :])) == 3:
                        ind = numpy.where(sub_board[i, :] == 0)[0][0]
                        # Only playable if gravity has filled the column up to this row
                        if numpy.count_nonzero(board[:, ind+l]) == i+k:
                            action = ind + l
                            if self.player * sum(sub_board[i, :]) > 0:
                                return action

                    if abs(sum(sub_board[:, i])) == 3:
                        action = i + l
                        if self.player * sum(sub_board[:, i]) > 0:
                            return action
                # Diagonal checks
                diag = sub_board.diagonal()
                anti_diag = numpy.fliplr(sub_board).diagonal()
                if abs(sum(diag)) == 3:
                    ind = numpy.where(diag == 0)[0][0]
                    if numpy.count_nonzero(board[:, ind+l]) == ind+k:
                        action = ind + l
                        if self.player * sum(diag) > 0:
                            return action

                if abs(sum(anti_diag)) == 3:
                    ind = numpy.where(anti_diag == 0)[0][0]
                    if numpy.count_nonzero(board[:, 3-ind+l]) == ind+k:
                        action = 3 - ind + l
                        if self.player * sum(anti_diag) > 0:
                            return action

        return action

    def render(self):
        print(self.board[::-1])
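A quick way to sanity-check the heuristic is to hand it a position with an immediate win. A hypothetical smoke test, assuming the Connect4 environment's step() (not shown in this hunk) drops a piece in the given column for the side to move and then toggles self.player:

```python
env = Connect4()
for col in [0, 0, 1, 1, 2, 2]:  # player 1 builds 0-1-2 on the bottom row
    env.step(col)
# Player 1 to move with three in a row: the expert completes it at column 3.
assert env.expert_action() == 3
```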
14 changes: 14 additions & 0 deletions games/gomoku.py
@@ -21,6 +21,10 @@ def __init__(self):
        self.players = [i for i in range(2)]  # List of players. You should only edit the length
        self.stacked_observations = 0  # Number of previous observations and previous actions to add to the current observation

        ### Evaluate
        self.muzero_player = 0  # Turn MuZero begins to play (0: MuZero plays first, 1: MuZero plays second)
        self.opponent = "random"  # Hard-coded agent that MuZero faces to assess its progress in multiplayer games. It doesn't influence training. None, "random" or "expert" if implemented in the Game class



        ### Self-Play
@@ -194,6 +198,16 @@ def human_to_action(self):
        while not valid:
            valid, action = self.env.human_input_to_action()
        return action

    def expert_agent(self):
        """
        Hard-coded agent that MuZero faces to assess its progress in multiplayer games.
        It doesn't influence training.

        Returns:
            Action as an integer to take in the current game state.
        """
        pass

    def action_to_string(self, action):
        """
14 changes: 14 additions & 0 deletions games/lunarlander.py
@@ -20,6 +20,10 @@ def __init__(self):
        self.players = [i for i in range(1)]  # List of players. You should only edit the length
        self.stacked_observations = 0  # Number of previous observations and previous actions to add to the current observation

        ### Evaluate
        self.muzero_player = 0  # Turn MuZero begins to play (0: MuZero plays first, 1: MuZero plays second)
        self.opponent = None  # Hard-coded agent that MuZero faces to assess its progress in multiplayer games. It doesn't influence training. None, "random" or "expert" if implemented in the Game class



        ### Self-Play
@@ -194,6 +198,16 @@ def human_to_action(self):
"""
pass

def expert_agent(self):
"""
Hard coded agent that MuZero faces to assess his progress in multiplayer games.
It doesn't influence training
Returns:
Action as an integer to take in the current game state
"""
pass

    def action_to_string(self, action_number):
        """
        Convert an action number to a string representing the action.
48 changes: 48 additions & 0 deletions games/tictactoe.py
@@ -20,6 +20,10 @@ def __init__(self):
        self.players = [i for i in range(2)]  # List of players. You should only edit the length
        self.stacked_observations = 0  # Number of previous observations and previous actions to add to the current observation

        ### Evaluate
        self.muzero_player = 0  # Turn MuZero begins to play (0: MuZero plays first, 1: MuZero plays second)
        self.opponent = "expert"  # Hard-coded agent that MuZero faces to assess its progress in multiplayer games. It doesn't influence training. None, "random" or "expert" if implemented in the Game class



        ### Self-Play
@@ -217,6 +221,16 @@ def human_to_action(self):
print("Wrong input, try again")
return choice

def expert_agent(self):
"""
Hard coded agent that MuZero faces to assess his progress in multiplayer games.
It doesn't influence training
Returns:
Action as an integer to take in the current game state
"""
return self.env.expert_action()

    def action_to_string(self, action_number):
        """
        Convert an action number to a string representing the action.
@@ -300,6 +314,40 @@ def is_finished(self):
            return True

        return False

    def expert_action(self):
        """
        Win if possible, otherwise block: look for a line of two with an empty
        square; fall back to a random legal move.
        """
        board = self.board
        action = numpy.random.choice(self.legal_actions())
        # Horizontal and vertical checks
        for i in range(3):
            if abs(sum(board[i, :])) == 2:
                ind = numpy.where(board[i, :] == 0)[0][0]
                action = numpy.ravel_multi_index((numpy.array([i]), numpy.array([ind])), (3, 3))[0]
                if self.player * sum(board[i, :]) > 0:
                    return action

            if abs(sum(board[:, i])) == 2:
                ind = numpy.where(board[:, i] == 0)[0][0]
                action = numpy.ravel_multi_index((numpy.array([ind]), numpy.array([i])), (3, 3))[0]
                if self.player * sum(board[:, i]) > 0:
                    return action

        # Diagonal checks
        diag = board.diagonal()
        anti_diag = numpy.fliplr(board).diagonal()
        if abs(sum(diag)) == 2:
            ind = numpy.where(diag == 0)[0][0]
            action = numpy.ravel_multi_index((numpy.array([ind]), numpy.array([ind])), (3, 3))[0]
            if self.player * sum(diag) > 0:
                return action

        if abs(sum(anti_diag)) == 2:
            ind = numpy.where(anti_diag == 0)[0][0]
            action = numpy.ravel_multi_index((numpy.array([ind]), numpy.array([2 - ind])), (3, 3))[0]
            if self.player * sum(anti_diag) > 0:
                return action

        return action

    def render(self):
        print(self.board[::-1])
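The same kind of smoke test works here, under the assumption that TicTacToe's step() (not shown in this hunk) maps action a to cell (a // 3, a % 3), plays it for the side to move, and toggles self.player:

```python
env = TicTacToe()
for a in [0, 3, 1]:  # player 1 takes cells 0 and 1, player -1 takes cell 3
    env.step(a)
# Player -1 to move: the expert blocks player 1's top row at cell 2.
assert env.expert_action() == 2
```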
8 changes: 4 additions & 4 deletions muzero.py
@@ -120,13 +120,13 @@ def train(self):
"1.Total reward/3.Episode length", infos["episode_length"], counter,
)
writer.add_scalar(
"1.Total reward/4.Player 0 MuZero reward",
infos["player_0_reward"],
"1.Total reward/4.MuZero reward",
infos["muzero_reward"],
counter,
)
writer.add_scalar(
"1.Total reward/5.Player 1 Random reward",
infos["player_1_reward"],
"1.Total reward/5.Opponent reward",
infos["opponent_reward"],
counter,
)
writer.add_scalar(
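The renamed tags keep the TensorBoard charts meaningful whatever the opponent is. A minimal, self-contained sketch of the logging pattern used here, with illustrative values standing in for the real infos dict:

```python
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()  # writes event files under ./runs by default
infos = {"muzero_reward": 12.0, "opponent_reward": -12.0}  # illustrative values
for counter in range(3):
    writer.add_scalar("1.Total reward/4.MuZero reward", infos["muzero_reward"], counter)
    writer.add_scalar("1.Total reward/5.Opponent reward", infos["opponent_reward"], counter)
writer.close()
```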