<a href="https://colab.research.google.com/github/vbipin/aip/blob/master/GridSoc_single.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import random
import itertools #for some itertools.product(a,b)
#
#We plan to implement Gridworld Soccer.
#

In [0]:
#Let us have a gridworld
#ref: Chapter 17, Artificial Intelligence a Modern Approach
#ref: CS188 https://inst.eecs.berkeley.edu/~cs188/fa19/
#ref: https://inst.eecs.berkeley.edu/~cs188/fa19/assets/slides/lec8.pdf


#We play Soccer in a grid world.
#In this version we have only one player and one ball.


# Action table: maps each action name to a (row_offset, column_offset) pair.
# Rows grow downwards, so "Up" is a negative row offset.
# NOTE: insertion order matters -- GridSoc derives its action list from the keys.
soc_actions = {
    # Player movements (one cell in a cardinal direction).
    'MoveUp': (-1, 0),
    'MoveDown': (1, 0),
    'MoveRight': (0, 1),
    'MoveLeft': (0, -1),

    # Ball kicks: one cell in a cardinal direction.
    'KickUp': (-1, 0),
    'KickDown': (1, 0),
    'KickRight': (0, 1),
    'KickLeft': (0, -1),

    # Ball kicks: two cells in a cardinal direction.
    'KickUp2': (-2, 0),
    'KickDown2': (2, 0),
    'KickRight2': (0, 2),
    'KickLeft2': (0, -2),

    # Ball kicks: one cell diagonally.
    'KickRightUp': (-1, 1),
    'KickRightDown': (1, 1),
    'KickLeftUp': (-1, -1),
    'KickLeftDown': (1, -1),

    # Ball kicks: two cells diagonally.
    'KickRightUp2': (-2, 2),
    'KickRightDown2': (2, 2),
    'KickLeftUp2': (-2, -2),
    'KickLeftDown2': (2, -2),

    # Dummy action used to exit; it carries no displacement.
    'Goal': (0, 0),
}

class GridSoc:
    """Single-player soccer played on a rectangular grid.

    A state is a pair ``(ball_cell, player_cell)`` where each cell is a
    ``(row, column)`` tuple.  The player walks with the ``Move*`` actions and
    can displace the ball with any other action, but only while standing on
    the ball's cell.  Once the ball sits on a terminal cell, the next action
    leads to the dummy ``end_state``.
    """

    # Actions that displace the player; every other action is a ball action.
    _PLAYER_ACTIONS = ('MoveUp', 'MoveDown', 'MoveRight', 'MoveLeft')

    def __init__(self,
                 rows=6,
                 columns=6,
                 walls=None,
                 terminals=None,
                 transitions=None,
                 gamma=1.0,
                 living_reward=-0.01,
                 noise=0.2):
        """Build the grid. These parameters are not expected to change during a run.

        Args:
            rows: number of grid rows.
            columns: number of grid columns.
            walls: collection of blocked ``(row, column)`` cells.  Defaults to
                no walls.
            terminals: dict mapping terminal cells to their reward.  Defaults
                to ``{(2, 0): +1.0, (3, 0): +1.0}``.
            transitions: accepted for interface compatibility; not used.
            gamma: discount factor (stored only).
            living_reward: reward returned for every non-goal step.
            noise: stored only; transitions are deterministic in this version.
        """
        # Fresh containers per instance: a mutable default in the signature
        # would be shared (and could be mutated) across all instances.
        if walls is None:
            walls = []
        if terminals is None:
            terminals = {(2, 0): +1.0, (3, 0): +1.0}

        self.rows = rows
        self.columns = columns
        self.N = rows * columns  # total number of cells
        self.walls = walls
        self.terminals = terminals  # terminal cell -> reward
        self.gamma = gamma
        self.living_reward = living_reward
        self.noise = noise
        self.all_actions = list(soc_actions.keys())
        # Dummy state reached after acting while the ball is on a terminal cell.
        self.end_state = ((-1, -1), (-1, -1))
        self.valid_cells = [(r, c)
                            for r in range(rows)
                            for c in range(columns)
                            if (r, c) not in walls]
        # Every (ball, player) combination over valid cells, plus the end state.
        self.all_states = (list(itertools.product(self.valid_cells, self.valid_cells))
                           + [self.end_state])

        # Scratch space for debugging information.
        self.debug = {}

    def actions(self, state):
        """Return the actions available in ``state``.

        Every action is offered in every state (some simply have no effect),
        except in ``end_state`` where no action is available.
        """
        if state == self.end_state:
            return []
        return self.all_actions

    def _is_player(self, action):
        """Return True if ``action`` moves the player rather than the ball."""
        return action in self._PLAYER_ACTIONS

    def _is_ball(self, action):
        """Return True for every action that is not a player movement."""
        return not self._is_player(action)

    def reward(self, state, action, next_state=None):
        """Return the instantaneous reward for taking ``action`` in ``state``.

        The terminal value is paid only when the ball is on a terminal cell
        AND the action is ``'Goal'``.  ``end_state`` yields 0.0; everything
        else yields ``living_reward``.  ``next_state`` is ignored.

        NOTE(review): ``_next_state`` ends the episode for *any* action taken
        with the ball on a terminal cell, while only ``'Goal'`` collects the
        terminal reward here -- confirm this asymmetry is intended.
        """
        ball_cell, _player_cell = state
        if ball_cell in self.terminals and action == 'Goal':
            return self.terminals[ball_cell]

        if state == self.end_state:
            return 0.0

        return self.living_reward  # usually a small negative value

    def transitions(self, state, action):
        """Return three parallel lists: next states, actions, probabilities.

        This version is deterministic, so each list has exactly one entry and
        the probability is 1.0.
        """
        probs = 1.0
        return [self._next_state(state, action)], [action], [probs]

    def move(self, state, action):
        """Take ``action`` and return ``(new_state, reward, is_terminal)``.

        ``state`` is a ``(ball_cell, player_cell)`` pair.  The reward is
        computed from the state the action was taken in.  The return layout
        follows the OpenAI gym convention.
        """
        new_state = self._next_state(state, action)
        reward = self.reward(state, action)
        is_terminal = new_state == self.end_state
        return new_state, reward, is_terminal

    def _next_state(self, state, action):
        """Return the state that results from taking ``action`` in ``state``.

        * With the ball on a terminal cell, any action leads to ``end_state``.
        * A player action moves the player by the action's offset.
        * A ball action moves the ball by the action's offset, but only when
          the player is on the ball's cell.
        * If either resulting cell is invalid, ``state`` is returned unchanged.
        """
        ball_cell, player_cell = state
        # Targets start at the current positions and are updated below.
        ball_target, player_target = state

        assert action in self.actions(state), \
            "action %r is not available in state %r" % (action, state)

        if ball_cell in self.terminals:
            return self.end_state

        if self._is_player(action):
            player_target = self._move_player(player_cell, action)

        # The ball can only be kicked when the player stands on it.
        if ball_cell == player_cell and self._is_ball(action):
            ball_target = self._kick_ball(ball_cell, action)

        if self._valid_cell(player_target) and self._valid_cell(ball_target):
            return ball_target, player_target

        return state  # stay put: the target is invalid

    def _valid_cell(self, cell):
        """Return True if ``cell`` is inside the grid and not a wall."""
        return cell in self.valid_cells

    def _move_player(self, player_cell, action):
        """Return the cell reached by applying ``action``'s offset to the player."""
        d_row, d_col = soc_actions[action]
        return player_cell[0] + d_row, player_cell[1] + d_col

    def _kick_ball(self, ball_cell, action):
        """Return the cell reached by applying ``action``'s offset to the ball."""
        d_row, d_col = soc_actions[action]
        return ball_cell[0] + d_row, ball_cell[1] + d_col

    def _cell_symbol(self, cell, ball_cell, agent_cell):
        """Return the one-character symbol used to draw ``cell``."""
        if cell in self.walls:
            return '#'
        if cell == agent_cell:
            return '@'  # the player is drawn over the ball when they share a cell
        if cell == ball_cell:
            return 'O'
        if cell in self.terminals:
            return '+' if self.terminals[cell] > 0 else '-'
        return '.'

    def print(self, state=None):
        """Pretty-print the grid, plus the ball and player if ``state`` is given.

        Symbols: ``#`` wall, ``@`` player, ``O`` ball, ``+``/``-`` terminal
        cell with positive / non-positive reward, ``.`` empty cell.
        """
        if state:
            ball_cell, agent_cell = state
        else:
            ball_cell, agent_cell = None, None

        for r in range(self.rows):
            for c in range(self.columns):
                print(self._cell_symbol((r, c), ball_cell, agent_cell), end=' ')
            print("")
        print("--" * self.columns)

In [3]:
# Demo: walk onto the ball, kick it two cells down onto a terminal cell,
# then take the 'Goal' action to collect the reward and end the episode.
gs = GridSoc(rows=5, columns=5)

start = ((0,0), (0,1))  # (ball_cell, player_cell)
n = start
for action in ('MoveLeft', 'KickDown2', 'Goal'):
    n,r,d = gs.move(n,action)
    print(r, n)
    # Once the episode is over the state is the dummy end state: nothing to draw.
    if not d:
        gs.print(n)

-0.01 ((0, 0), (0, 0))
@ . . . . 
. . . . . 
+ . . . . 
+ . . . . 
. . . . . 
----------
-0.01 ((2, 0), (0, 0))
@ . . . . 
. . . . . 
O . . . . 
+ . . . . 
. . . . . 
----------
1.0 ((-1, -1), (-1, -1))
