# Install kaggle-environments

In [None]:
# 1. Enable Internet in the Kernel (Settings side pane)

# 2. The curl cache may need to be purged if v0.1.6 cannot be found (uncomment if needed).
# !curl -X PURGE https://pypi.org/simple/kaggle-environments

# ConnectX environment was defined in v0.1.6
!pip install 'kaggle-environments>=0.1.6'

# Create ConnectX Environment

In [None]:
from kaggle_environments import evaluate, make, utils

# Build a ConnectX environment with debug=True and render its current
# (freshly created) state.
env = make("connectx", debug=True)
env.render()


# Create an Agent

To create the submission, an agent function should be fully encapsulated (no external dependencies).  

When your agent is being evaluated against others, it will not have access to the Kaggle docker image.  Only the following can be imported: Python Standard Library Modules, gym, numpy, scipy, pytorch (1.3.1, cpu only), and more may be added later.



In [None]:
# This agent picks a column with a depth-limited, alpha-beta pruned minimax search.
def my_agent(observation, configuration):
    """Choose a ConnectX column using alpha-beta pruned minimax.

    Player 1 is treated as the maximising side and player 2 as the
    minimising side; the agent searches as whichever side
    ``observation.mark`` names (anything other than 1 is played as 2).

    All helpers are nested inside this function on purpose: the notebook
    writes only this function's source to the submission file, so it must
    not depend on any module-level name.

    The search runs on a private copy of the board, so the caller's
    ``observation.board`` is never written to (and may be immutable).

    Args:
        observation: object exposing ``board`` (flat, row-major sequence
            where row 0 is the top row; 0 = empty, 1 / 2 = player tokens)
            and ``mark`` (the player this agent moves for).
        configuration: object exposing ``rows``, ``columns`` and ``inarow``.

    Returns:
        The chosen column index, or ``None`` when no column has room left.
    """
    WINNING_P1 = 100000
    WINNING_P2 = -100000
    # A position whose evaluation crosses half a win is treated as decided.
    WIN_CUTOFF = WINNING_P1 // 2
    LOSS_CUTOFF = WINNING_P2 // 2
    # Initial alpha-beta window; wider than any score the evaluation produces
    # in practice.
    SCORE_BOUND = 300000
    # Depth handed to the root call; depth 0 still tries one move per column.
    SEARCH_DEPTH = 4
    # Line directions as (row delta, column delta): vertical, horizontal and
    # the two diagonals. Each is scanned both ways.
    DIRECTIONS = ((1, 0), (0, 1), (1, 1), (-1, 1))

    rows = configuration.rows
    columns = configuration.columns
    inarow = configuration.inarow
    # Weights used by eval_board; they depend only on the board height.
    endgame_control = int(0.5 * rows)
    first_bonus = int(0.4 * rows)

    # Work on a copy so tokens placed during the search never leak into the
    # caller's observation, even if something raises part-way through.
    board = list(observation.board)

    def get(row_num, col_num):
        """Return the token at (row_num, col_num), or -1 when off the board."""
        if 0 <= row_num < rows and 0 <= col_num < columns:
            return board[row_num * columns + col_num]
        return -1

    def run_length(player, row_num, col_num, d_row, d_col):
        """Count consecutive `player` tokens next to the cell in one direction."""
        count = 0
        for step in range(1, inarow + 1):
            if get(row_num + step * d_row, col_num + step * d_col) != player:
                break
            count += 1
        return count

    def completes_line(player, row_num, col_num):
        """Tell whether the cell, counted as `player`'s, lies in an inarow-long line."""
        return any(
            run_length(player, row_num, col_num, d_row, d_col)
            + run_length(player, row_num, col_num, -d_row, -d_col)
            + 1 >= inarow
            for d_row, d_col in DIRECTIONS
        )

    def eval_entry(player, row_num, col_num):
        """Score one cell from `player`'s point of view.

        Returns 1 for an empty cell that would complete a line for `player`,
        WINNING_P1 for a `player` token that already sits in a completed
        line, and 0 otherwise. The value is always non-negative; eval_board
        applies the sign for player 2.
        """
        token = get(row_num, col_num)
        if token == 0:
            value = 1
        elif token == player:
            value = WINNING_P1
        else:
            return 0
        return value if completes_line(player, row_num, col_num) else 0

    def eval_board():
        """Evaluate the working board; positive favours player 1.

        Returns as soon as a completed line pushes the running score past
        WIN_CUTOFF / LOSS_CUTOFF, which skips the remaining columns.
        """
        score = 0
        for col in range(columns):
            # Number of empty cells at the top of this column.
            num_zeros = 0
            while num_zeros < rows and get(num_zeros, col) == 0:
                num_zeros += 1

            # Net count of line-completing empty cells, split by row parity.
            even_control = 0
            odd_control = 0
            for row in range(num_zeros):
                balance = eval_entry(1, row, col) - eval_entry(2, row, col)
                if row % 2 == 0:
                    even_control += balance
                else:
                    odd_control += balance
            score += even_control + odd_control

            # Extra weight for the empty cell directly above the next landing
            # cell (on top of the 1 point it already earned in the loop).
            if num_zeros >= 2:
                above_landing = num_zeros - 2
                score += (first_bonus - 1) * (
                    eval_entry(1, above_landing, col)
                    - eval_entry(2, above_landing, col)
                )

            # Bonus when one player leads on both parities in this column.
            if even_control > 0 and odd_control > 0:
                score += endgame_control
            if even_control < 0 and odd_control < 0:
                score -= endgame_control

            # Occupied cells: look for already-completed lines.
            for row in range(num_zeros, rows):
                token = get(row, col)
                if token == 1:
                    score += eval_entry(1, row, col)
                    if score >= WIN_CUTOFF:
                        return score
                elif token == 2:
                    score -= eval_entry(2, row, col)
                    if score <= LOSS_CUTOFF:
                        return score
        return score

    def place(col, player):
        """Drop `player`'s token into `col`; return its row, or -1 if none fits."""
        for row in range(rows):
            if get(row, col) == 0 and (
                row == rows - 1 or get(row + 1, col) in (1, 2)
            ):
                board[row * columns + col] = player
                return row
        return -1

    def search(player, depth, alpha, beta):
        """Alpha-beta search for `player` (1 maximises, 2 minimises).

        Returns a (best_move, best_score) pair; both are None when no column
        has room.
        """
        maximizing = player == 1
        opponent = 2 if maximizing else 1

        # Legal columns are those whose top cell is empty. Rotate the list so
        # the search starts at its middle entry and wraps around.
        moves = [c for c in range(columns) if board[c] == 0]
        mid = len(moves) // 2
        moves = moves[mid:] + moves[:mid]

        best_move = None
        best_score = None
        for move in moves:
            row_place = place(move, player)
            if row_place < 0:
                # Nothing was placed, so there is nothing to score or undo.
                continue

            score = eval_board()
            # Only look deeper when depth remains and the position is not
            # already decided for either side.
            if depth > 0 and LOSS_CUTOFF < score < WIN_CUTOFF:
                reply_move, reply_score = search(opponent, depth - 1, alpha, beta)
                # A reply of None means the board is full; keep the static score.
                if reply_move is not None:
                    score = reply_score

            # Take the token back before moving on.
            board[row_place * columns + move] = 0

            if maximizing:
                if best_score is None or score > best_score:
                    best_score = score
                    best_move = move
                if best_score >= beta:
                    return best_move, best_score
                if best_score > alpha:
                    alpha = best_score
            else:
                if best_score is None or score < best_score:
                    best_score = score
                    best_move = move
                if best_score <= alpha:
                    return best_move, best_score
                if best_score < beta:
                    beta = best_score
        return best_move, best_score

    root_player = 1 if observation.mark == 1 else 2
    chosen_move, _ = search(root_player, SEARCH_DEPTH, -SCORE_BOUND, SCORE_BOUND)
    return chosen_move

# Test your Agent

In [None]:
env.reset()
# Play the agent against itself: both seats are filled by my_agent.
env.run([ my_agent, my_agent])
# Render the finished episode inline in the notebook.
env.render(mode="ipython", width=500, height=450)

# Debug/Train your Agent

In [None]:
# Play as first position against random agent.
# (None presumably marks the seat driven by this loop -- confirm against the
# kaggle-environments docs.)
trainer = env.train([None, "random"])

observation = trainer.reset()
# Step until the environment reports the episode is over. The loop reads
# env.done; the `done` value returned by trainer.step is not used.
while not env.done:
    my_action = my_agent(observation, env.configuration)
    print("My Action", my_action)
    observation, reward, done, info = trainer.step(my_action)
    # env.render(mode="ipython", width=100, height=90, header=False, controls=False)
# Show the final state once the episode has ended.
env.render()

# Evaluate your Agent

In [None]:
def mean_reward(rewards):
    """Return the average of the first entry of every item in `rewards`.

    Each item is indexed with [0]; the division is carried out in floating
    point. An empty `rewards` raises ZeroDivisionError.
    """
    first_entry_total = 0
    for episode in rewards:
        first_entry_total = first_entry_total + episode[0]
    episode_count = float(len(rewards))
    return first_entry_total / episode_count

# Run multiple episodes to estimate its performance.
# Each line plays 10 episodes with my_agent listed first and prints the mean
# of the first entry of every reward list returned by evaluate().
print("My Agent vs Random Agent:", mean_reward(evaluate("connectx", [my_agent, "random"], num_episodes=10)))
print("My Agent vs Negamax Agent:", mean_reward(evaluate("connectx", [my_agent, "negamax"], num_episodes=10)))

# Play your Agent
Click on any column to place a checker there ("manually select action").

In [None]:
# "None" represents which agent you'll manually play as (first or second player).
# Here None is in the first slot and the built-in "negamax" agent is in the second.
env.play([None, "negamax"], width=500, height=450)

# Write Submission File



In [None]:
import inspect
import os

def write_agent_to_file(function, file):
    """Write the source code of `function` to `file` and report it on stdout.

    If `file` already exists the source is appended to it; otherwise the
    file is created.
    """
    mode = "w"
    if os.path.exists(file):
        mode = "a"
    with open(file, mode) as out:
        source = inspect.getsource(function)
        out.write(source)
        print(function, "written to", file)

# Write my_agent's source to submission.py. Because write_agent_to_file
# appends when the file already exists, re-running this cell adds another copy.
write_agent_to_file(my_agent, "submission.py")

# Validate Submission
Play your submission against itself.  This is the first episode the competition will run to weed out erroneous agents.

Why validate? This roughly verifies that your submission is fully encapsulated and can be run remotely.

In [None]:
# Note: Stdout replacement is a temporary workaround.
# sys.stdout is saved before the submission is loaded and restored afterwards.
import sys
out = sys.stdout
# NOTE(review): the file is read from an absolute path, while the write step
# used the relative name "submission.py" -- this assumes the notebook's
# working directory is /kaggle/working.
submission = utils.read_file("/kaggle/working/submission.py")
agent = utils.get_last_callable(submission)
sys.stdout = out

# Play the loaded agent against itself in a fresh environment and report
# whether both seats finished with status "DONE".
env = make("connectx", debug=True)
env.run([agent, agent])
print("Success!" if env.state[0].status == env.state[1].status == "DONE" else "Failed...")

# Submit to Competition

1. Commit this kernel.
2. View the committed version.
3. Go to "Data" section and find submission.py file.
4. Click "Submit to Competition"
5. Go to [My Submissions](https://kaggle.com/c/connectx/submissions) to view your score and episodes being played.