In [1]:
import pandas as pd
from sklearn import tree
import ast
import math
import pickle
import os
import glob

# Load data

You will get a number of files to train on. Each file contains one game. below is shown how to parse one game, you will have to train on multiple games.

In [2]:
path = r".\training_data"  
all_files = glob.glob(os.path.join(path, "*.csv"))     

training_data = []

#load the data into a pandas frames
for file in all_files:
    game_data = pd.read_csv(file,index_col=False)
    reason = game_data.iloc[-1][6]
    
    #if the reason is found, add it to each line to fill out the blanks
    if type(reason) is str:
        for index,row in game_data.iterrows():
            game_data.loc[index,'reason'] = reason

    #else there was no reason, implying the game reached the number of iterations
    else:
        for index,row in game_data.iterrows():
            game_data.loc[index,'reason'] = 'max_iterations'    
    
    training_data.append(game_data)

#preview the final 5 lines
training_data[-1].head()


Unnamed: 0,field_before,field_after,turn_made_by,move_made,score1,score2,reason
0,"[['.', '.', '.', '.', '.', '.', '.', '.', '.',...","[['.', '.', '.', '.', '.', '.', '.', '.', '.',...",player1 sheep,1,0,0,sheep1 eaten
1,"[['.', '.', '.', '.', '.', '.', '.', '.', '.',...","[['.', '.', '.', '.', '.', '.', '.', '.', '.',...",player2 sheep,-2,0,0,sheep1 eaten
2,"[['.', '.', '.', '.', '.', '.', '.', '.', '.',...","[['.', '.', '.', '.', '.', '.', '.', '.', '.',...",player1 sheep,2,0,0,sheep1 eaten
3,"[['.', '.', '.', '.', '.', '.', '.', '.', '.',...","[['.', '.', '.', '.', '.', '.', '.', '.', '.',...",player2 sheep,-1,0,0,sheep1 eaten
4,"[['.', '.', '.', '.', '.', '.', '.', '.', '.',...","[['.', '.', '.', '.', '.', '.', '.', '.', '.',...",player1 wolf,2,0,0,sheep1 eaten


# Feature selection and Instance selection 

The tree classifier in Scikitlearn needs two inputs: 
- Array X of size [N samples, N features] holding the training samples 
- Array Y of integer values [N samples] holding the class labels for the training samples

This implies we need to create such arrays from our training data. We need to both select which features can be useful for our tree (**feature selection**) and which data we want to use for our training (**instance selection**). 

An example of feature selection is the following: is there a rhubarb on the field [Y/N] or is the wolf close to the sheep [Y/N]? We want to figure out features that are meaningful to determine which move is best to take next, then train the tree to see which feature is more important to determine a successful move in the long run. 

An example of instance selection is the following: we might only want to use moves from winning agents, so that the tree only learns from 'excellent' agents. This way it does not learn what bad moves are that it should avoid, only what good moves are that it should learn.

We will need to do feature and instance selection for both the sheep and the wolf, since they have different optimal behavior in the game.

## Sheep

For the sheep example, we will create a feature that involves the opponent's wolf and a food item within two steps. Given that we have four possible outputs (left,right,up,down), we need to have four features  per topic that will all have the label Yes or No for each sample: 
- Is the wolf within two steps above the sheep? 
- Is the wolf within two steps below the sheep? 
- Is the wolf within two steps left of the sheep? 
- Is the wolf within two steps right of the sheep? 
- Is a food item within 2 steps above the sheep?
- Is a food item within 2 steps below the sheep?
- Is a food item within 2 steps left of the sheep?
- Is a food item within 2 steps right of the sheep?

We will iterate through the lines in the training data and for each line determine whether our features are present or not. We will save this in our feature array X_sheep. The class labels, being the move that was made in the training_date, will be saved in our label array Y_sheep.

We will use the order of the features as listed above.

In [3]:
X_sheep = []
Y_sheep = []
number_moves = 0

for game in training_data:
    
    #we want to learn from the winning player, which is the player with the highest score:
    if game.iloc[-1][4] < game.iloc[-1][5]:
        sheep = 's'
        wolf = 'W'
    
    elif game.iloc[-1][4] > game.iloc[-1][5]:
        sheep = 'S'
        wolf = 'w'
    else:
        continue

    rhubarb = 'r'
    grass = 'g'

    #for each game state in our training data
    for index,row in game.iterrows():

        #we don't want games that ended because of an error or because the sheep commited suicide
        if row['reason'] not in ('sheep1 eaten','sheep2 eaten','max_iterations'):
            continue

        #we want to only learn from sheep
        if row['move_made'] == 'player1 wolf' or row['move_made'] == 'player2 wolf':
            continue
        
        number_moves += 1
        
        #this is the move that we are learning from this game state
        move = row['move_made']

        #create empty feature array for this game state
        game_features = []

        #turn the field from before the move from a string back to a list
        field= ast.literal_eval(row['field_before'])

        #get positions of sheep, wolf and food items
        food = []
        y=0
        for field_row in field:
            x = 0
            for item in field_row:
                if item == sheep:
                    sheep_position = (x,y)
                elif item == wolf:
                    wolf_position = (x,y)
                elif item == rhubarb or item == grass:
                    food.append((x,y))
                x += 1
            y+=1

        #feature 1: determine if wolf within two steps up
        if sheep_position[1] - wolf_position[1] <= 2 and sheep_position[1] - wolf_position[1] > 0:
            s_feature1 = 1
        else:
            s_feature1 = 0
        game_features.append(s_feature1)

        #feature 2: determine if wolf within two steps down
        if sheep_position[1] - wolf_position[1] >= -2 and sheep_position[1] - wolf_position[1] < 0:
            s_feature2 = 1
        else:
            s_feature2 = 0
        game_features.append(s_feature2)

        #feature 3: determine if wolf within two steps left
        if sheep_position[0] - wolf_position[0] <= 2 and sheep_position[0] - wolf_position[0] > 0:
            s_feature3 = 1
        else:
            s_feature3 = 0
        game_features.append(s_feature3)

        #feature 4: determine if wolf within two steps right
        if sheep_position[0] - wolf_position[0] >= -2 and sheep_position[0] - wolf_position[0] < 0:
            s_feature4 = 1
        else:
            s_feature4 = 0
        game_features.append(s_feature4)

        s_feature5 = 0
        s_feature6 = 0
        s_feature7 = 0
        s_feature8 = 0

        #determine closest food:
        food_distance = 1000
        food_goal = None
        for food_item in food:
            distance = abs(food_item[0] - sheep_position[0]) + abs(food_item[1] - sheep_position[1])
            if distance < food_distance:
                food_distance = distance
                food_goal = food_item
        
        if food_goal != None:
            #feature 5: determine if closest food is below the sheep
            if sheep_position[1] - food_goal[1] < 0:
                s_feature5 = 1

            #feature 6: determine if closest food is above the sheep
            if sheep_position[1] - food_goal[1] > 0:
                s_feature6 = 1

            #feature 7: determine if closest food is right of the sheep
            if sheep_position[0] - food_goal[0] < 0:
                s_feature7 = 1

            #feature 8: determine if closest food is left of the sheep
            if sheep_position[0] - food_goal[0] > 0:
                s_feature8 = 1

        game_features.append(s_feature5)
        game_features.append(s_feature6)
        game_features.append(s_feature7)
        game_features.append(s_feature8)

        #add features and move to X_sheep and Y_sheep
        X_sheep.append(game_features)
        Y_sheep.append(move)

#this prints an example of our feature and outcome vector:
print(X_sheep[0])
print(Y_sheep[0])
print(number_moves)

[0, 0, 0, 0, 1, 0, 1, 0]
2
675911


## Wolf

For the wolf, we will use the position of the sheep to inspire our features:
- Is the sheep above the wolf?
- Is the sheep below the wolf?
- Is the sheep to the left of the wolf?
- is the sheep to the right of the wolf?
Like with the sheep, we will create two arrays for the training of our wolf's decision tree.

In [4]:
X_wolf = []
Y_wolf = []
number_moves = 0

for game in training_data:

    #for each game state in our training data
    for index,row in game.iterrows():
        
        #we only want games where the sheep was eaten (=the wolf was successful)
        if row['reason'] not in ('sheep1 eaten','sheep2 eaten'):
            continue
        
        #we want to only learn from wolves
        if row['move_made'] == 'player1 sheep' or row['move_made'] == 'player2 sheep':
            continue
        
        number_moves += 1
        
        #we want to learn from the wolf that ate the sheep
        if row['reason'] == 'sheep1 eaten':
            sheep = 'S'
            wolf = 'w'
        else:
            sheep = 's'
            wolf = 'W'

        rhubarb = 'r'
        grass = 'g'

        #this is the move that we are learning from this game state
        move = row['move_made']

        #create empty feature array for this game state
        game_features = []

        #turn the field from before the move from a string back to a list
        field= ast.literal_eval(row['field_before'])
        
        #get positions of sheep, wolf and food items
        y=0
        for field_row in field:
            x = 0
            for item in field_row:
                if item == sheep:
                    sheep_position = (x,y)
                elif item == wolf:
                    wolf_position = (x,y)
                x += 1
            y+=1

        #feature 1: determine if the sheep is above the wolf
        if wolf_position[1] - sheep_position[1] > 0:
            w_feature1 = 1
        else:
            w_feature1 = 0
        game_features.append(w_feature1)

        #feature 2: determine if the sheep is below the wolf
        if wolf_position[1] - sheep_position[1] < 0:
            w_feature2 = 1
        else:
            w_feature2 = 0
        game_features.append(w_feature2)

        #feature 3: determine if the sheep is left of the wolf
        if wolf_position[0] - sheep_position[0] > 0:
            w_feature3 = 1
        else:
            w_feature3 = 0
        game_features.append(w_feature3)

        #feature 4: determine if the sheep is right from the wolf
        if wolf_position[0] - sheep_position[0] < 0:
            w_feature4 = 1
        else:
            w_feature4 = 0
        game_features.append(w_feature4)

        #add features and move to X_wolf and Y_wolf
        X_wolf.append(game_features)
        Y_wolf.append(move)

#this prints an example of our feature and outcome vector:
print(X_wolf[0])
print(Y_wolf[0])
print(number_moves)

[0, 1, 0, 1]
1
171611


# Train sheep

In [5]:
sheep_tree = tree.DecisionTreeClassifier()
sheep_tree = sheep_tree.fit(X_sheep,Y_sheep)

# Train wolf

In [6]:
wolf_tree = tree.DecisionTreeClassifier()
wolf_tree = wolf_tree.fit(X_wolf,Y_wolf)

# Save models to files

We use the library called Pickle to save our trained model. We can later load it in our agent to use it to determine which move your agent should make.

In [7]:
sheep_filename = 'mplayer_sheep_model.sav'
wolf_filename = 'mplayer_wolf_model.sav'

pickle.dump(sheep_tree,open(sheep_filename,'wb'))
pickle.dump(wolf_tree,open(wolf_filename,'wb'))