In [2]:
from Connect4 import Connect4
from Robots import Robots
from GamePlay import GamePlay
from tensorflow.keras.models import Sequential, save_model, load_model
import numpy as np
import random

import datetime


import keras
from keras import layers
from keras import models

In [None]:
#class that creates the connect4 environment
class DeepQLAgent():

    def __init__(self,player=1,gameplay=GamePlay(),memory=None):
        self.player=player
        
        self.gameplay=gameplay
        self.ROWS = self.gameplay.ROWS
        self.COLUMNS = self.gameplay.COLUMNS
        
        self.batch_size = 100
        self.lr=.01
        self.INPUT_SHAPE=(self.ROWS, self.COLUMNS, 1)
        self.action_size =self.COLUMNS
        self.gamma=.9
        self.epsilon = 1.0 #starting explore probability
        self.explore_stop = .01 #stopping explore rate
        self.decay_rate = .000001 #decay rate # 0.00001

        self.memory_size = 1000 # number of experiences to keep
        self.memory_start = 100 # starting size of memory bank
        self.memory=self._memory_initiate()
        
        self.model=self._build_model()

    #build model
    def _build_model(self):
        model=models.Sequential()
        model.add(layers.Conv2D(32, kernel_size = (3, 3), activation='relu', input_shape=self.INPUT_SHAPE))

        #model.add(Conv2D(64, (3, 3), activation = 'relu')) 
        model.add(layers.MaxPooling2D(pool_size = (2, 2))) 
        model.add(layers.Dropout(0.25)) 
        model.add(layers.Flatten()) 
        model.add(layers.Dense(64, activation = 'relu')) 
        model.add(layers.Dense(32,activation='relu'))
        model.add(layers.Dense(self.action_size,activation='softmax'))

        model.compile(optimizer='adam',
                     loss='mse',
                     metrics=['accuracy'])
        return model


    def _memory_initiate(self):
        memory=[]
        turn = 0
        #player=1
        status = None
        while len(memory)<self.memory_start:
            if status != 'Keep Playing!': #if we are in a terminal state restart game
                self.gameplay.reset()
                turn = 0

            state = self.gameplay.BOARD.copy()
            actions = self.gameplay.Get_Legal_Moves(self.gameplay.BOARD)
            action = np.random.choice(actions,1)[0]
            #take random actions. record their states,actions, rewards, next states, and status

            #player1
            if turn % 2 ==0:
                self.gameplay.Add_Piece(1,action,self.gameplay.BOARD)
                #turn +=1

            #player2
            elif turn % 2 ==1:
                self.gameplay.Add_Piece(2,action,self.gameplay.BOARD)
                #turn +=1


            status = self.gameplay.Check_Goal(self.gameplay.BOARD)
            if status !='Keep Playing!':
                state_prime = np.zeros((self.gameplay.ROWS,self.gameplay.COLUMNS))
                state_prime[state_prime < 1] = self.player
            else:
                state_prime = self.gameplay.BOARD.copy() #gamestate
            reward = self.gameplay.Get_Score(self.player,state_prime)

            if turn % 2 ==0 and self.player==1:
                memory.append((state,action,reward,state_prime,status)) #memorize this
            if turn % 2 ==1 and self.player==2:
                memory.append((state,action,reward,state_prime,status)) #memorize this
            turn +=1
        return memory    
    

    #function to hold previous states/actions/rewards/nextstates/status
    def memorize(self,state,action,reward,state_prime,done):
        if len(self.memory)>=self.memory_size: #if memory is full remove random element before adding new one
            self.memory.pop(random.randrange(len(self.memory)))  
        self.memory.append((state,action,reward,state_prime,done)) #append memory

    def load(self,name):
        self.model.load_weights(name)

    def save(self,name):
        self.model.save_weights(name)
    

    #predict values of state using model
    #I hate shapes/shaping with numpy/keras; can never get it to work so this is a function to reshape it for me
    def model_predict(self,state):
        state_reshape=np.expand_dims(np.expand_dims(state, axis=3),axis=0)
        return self.model.predict(state_reshape)
    
    #function to take action given state                  
    def make_move(self,state): 
        if np.random.rand() <=self.epsilon:
            #explore
            #print('explore, epsilon is',self.epsilon)
            legal_actions=self.gameplay.Get_Legal_Moves(state)
            return random.randrange(len(legal_actions))
        else:
            #don't explore
            qvals = self.model_predict(state)
            return np.argmax(qvals)


    def replay(self,batch_size):
        batch = random.sample(self.memory,self.batch_size)
        for el in batch:
            if self.player==1: #replace 2's with -1s
                el[0][el[0]>1]=-1
                el[3][el[3]>1]=-1
            if self.player==2: #replace 1s with -1s. replace 2's with 1s
                el[0][(el[0] < 2) & (el[0] > 0)] = -1
                el[0][el[0]>1]=1                
                el[3][(el[3] < 2) & (el[3] > 0)] = -1
                el[3][el[3]>1]=1                
        
        states = []
        targets_f = []
        for state,action, reward, state_prime,status in batch:
            if status != 'Keep Playing!':
                target = reward 
            elif status == 'Keep Playing!':
                target = (reward + self.gamma * np.argmax(self.model_predict(state_prime)[0])) #idk why they have first element.

            target_f = self.model_predict(state_prime)
            targets_f.append(target_f[0])
            states.append(state)
            
        states=np.expand_dims(np.array(states), axis=3) #reshaping to train model
        targets_f=np.array(targets_f)

        history = self.model.fit(states,targets_f,epochs=1,verbose=0)
        loss = history.history['loss'][0]
        if self.epsilon <= self.explore_stop:
            self.epsilon=self.explore_stop
        else:
            self.epsilon = self.epsilon-self.decay_rate
        #print('epsilon:',self.epsilon)



In [None]:
batch_size=100
agent=DeepQLAgent()
agent.replay(batch_size)

agent2=DeepQLAgent(player=2)
agent2.replay(batch_size)


gameplay=GamePlay()

from Robots import Robots
bot=Robots(depth=1)

In [None]:
from Robots import Robots
bot=Robots(depth=1)
#bot.MiniMax_bot(state,2)

In [None]:
agent.epsilon=.4

In [None]:
EPISODES = 10000
episode=0
threshold=60

batch_size=100
turn=0
p1wins=0
p2wins=0

p1prev=0
p2prev=0

while episode<EPISODES:

    #player1
    if turn % 2 ==0:
        state=gameplay.BOARD.copy() #get state
        #print('player1:',state)
        action=agent.make_move(state) #get action
        legal=gameplay.Get_Legal_Moves(state)
        #print('ACTION:',action)
        #print(state)
        gameplay.Add_Piece(1,action,gameplay.BOARD) #do action
        state_prime = gameplay.BOARD.copy() #get next state
        reward = gameplay.Get_Score(1,state_prime) #reward for taking action/being in next state
        status=gameplay.Check_Goal(gameplay.BOARD)
        if action not in legal:
            reward = -10000000000000

        if status=='Player 1 wins!':
            reward==5000
        agent.memorize(state,action,reward,state_prime,status)

    if turn % 2 ==1:
        state=gameplay.BOARD.copy() #state prime
        #print('player2:',state)
        #action=agent2.make_move(state)
        action=bot.MiniMax_bot(state,2)
        gameplay.Add_Piece(2,action,gameplay.BOARD)
        status=gameplay.Check_Goal(gameplay.BOARD)

    #check if it is the end of the episode
    if status !='Keep Playing!' or turn>threshold:
        if status == 'Player 1 wins!':
            p1wins=p1wins+1
        if status == 'Player 2 wins!':
            p2wins=p2wins+1
            
        if episode % 500==0:
            winrate=(p1wins-p1prev)/(p1wins+p2wins-p1prev-p2prev)
            print('EPISODE: ',episode)
            print('epsilon:',agent.epsilon)
            print('player 1 wins:',p1wins,'player 2 wins:',p2wins, 'winrate:',winrate)
            p1prev=p1wins
            p2prev=p2wins
        if turn % 2 ==1:  #updating reward in our memory if we lost
            agent.memory[-1]=(agent.memory[-1][0],agent.memory[-1][1],-5000,agent.memory[-1][3],status) 

        loss=agent.replay(batch_size) # do the thing
        agent.save('weights.h5')
        #agent2.load('weights.h5') # load weights of the updated agent for player 2
        #agent2.epsilon=agent.epsilon
        turn=0
        if episode==100:
            print('EPISODE:',episode,'epsilon:',agent.epsilon)
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            keras.models.save_model(agent.model,'mymodel_'+str(episode)+'.h5')

        if episode==1000:
            print('EPISODE:',episode,'epsilon:',agent.epsilon)
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            keras.models.save_model(agent.model,'mymodel_'+str(episode)+'.h5')
            
        if episode==10000:
            print('EPISODE:',episode,'epsilon:',agent.epsilon)
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            keras.models.save_model(agent.model,'mymodel_'+str(episode)+'.h5')
        if episode==20000:
            print('EPISODE:',episode,'epsilon:',agent.epsilon)
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            keras.models.save_model(agent.model,'mymodel_'+str(episode)+'.h5')
        if episode==50000:
            print('EPISODE:',episode,'epsilon:',agent.epsilon)
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            keras.models.save_model(agent.model,'mymodel_'+str(episode)+'.h5')
           
        if episode % 100000==0:
            print('EPISODE:',episode,'epsilon:',agent.epsilon)
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            keras.models.save_model(agent.model,'mymodel_'+str(episode)+'.h5')

        gameplay.reset()
        episode=episode+1
        #print('memory:',agent.memory)

    turn = turn + 1


In [None]:
--------------------------------------------------------

In [3]:
#class that creates the connect4 environment
class DeepQLAgent():

    def __init__(self,player=1,gameplay=GamePlay(),memory=None):
        self.player=player
        
        self.gameplay=gameplay
        self.ROWS = self.gameplay.ROWS
        self.COLUMNS = self.gameplay.COLUMNS
        
        self.batch_size = 50
        self.lr=.001
        self.INPUT_SHAPE=(self.ROWS, self.COLUMNS, 1)
        self.action_size =self.COLUMNS
        self.gamma=.9
        self.epsilon = 1.0 #starting explore probability
        self.explore_stop = .01 #stopping explore rate
        self.decay_rate = 0.00001 #decay rate # 0.00001

        self.memory_size = 5000 # number of experiences to keep
        self.memory_start = 50 # starting size of memory bank
        self.memory=self._memory_initiate()
        
        self.model=self._build_model()
        self.losses=[]

    #build model
    def _build_model(self):
        model=models.Sequential()
        '''model.add(layers.Flatten(input_shape=self.INPUT_SHAPE))
        #model.add(Conv2D(64, (3, 3), activation = 'relu')) 
        model.add(layers.Dense(64, activation = 'relu'))
        model.add(layers.Dense(64, activation = 'relu')) 
        model.add(layers.Dense(32,activation='relu'))
        model.add(layers.Dense(self.action_size,activation='linear'))'''

        model.add(layers.Conv2D(32, kernel_size = (3, 3), activation='relu', input_shape=self.INPUT_SHAPE))

        #model.add(Conv2D(64, (3, 3), activation = 'relu')) 
        model.add(layers.MaxPooling2D(pool_size = (2, 2))) 
        model.add(layers.Dropout(0.25)) 
        model.add(layers.Flatten()) 
        model.add(layers.Dense(64, activation = 'relu')) 
        model.add(layers.Dense(32,activation='relu'))
        model.add(layers.Dense(self.action_size,activation='linear'))

        optimizer = keras.optimizers.Adam(lr=0.01)
        model.compile(loss='mse', optimizer=optimizer)
        
        return model


    def _memory_initiate(self):
        memory=[]
        turn = 0
        #player=1
        status = None
        while len(memory)<self.memory_start:
            if status != 'Keep Playing!': #if we are in a terminal state restart game
                self.gameplay.reset()
                turn = 0

            state = self.gameplay.BOARD.copy()
            actions = self.gameplay.Get_Legal_Moves(self.gameplay.BOARD)
            action = np.random.choice(actions,1)[0]
            #take random actions. record their states,actions, rewards, next states, and status

            #player1
            if turn % 2 ==0:
                self.gameplay.Add_Piece(1,action,self.gameplay.BOARD)
                #turn +=1

            #player2
            elif turn % 2 ==1:
                self.gameplay.Add_Piece(2,action,self.gameplay.BOARD)
                #turn +=1


            status = self.gameplay.Check_Goal(self.gameplay.BOARD)
            if status !='Keep Playing!':
                state_prime = np.zeros((self.gameplay.ROWS,self.gameplay.COLUMNS))
                state_prime[state_prime < 1] = self.player
            else:
                state_prime = self.gameplay.BOARD.copy() #gamestate
            reward = self.gameplay.Get_Score(self.player,state_prime)

            if turn % 2 ==0 and self.player==1:
                memory.append((state,action,reward,state_prime,status)) #memorize this
            if turn % 2 ==1 and self.player==2:
                memory.append((state,action,reward,state_prime,status)) #memorize this
            turn +=1
        return memory    
    

    #function to hold previous states/actions/rewards/nextstates/status
    def memorize(self,state,action,reward,state_prime,done):
        if len(self.memory)>=self.memory_size: #if memory is full remove random element before adding new one
            self.memory.pop(random.randrange(len(self.memory)))  
        self.memory.append((state,action,reward,state_prime,done)) #append memory

    def load(self,name):
        self.model.load_weights(name)

    def save(self,name):
        self.model.save_weights(name)
    

    #predict values of state using model
    #I hate shapes/shaping with numpy/keras; can never get it to work so this is a function to reshape it for me
    def model_predict(self,state):
        state_reshape=np.expand_dims(np.expand_dims(state, axis=3),axis=0)
        return self.model.predict(state_reshape)
    
    #function to take action given state                  
    def make_move(self,state): 
        if np.random.rand() <=self.epsilon:
            #explore
            #print('explore, epsilon is',self.epsilon)
            legal_actions=self.gameplay.Get_Legal_Moves(state)
            return random.randrange(len(legal_actions))
        else:
            #don't explore
            qvals = self.model_predict(state)
            return np.argmax(qvals)


    def replay(self,batch_size):
        batch = random.sample(self.memory,self.batch_size)
        for el in batch:
            if self.player==1: #replace 2's with -1s
                el[0][el[0]>1]=-1
                el[3][el[3]>1]=-1
            if self.player==2: #replace 1s with -1s. replace 2's with 1s
                el[0][(el[0] < 2) & (el[0] > 0)] = -1
                el[0][el[0]>1]=1                
                el[3][(el[3] < 2) & (el[3] > 0)] = -1
                el[3][el[3]>1]=1                
        
        states = []
        targets_f = []
        for state,action, reward, state_prime,status in batch:
            if status != 'Keep Playing!':
                target = reward 
            elif status == 'Keep Playing!':
                if action not in gameplay.Get_Legal_Moves(state): #if not a legal move make target the negative reward
                    target = reward
                else:
                    target = (reward + self.gamma * np.argmax(self.model_predict(state_prime)[0])) #idk why they have first element.

            target_f = self.model_predict(state_prime)
            target_f[0][action]=target
            targets_f.append(target_f[0])
            states.append(state)
            
        states=np.expand_dims(np.array(states), axis=3) #reshaping to train model
        targets_f=np.array(targets_f)

        history = self.model.fit(states,targets_f,epochs=1,verbose=0)
        loss = history.history['loss'][0]
        self.losses.append(loss)
        if self.epsilon <= self.explore_stop:
            self.epsilon=self.explore_stop
        else:
            self.epsilon = self.epsilon-self.decay_rate
        #print('epsilon:',self.epsilon)



In [6]:
gameplay=GamePlay()
batch_size=50
agent=DeepQLAgent()
agent.replay(batch_size)

agent2=DeepQLAgent(player=2)
agent2.replay(batch_size)


from Robots import Robots
bot=Robots(depth=1)
#bot.MiniMaxAlphaBeta_bot(state)



In [7]:
#secondgo
EPISODES = 200000
episode=0
threshold=60

batch_size=50
turn=0
p1wins=0
p2wins=0

p1prev=0
p2prev=0

#agent.epsilon=.5 .6
#agent.epsilon=.2

while episode<EPISODES:

    #player1
    if turn % 2 ==0:
        state=gameplay.BOARD.copy() #get state
        action=agent.make_move(state) #get action
        gameplay.Add_Piece(1,action,gameplay.BOARD) #do action
        status=gameplay.Check_Goal(gameplay.BOARD)

    if turn % 2 ==1:
        #action2=bot.Rando_bot(state)
        #action2=agent2.make_move(state)
        action2=bot.MiniMaxAlphaBeta_bot(state,2)
        gameplay.Add_Piece(2,action2,gameplay.BOARD)
        state_prime = gameplay.BOARD.copy()
        reward = 0#gameplay.Get_Score(1,state_prime)
        status=gameplay.Check_Goal(gameplay.BOARD)

    if turn % 2 ==0:
        if status == 'Player 1 wins!':
            reward=5000
            state_prime = gameplay.BOARD.copy()
            agent.memorize(state,action,reward,state_prime,status)
        if status =='Player 2 wins!':
            reward=-5000
            agent.memorize(state,action,reward,state_prime,status)
        elif turn==0:
            pass
        else:
            if action not in gameplay.Get_Legal_Moves(gameplay.BOARD):
                reward=-5000
            agent.memorize(state,action,reward,state_prime,status)


    #check if it is the end of the episode
    if status !='Keep Playing!' or turn>threshold:
        if status == 'Player 1 wins!':
            p1wins=p1wins+1
        if status == 'Player 2 wins!':
            p2wins=p2wins+1


        if episode % 1000==0:
            print('EPISODE: ',episode)
            print('Avg loss last 1000:',round(sum(agent.losses)/len(agent.losses)+.001,2),'epsilon:',agent.epsilon)
            agent.losses=[]
            winrate=round((p1wins-p1prev)/(p1wins+p2wins-p1prev-p2prev+.001),2)
            print('player 1 wins:',p1wins,'player 2 wins:',p2wins, 'winrate:',winrate)
            p1prev=p1wins
            p2prev=p2wins

        loss=agent.replay(batch_size) # do the thing
        
        agent.save('weights.h5')
        agent2.load('weights.h5') # load weights of the updated agent for player 2
        agent2.epsilon=agent.epsilon
        turn=0
        if episode==100:
            print('EPISODE:',episode,'epsilon:',agent.epsilon)
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            keras.models.save_model(agent.model,'mymodel_'+str(episode)+'.h5')

        if episode==1000:
            print('EPISODE:',episode,'epsilon:',agent.epsilon)
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            keras.models.save_model(agent.model,'mymodel_'+str(episode)+'.h5')
            
        if episode==10000:
            print('EPISODE:',episode,'epsilon:',agent.epsilon)
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            keras.models.save_model(agent.model,'mymodel_'+str(episode)+'.h5')
        if episode==20000:
            print('EPISODE:',episode,'epsilon:',agent.epsilon)
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            keras.models.save_model(agent.model,'mymodel_'+str(episode)+'.h5')
        if episode==50000:
            print('EPISODE:',episode,'epsilon:',agent.epsilon)
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            keras.models.save_model(agent.model,'mymodel_'+str(episode)+'.h5')
           
        if episode % 100000==0:
            print('EPISODE:',episode,'epsilon:',agent.epsilon)
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            keras.models.save_model(agent.model,'mymodel_'+str(episode)+'.h5')

        gameplay.reset()
        episode=episode+1
        #print('memory:',agent.memory)

    turn = turn + 1


EPISODE:  0
Avg loss last 1000: 162816.11 epsilon: 0.99999
player 1 wins: 0 player 2 wins: 1 winrate: 0.0




EPISODE: 0 epsilon: 0.9999800000000001
2020-12-01 17:09:21
EPISODE: 100 epsilon: 0.9989800000000046
2020-12-01 17:09:42
EPISODE:  1000
Avg loss last 1000: 125656.12 epsilon: 0.9899900000000456
player 1 wins: 26 player 2 wins: 975 winrate: 0.03
EPISODE: 1000 epsilon: 0.9899800000000456
2020-12-01 17:12:59


KeyboardInterrupt: 

In [None]:
agent.losses

In [None]:
        for el in batch:
            if self.player==1: #replace 2's with -1s
                el[0][el[0]>1]=-1
                el[3][el[3]>1]=-1
            if self.player==2: #replace 1s with -1s. replace 2's with 1s
                el[0][(el[0] < 2) & (el[0] > 0)] = -1
                el[0][el[0]>1]=1                
                el[3][(el[3] < 2) & (el[3] > 0)] = -1
                el[3][el[3]>1]=1                
        
        states = []
        targets_f = []

In [None]:
states = []
targets_f = []
batch = random.sample(agent.memory,agent.batch_size)
for state,action, reward, state_prime,status in batch:
    if status != 'Keep Playing!':
        target = reward
    elif status == 'Keep Playing!':
        if action not in gameplay.Get_Legal_Moves(state): #if not a legal move make target the negative reward
            target = reward
        else:
             target = (reward + agent.gamma * np.argmax(agent.model_predict(state_prime))) #idk why they have first element.

    target_f = agent.model_predict(state_prime)
    print(target_f[0][action])
    targets_f.append(target_f[0][action])
    states.append(state)
    #print('STATE:',state)
    #print('ACTION:',action)
    #print('target:',target)
    #print('reward:',reward)

            
states=np.expand_dims(np.array(states), axis=3) #reshaping to train model
targets_f=np.array(targets_f)


In [None]:
keras.models.save_model(agent.model,'mymodel_'+str(episode)+'.h5')

In [None]:
agent.memory

In [None]:
for mem in agent.memory:
    #print(mem[0])
    print(np.argmax(agent.model_predict(mem[0])))

In [None]:
from tensorflow.keras.models import Sequential, save_model, load_model
keras.models.save_model(agent.model,'mymodel.h5')

In [None]:
agent.memory[-1][2]

In [None]:
agent.memory[1][0]

In [None]:
gameplay.BOARD

In [None]:
if status != 'Keep Playing!': #if we are in a terminal state restart game
    self.gameplay.reset()
    turn = 0

state = self.gameplay.BOARD.copy()
actions = self.gameplay.Get_Legal_Moves(self.gameplay.BOARD)
action = np.random.choice(actions,1)[0]
#take random actions. record their states,actions, rewards, next states, and status

    #player1
if turn % 2 ==0:
    self.gameplay.Add_Piece(1,action,self.gameplay.BOARD)
        #turn +=1

    #player2
elif turn % 2 ==1:
    self.gameplay.Add_Piece(2,action,self.gameplay.BOARD)
        #turn +=1


status = self.gameplay.Check_Goal(self.gameplay.BOARD)
if status !='Keep Playing!':
    state_prime = np.zeros((self.gameplay.ROWS,self.gameplay.COLUMNS))
    state_prime[state_prime < 1] = 1
else:
    state_prime = self.gameplay.BOARD.copy() #gamestate
reward = self.gameplay.get_reward(self.player,state)

if turn % 2 ==0 and self.player==1:
    memory.append((state,action,reward,state_prime,status)) #memorize this
if turn % 2 ==1 and self.player==2:
    memory.append((state,action,reward,state_prime,status)) #memorize this
turn +=1


In [None]:

keras.models.save_model(
agent1.model,'mymodel.h5')

In [None]:
#saving model and using it for predictions
from tensorflow.keras.models import Sequential, save_model, load_model

keras.models.save_model(
agent.model,'mymodel.h5')

loaded_model = load_model('mymodel.h5')
state=np.expand_dims(np.expand_dims(gameplay.BOARD, axis=3),axis=0)
loaded_model.predict(state)

In [None]:
state=np.expand_dims(np.expand_dims(gameplay.BOARD, axis=3),axis=0)
loaded_model.predict(state)

In [None]:
import keras
from keras import layers
from keras import models

model=models.Sequential()
model.add(layers.Conv2D(16, (3,3), activation='relu', input_shape=INPUT_SHAPE))
model.add(layers.MaxPooling2D((2,2)))

model.add(layers.Flatten())
model.add(layers.Dense(32,activation='relu'))
model.add(layers.Dense(action_size,activation='softmax'))

model.compile(optimizer='adam',
             loss='categorical_crossentropy',
             metrics=['accuracy'])
model.summary()