# Script for checking trained weights for Bipedal Walker
#### (Only enable env.render() when running on a local machine; it does not work on Google Colab.)

In [1]:
import numpy as np
from tensorflow import keras
import matplotlib.pyplot as plt
import gym

In [2]:
# Policy network: a 24-value state vector in, 4 tanh-squashed outputs (used as
# the action passed to env.step) out, with three hidden ReLU layers of 128 units.
# NOTE: the shape must be a 1-tuple; `(24)` without the trailing comma is just
# the int 24, which newer Keras versions refuse to interpret as a shape.
stateInput = keras.Input(shape=(24,))

dense1 = keras.layers.Dense(units=128, activation="relu")(stateInput)
dense2 = keras.layers.Dense(units=128, activation="relu")(dense1)
dense3 = keras.layers.Dense(units=128, activation="relu")(dense2)

outputLayer = keras.layers.Dense(units=4, activation="tanh")(dense3)

model = keras.Model(inputs=stateInput, outputs=outputLayer)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
# The layer layout above has to match the network the weights were saved from,
# otherwise load_weights cannot map the stored tensors onto this model.
model.load_weights("Bipedal2.h5")

In [3]:
def getBipedalWalkerFitness():
    """Run one BipedalWalker-v3 episode with the global ``model`` and score it.

    The score is the sum of the environment's per-step rewards, shaped by two
    custom terms:

    * a penalty of 70 when the observation stops changing between two
      consecutive steps (the episode is then ended early), and
    * a bonus of ``(hull_x - 4.5) * 10`` added once the episode has ended,
      where ``hull_x`` is the final x position of the walker's hull.

    The score is printed (as before) and also returned so callers can use it.

    Returns:
        The accumulated fitness of the episode.
    """
    maxSteps = 2002            # hard cap on loop iterations
    stallTolerance = 5E-7      # observation change below this counts as "stuck"
    stallPenalty = 70
    # NOTE(review): 4.5 is presumably (roughly) the hull's starting x, so the
    # bonus rewards distance travelled -- confirm against the environment.
    startOffsetX = 4.5
    distanceWeight = 10

    env = gym.make('BipedalWalker-v3')
    try:
        done = False
        bipedalWalkerReward = 0
        # The model expects a batch axis, so observations are kept as (1, n).
        prevObs = np.expand_dims(env.reset(), axis=0)

        for _step in range(maxSteps):
            if done:
                # gym.make returns a wrapped env; go through `unwrapped` so the
                # hull lookup does not rely on wrapper attribute forwarding.
                hullX = env.unwrapped.hull.position[0]
                bipedalWalkerReward += (hullX - startOffsetX) * distanceWeight
                break
            # Uncomment to watch the episode (local machine only):
            # env.render()
            action = model.predict(prevObs)[0]

            # Unpacks the step result as (observation, reward, done, info).
            state, reward, done, _ = env.step(action)
            state = np.expand_dims(state, axis=0)

            # Stall detection. NOTE(review): this sums *signed* differences, so
            # opposite changes could cancel out; left unchanged so scores stay
            # comparable with earlier runs.
            if np.abs(np.sum(state - prevObs)) < stallTolerance:
                reward -= stallPenalty
                done = True

            prevObs = state
            bipedalWalkerReward += reward
    finally:
        # Release the environment even if prediction or stepping raised.
        env.close()

    print(bipedalWalkerReward)
    return bipedalWalkerReward

In [4]:
def getOfficialBipedalWalkerFitness():
    """Run one rendered BipedalWalker-v3 episode and report the plain reward.

    Unlike ``getBipedalWalkerFitness`` this variant adds no distance bonus and
    no stall penalty: the score is only the sum of the rewards returned by
    ``env.step``. The episode is still cut short when the observation stops
    changing between two consecutive steps.

    ``env.render()`` is called every step, so this needs a display (it is
    meant for a local machine).

    The score is printed (as before) and also returned so callers can use it.

    Returns:
        The accumulated environment reward of the episode.
    """
    maxSteps = 2002         # hard cap on loop iterations
    stallTolerance = 5E-7   # observation change below this counts as "stuck"

    env = gym.make('BipedalWalker-v3')
    try:
        done = False
        bipedalWalkerReward = 0
        # The model expects a batch axis, so observations are kept as (1, n).
        prevObs = np.expand_dims(env.reset(), axis=0)

        for _step in range(maxSteps):
            if done:
                break
            env.render()
            action = model.predict(prevObs)[0]

            # Unpacks the step result as (observation, reward, done, info).
            state, reward, done, _ = env.step(action)
            state = np.expand_dims(state, axis=0)

            # Stall detection only ends the episode here; the reward of this
            # final step is still counted and nothing is subtracted.
            # NOTE(review): sums *signed* differences, so opposite changes
            # could cancel out; left unchanged to keep scores comparable.
            if np.abs(np.sum(state - prevObs)) < stallTolerance:
                done = True

            prevObs = state
            bipedalWalkerReward += reward
    finally:
        # Release the environment (and its render window) even on errors.
        env.close()

    print(bipedalWalkerReward)
    return bipedalWalkerReward

In [5]:
numberOfEpisodes = 2
# Each pass runs one full episode and prints its score. Swap which of the two
# calls below is commented out to choose the fitness function.
for episode in range(numberOfEpisodes):
    getBipedalWalkerFitness()
    # getOfficialBipedalWalkerFitness()



262.36624589938623
305.1240566441587
