In [9]:
# Import necessary libraries

from gym import Env
from gym.spaces import Discrete, Box 
import numpy as np
import random
import gym


In [10]:
# Wind headings that can occur in the horizontal plane
wind_direction_horizontal = [
    "North",
    "South",
    "East",
    "West",
]

# Wind headings that can occur along the vertical axis
# ("None" means the wind leaves the height unchanged)
wind_direction_vertical = [
    "Up",
    "Down",
    "None",
]


## GYM ENVIRONMENT

In [11]:
# Class that defines the helicopter environment; it inherits from OpenAI Gym's Env class
class HelicopterSpace(Env):
  """Toy helicopter-navigation environment.

  The helicopter starts at (x, y) = (0, 0) at a height of 525. Each step the
  agent chooses an acceleration in [1, 10]; the resulting velocity moves the
  helicopter along both x and y, and a randomly drawn wind adds a drift.
  The episode ends when the helicopter reaches the goal box
  (x and y in [90, 100], height in [520, 540]) or strays too far away
  (x or y above 110, height below 490 or above 570).

  Attributes:
    observation_space: Box with bounds x in [0, 100], y in [0, 100], h in [500, 550].
    action_space: Box with a single acceleration value in [1, 10].
    state: list ``[x, y, h]`` with the current position and height.
    reward: reward accumulated since the last reset.
    initial_velocity: velocity carried over from the previous step.
    action: the action most recently passed to ``step`` (set on first call).
  """

  # Start state of the helicopter: (x, y) = (0, 0) at a height of 525
  START_STATE = (0, 0, 525)

  # Fraction of the wind speed that becomes displacement during one step
  WIND_DRIFT_FACTOR = 0.7

  # The init function of the environment class
  def __init__(self):
    """Create the observation/action spaces and put the helicopter at the start."""

    # The possible observation space is x(0,100), y(0,100) and h(500,550).
    # The dtype is spelled out so the bounds are not silently cast by Box.
    self.observation_space = Box(
      np.array([0, 0, 500], dtype=np.float32),
      np.array([100, 100, 550], dtype=np.float32),
      dtype=np.float32,
    )

    # Possible action i.e. the acceleration
    self.action_space = Box(
      np.array([1], dtype=np.float32),
      np.array([10], dtype=np.float32),
      dtype=np.float32,
    )

    # Start state, zero accumulated reward and zero velocity
    self.reset()


  # Function to get the variable elements of the environment
  def variable_envo(self):
    """Draw the random wind for one step.

    Returns:
      Tuple ``(wdh, wdv, ws)``: a horizontal direction taken from
      ``wind_direction_horizontal``, a vertical direction taken from
      ``wind_direction_vertical`` and a wind speed drawn uniformly
      from [0.1, 5).
    """

    # Get the direction of wind in the horizontal direction randomly
    wdh = np.random.choice(wind_direction_horizontal)

    # Get the direction of wind in the vertical direction randomly
    wdv = np.random.choice(wind_direction_vertical)

    # Get the wind speed randomly between 0.1 and 5
    ws = np.random.uniform(0.1, 5)

    return wdh, wdv, ws


  # Step function each time there is an action
  def step(self, action):
    """Apply one action and advance the environment by one time step.

    Args:
      action: the acceleration, either a scalar or a 1-element array such as
        the value returned by ``action_space.sample()``.

    Returns:
      Tuple ``(state, reward, done)``. ``state`` is the new ``[x, y, h]``
      list, ``reward`` is the reward accumulated since the last reset (not
      the reward of this step alone) and ``done`` tells whether the episode
      has ended. NOTE: this is a 3-tuple, without the ``info`` dict of the
      usual Gym ``step`` return; it is kept that way for existing callers.
    """

    # Draw the wind for this step
    wdh, wdv, ws = self.variable_envo()

    # Remember the action; compute_position reads it from the instance
    self.action = action

    # Position before the move, needed to detect backwards motion
    prev_x, prev_y = self.state[0], self.state[1]

    # Compute the new helicopter position and velocity
    x, y, h, u = self.compute_position(wdh, wdv, ws)

    # Update the state of the environment
    self.state = [x, y, h]

    # The velocity reached now is the starting velocity of the next step
    self.initial_velocity = u

    # Classify the new position; the order of the checks below matters
    in_goal = 90 <= x <= 100 and 90 <= y <= 100 and 520 <= h <= 540
    far_outside = x > 110 or y > 110 or h < 490 or h > 570
    out_of_bounds = x < 0 or x > 100 or y < 0 or y > 100 or h < 500 or h > 550
    moved_backwards = x < prev_x or y < prev_y

    if in_goal:
      # Reached the goal box: large bonus, episode ends
      self.reward = self.reward + 1000
      done = True
    elif far_outside:
      # Strayed well beyond the allowed region: penalty, episode ends
      self.reward = self.reward - 10
      done = True
    elif out_of_bounds:
      # Outside the observation bounds but still recoverable
      self.reward = self.reward - 10
      done = False
    elif moved_backwards:
      # Lost ground along x or y
      self.reward = self.reward - 2
      done = False
    else:
      # Made progress while staying inside the bounds
      self.reward = self.reward + 1
      done = False

    return self.state, self.reward, done

  # Function to compute the new position of the helicopter
  def compute_position(self, wdh, wdv, ws):
    """Compute the position and velocity after one time step.

    Reads ``self.state``, ``self.action`` and ``self.initial_velocity``;
    does not modify the instance.

    Args:
      wdh: horizontal wind direction ("North", "South", "East" or "West").
        For any other value x and y are returned unchanged.
      wdv: vertical wind direction ("Up" or "Down"); any other value,
        including "None", leaves the height unchanged.
      ws: wind speed.

    Returns:
      Tuple ``(x, y, h, v)``: the new position, the new height and the
      velocity reached during this step.
    """

    # Get the current position of the helicopter
    x, y, h = self.state[0], self.state[1], self.state[2]

    # action_space.sample() yields a 1-element array; reduce it to a plain
    # float so the state stays a flat [x, y, h] list of scalars
    acceleration = float(np.asarray(self.action, dtype=float).reshape(-1)[0])

    # Acceleration is the rate of change of velocity
    # acc = (v - u) / (t_2 - t_1) with (t_2 - t_1) = 1, hence v = acc + u
    v = acceleration + self.initial_velocity

    # Displacement due to the velocity: distance = speed * time, with time = 1
    d = v * 1

    # Displacement caused by the wind
    dis = self.WIND_DRIFT_FACTOR * ws

    # Horizontal wind pushes along x (East/West) or along y (North/South)
    if wdh == "East":
      x = x + d + dis
      y = y + d
    elif wdh == "West":
      x = x + d - dis
      y = y + d
    elif wdh == "North":
      x = x + d
      y = y + d + dis
    elif wdh == "South":
      x = x + d
      y = y + d - dis

    # Vertical wind raises or lowers the helicopter
    if wdv == "Up":
      h = h + dis
    elif wdv == "Down":
      h = h - dis

    # Return the new position along with the velocity
    return x, y, h, v


  # The reset function which restores the initial values
  def reset(self):
    """Put the environment back into its start configuration.

    Returns:
      The initial state ``[0, 0, 525]``, so the method can be used as
      ``state = env.reset()``.
    """
    self.state = list(self.START_STATE)
    self.reward = 0
    self.initial_velocity = 0
    return self.state


## TEST THE ENVIRONMENT

In [12]:
# Seed every source of randomness so that Restart & Run All reproduces the run
SEED = 42

# The wind is drawn from NumPy's global random generator inside the environment
np.random.seed(SEED)

# Create an instance of the custom environment
env = HelicopterSpace()

# The random actions come from the action space's own generator
env.action_space.seed(SEED)

# Number of episodes
episodes = 1

# The step counter starts at 1 and an episode stops once it reaches this limit
step_limit = 200

# Loop over all the episodes
for i in range(episodes):

  # Set the initial number of seconds to reach your goal
  step = 1

  # A new episode is not finished yet
  done = False

  # Loop until the episode ends or the step counter reaches the limit
  while not done and step < step_limit:

    step += 1

    # Sample an action from the action_space of the environment
    action = env.action_space.sample()

    # Call the step function within the environment
    state, reward, done = env.step(action)

  # Print the reward at the end of each episode
  print("The reward of this episode is:", reward)

  # Call the reset function at the end of an episode
  env.reset()


The reward of this episode is: -15
