In [1]:
import gym
import numpy as np
import matplotlib as plt
import random

2

In [10]:
class SailingEnv(gym.Env):
    def __init__(self, max_steps=200, alpha=0.1, gamma=0.9, epsilon=0.9):
        self.action_space = gym.spaces.Discrete(16)
        self.observation_space = gym.spaces.Box(low=np.array([0, 0]), high=np.array([20, 20]), dtype=np.float32)
        self.wind_directions = np.linspace(0, np.pi, 10)
        self.sail_angles = np.linspace(-np.pi/4, np.pi/4, 16)
        self.sail_angle = 0
        #self.boat_position = np.random.randint(90, size=(1, 2))[0]
        #self.target_position = np.array([35, 69])
        self.max_steps = max_steps
        self.current_step = 0
        self.alpha = alpha
        self.gamma = gamma 
        self.epsilon = epsilon
        self.q_table = self.q_table = np.zeros((10, 16, 21, 21, 91)) 
        #q_table is a 5-dimensional NumPy array used to store the Q-values for each 
        #state-action pair in the environment. Here is what each dimension represents:
        #The first dimension corresponds to the 10 possible wind directions that the boat can encounter during the episode.
        #The second to the 16 possible sail angles that the boat can choose in each time step.
        #The third to the boat's x-position, which can range from 0 to 20.
        #The fourth to the boat's y-position, which can range from 0 to 20.
        #The fifth is the manhattan distance between the boat and the target (The Manhattan distance is an appropriate metric to use because it 
        # measures the shortest path that the boat can take to reach the target position, if the boat is only allowed to move vertically or 
        # #horizontally, but not diagonally.)
        
    def reset(self):
        self.wind_direction = np.random.choice(self.wind_directions)
        self.sail_angle = 0
        self.boat_position = np.random.randint(90, size=(1, 2))[0]
        self.target_position = np.array([35, 69])
        self.current_step = 0
        return self.boat_position, self.wind_direction, self.target_position
        
    def step(self, action):
        assert self.action_space.contains(action)

        # Update the boat's position based on the chosen sail angle and wind direction
        self.sail_angle = self.sail_angles[action]
        heading = np.arctan2(np.sin(self.wind_direction - self.sail_angle), np.cos(self.wind_direction - self.sail_angle))
        velocity = np.array([np.cos(heading), np.sin(heading)])
        self.boat_position = self.boat_position + velocity

        # Calculate the reward and check if the episode is done
        distance_to_target = np.linalg.norm(self.boat_position - self.target_position)
        
        self.current_step += 1
        done = False
        truncated = False
        reward = -0.5
        if self.current_step >= self.max_steps: #or not np.any(np.round(self.boat_velocity,2)): #checks whether any element of the rounded self.boat_velocity array is non-zero.
            truncated = True    
     
        else:
            distance_to_target < 1.0 
            reward = 100
            print(distance_to_target)
            done = True
        #reward, done = self.calculate_reward()
        # Update the wind direction for the next time step
        new_wind_direction = np.random.choice(self.wind_directions)
        #return new_wind_direction

        # Return the new observation, reward, and done flag
        #observation = self.get_observation()
        return  reward, done, {}


In [11]:
env = SailingEnv()
env.reset()

(array([52, 17]), 0.0, array([35, 69]))

In [5]:
a=np.array([0,0])
np.any(a)

False

In [12]:
env.step(9)

55.171102834223085


(100, True, {})

In [13]:
env.sail_angle = env.sail_angles[9]
heading = np.arctan2(np.sin(env.wind_direction - env.sail_angle), np.cos(env.wind_direction - env.sail_angle))
velocity = np.array([np.cos(heading), np.sin(heading)])
velocity

array([ 0.98768834, -0.15643447])

In [14]:
random_array=np.random.randint(90, size=(1, 2))[0]

In [18]:
random_array+velocity

array([42.98768834,  9.84356553])

In [19]:
np.rad2deg(env.wind_direction)

0.0

In [20]:
np.rad2deg(env.sail_angle)

9.0

In [31]:
np.rad2deg(np.arctan2(np.sin(env.wind_direction - env.sail_angle), np.cos(env.wind_direction - env.sail_angle)))

39.99999999999999

In [26]:
np.rad2deg(env.wind_direction)

59.99999999999999

In [21]:
env.sail_angle=np.deg2rad(20)

In [22]:
# Initialize Q-table
q_table = np.zeros((10, 16, 21, 21, 91))

In [23]:
class SailingEnv(gym.Env):
    def step_and_update_q_table(self, action, state):
        next_state, reward, done, _ = self.step(action)
        next_wind_direction_idx = np.random.choice(self.wind_directions)
        next_state = (self.manhattan_distance(next_state, self.target_position), next_state[0], next_state[1], next_wind_direction_idx)
        next_max_q_value = np.max(self.q_table[next_state])
        td_error = reward + self.gamma * next_max_q_value - self.q_table[state][action]
        self.q_table[state][action] += self.alpha * td_error
        state = next_state
        self.epsilon = self.min_exploration_rate + (self.max_exploration_rate - self.min_exploration_rate) * np.exp(-self.exploration_decay_rate * episode)
        return next_state, reward, done, _

    def run_episodes(self, num_episodes):
        episode_rewards = []
        for episode in range(num_episodes):
            state = self.reset()
            episode_reward = 0
            while True:
                if np.random.uniform(0, 1) < self.epsilon:
                    action = self.action_space.sample()
                else:
                    action = np.argmax(self.q_table[state])
                next_state, reward, done, _ = self.step_and_update_q_table(action, state)
                state = next_state
                episode_reward += reward
                if done:
                    break
            episode_rewards.append(episode_reward)
            self.epsilon = self.min_exploration_rate + (self.max_exploration_rate - self.min_exploration_rate) * np.exp(-self.exploration_decay_rate * episode)
        return episode_rewards
    

