In [2]:
import gym
import numpy as np

class SailingEnv(gym.Env):
    def __init__(self, max_steps=200):
        self.action_space = gym.spaces.Discrete(3)
        self.observation_space = gym.spaces.Box(low=np.array([0, 0]), high=np.array([10, 10]), dtype=np.float32)
        self.wind_direction = np.array([1.0, 0.0])
        self.rudder_angle = 0
        self.position = np.array([0.0, 0.0])
        self.max_steps = max_steps
        self.current_step = 0
        self.target = np.array([8.0, 9.0])

    def reset(self):
        self.wind_direction = np.array([1.0, 0.0])
        self.rudder_angle = 0
        self.position = np.array([0.0, 0.0])
        self.current_step = 0
        return self.position

    def step(self, action):
        assert self.action_space.contains(action)
        self.current_step += 1
        
        wind_velocity = self.wind_direction * 5
        boat_velocity = np.array([5 * np.cos(self.rudder_angle), 5 * np.sin(self.rudder_angle)])
        relative_velocity = boat_velocity - wind_velocity
        boat_direction = np.arctan2(relative_velocity[1], relative_velocity[0])
        
        self.position += boat_velocity.astype(int)
        self.wind_direction += np.array([np.sin(0.1), np.cos(0.1)])

        reward = np.cos(boat_direction - np.arctan2(self.target[1] - self.position[1], self.target[0] - self.position[0]))
        
        done = self.position[0] >= 10 or self.position[1] >= 10 or self.position[0] < 0 or self.position[1] < 0 or self.current_step >= self.max_steps
        
        return self.position, reward, done, {}

env = SailingEnv()

for episode in range(15):
    observation = env.reset()
    done = False
    episode_reward = 0
    
    while not done:
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        episode_reward += reward
        print(f"Episode: {episode}, Step: {env.current_step}, Location: {observation}, Wind Direction: {env.wind_direction}, Rudder Angle: {env.rudder_angle}")
        
print("Done!")


Episode: 0, Step: 1, Location: [5. 0.], Wind Direction: [1.09983342 0.99500417], Rudder Angle: 0
Episode: 0, Step: 2, Location: [10.  0.], Wind Direction: [1.19966683 1.99000833], Rudder Angle: 0
Episode: 1, Step: 1, Location: [5. 0.], Wind Direction: [1.09983342 0.99500417], Rudder Angle: 0
Episode: 1, Step: 2, Location: [10.  0.], Wind Direction: [1.19966683 1.99000833], Rudder Angle: 0
Episode: 2, Step: 1, Location: [5. 0.], Wind Direction: [1.09983342 0.99500417], Rudder Angle: 0
Episode: 2, Step: 2, Location: [10.  0.], Wind Direction: [1.19966683 1.99000833], Rudder Angle: 0
Episode: 3, Step: 1, Location: [5. 0.], Wind Direction: [1.09983342 0.99500417], Rudder Angle: 0
Episode: 3, Step: 2, Location: [10.  0.], Wind Direction: [1.19966683 1.99000833], Rudder Angle: 0
Episode: 4, Step: 1, Location: [5. 0.], Wind Direction: [1.09983342 0.99500417], Rudder Angle: 0
Episode: 4, Step: 2, Location: [10.  0.], Wind Direction: [1.19966683 1.99000833], Rudder Angle: 0
Episode: 5, Step: 1,