In [7]:
import gym
from gym import spaces
import numpy as np
import pandas as pd

class TradingEnv(gym.Env):
    def __init__(self):
        super(TradingEnv, self).__init__()

        # Load the CSV file with trading data
        self.data = pd.read_csv("C:\\Users\\Yash\\Downloads\\tata.csv")

        # Define the action space
        self.action_space = spaces.MultiDiscrete([3, 100])  # 3 actions: 0 (sell), 1 (hold), 2 (buy), with quantity 0-100

        # Define the observation space
        self.observation_space = spaces.Box(low=0, high=1, shape=(45, len(self.data.columns) - 1), dtype=np.float32)

        # Initialize other parameters
        self.balance = 100000
        self.shares_held = 0
        self.current_step = 45  # Start from the 45th candle
        self.reward_range = (-np.inf, np.inf)
        self.current_charge = 0
        self.profit = 0

    def reset(self):
        # Reset the environment to the initial state
        self.balance = 100000
        self.shares_held = 0
        self.current_step = 45  # Start from the 45th candle
        self.current_charge = 0
        self.profit = 0

        # Get the initial observation
        observation = self.get_observation()
        return observation

    def step(self, action):
        # Execute the selected action
        if action[0] == 0:  # Sell
            if self.shares_held >= action[1]:
                self.balance += action[1] * self.data.iloc[self.current_step]['Close']
                self.shares_held -= action[1]
                self.current_charge += 20
            else:
                return self.get_observation(), -10, True, {}

        elif action[0] == 2:  # Buy
            if self.balance >= action[1] * self.data.iloc[self.current_step]['Close']:
                self.balance -= action[1] * self.data.iloc[self.current_step]['Close']
                self.shares_held += action[1]
                self.current_charge += 100

        # Deduct charge every 90th candle
        if self.current_step % 90 == 0:
            self.balance -= 100

        # Check RSI condition
        if self.data.iloc[self.current_step]['RSI'] > 45 and action[0] == 2:  # Don't buy if RSI > 45
            return self.get_observation(), -5, True, {}

        # Update the current step
        self.current_step += 1

        # Calculate the reward
        reward = self.calculate_reward()

        # Check if the episode is done
        done = self.current_step == len(self.data) - 1 or self.balance <= 0

        return self.get_observation(), reward, done, {}

    def get_observation(self):
        # Get the current observation (past 45 candle data)
        observation = self.data.iloc[self.current_step - 45:self.current_step, :-1].values
        return observation

    def calculate_reward(self):
        # Calculate the reward based on profit/loss
        current_value = self.balance + self.shares_held * self.data.iloc[self.current_step]['Close']
        reward = current_value - 100000 - self.current_charge
        return reward

    def render(self):
        # Display current balance and shares held
        print(f"Balance: {self.balance}, Shares Held: {self.shares_held}")

# Test the environment
if __name__ == "__main__":
    env = TradingEnv()
    observation = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()  # Random action for testing
        observation, reward, done, _ = env.step(action)
        env.render()


Balance: 100000, Shares Held: 0
Balance: 99340.91382029, Shares Held: 70


In [8]:
MultiDiscrete([3, 100])

NameError: name 'MultiDiscrete' is not defined

In [11]:
import gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv


# Import your custom TradingEnv (update the import statement if needed)
# from trading_env import TradingEnv  # Make sure to replace 'trading_env' with your file name

# Create a function to evaluate the trained model
def evaluate_trained_model(model, env, num_episodes=10):
    mean_rewards, std_rewards = evaluate_policy(model, env, n_eval_episodes=num_episodes)
    return mean_rewards, std_rewards

# Create and wrap the custom environment
env = TradingEnv()
env = DummyVecEnv([lambda: env])

# Create the PPO agent
model = PPO("MlpPolicy", env, verbose=1)

# Train the model (you can adjust the number of training steps)
model.learn(total_timesteps=1000000)

# Evaluate the trained model
mean_rewards, std_rewards = evaluate_trained_model(model, env)

print(f"Mean Reward: {mean_rewards:.2f}, Std Reward: {std_rewards:.2f}")

# Test the model for a few episodes
for _ in range(5):
    obs = env.reset()
    done = False
    total_reward = 0
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, _ = env.step(action)
        total_reward += reward
        env.render()
    print(f"Episode Reward: {total_reward:.2f}")




Using cpu device
-----------------------------
| time/              |      |
|    fps             | 833  |
|    iterations      | 1    |
|    time_elapsed    | 2    |
|    total_timesteps | 2048 |
-----------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 654        |
|    iterations           | 2          |
|    time_elapsed         | 6          |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.19097495 |
|    clip_fraction        | 0.69       |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.6       |
|    explained_variance   | -0.0082    |
|    learning_rate        | 0.0003     |
|    loss                 | 3.05       |
|    n_updates            | 10         |
|    policy_gradient_loss | -0.121     |
|    value_loss           | 30.7       |
----------------------------------------
-----------------------------------

-------------------------------------------
| time/                   |               |
|    fps                  | 578           |
|    iterations           | 13            |
|    time_elapsed         | 46            |
|    total_timesteps      | 26624         |
| train/                  |               |
|    approx_kl            | 0.00014299608 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -4.77         |
|    explained_variance   | -0.000384     |
|    learning_rate        | 0.0003        |
|    loss                 | 3.35e+04      |
|    n_updates            | 120           |
|    policy_gradient_loss | -0.000482     |
|    value_loss           | 4.73e+04      |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 578          |
|    iterations           | 14           |
|    time_elapsed         | 49      

------------------------------------------
| time/                   |              |
|    fps                  | 580          |
|    iterations           | 24           |
|    time_elapsed         | 84           |
|    total_timesteps      | 49152        |
| train/                  |              |
|    approx_kl            | 2.876157e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -4.63        |
|    explained_variance   | 0.000672     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.79e+05     |
|    n_updates            | 230          |
|    policy_gradient_loss | -0.000259    |
|    value_loss           | 6.38e+05     |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 580           |
|    iterations           | 25            |
|    time_elapsed         | 88            |
|    t

-----------------------------------------
| time/                   |             |
|    fps                  | 580         |
|    iterations           | 35          |
|    time_elapsed         | 123         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.020256128 |
|    clip_fraction        | 0.242       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.56       |
|    explained_variance   | -0.388      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0713     |
|    n_updates            | 340         |
|    policy_gradient_loss | -0.0433     |
|    value_loss           | 0.31        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 580        |
|    iterations           | 36         |
|    time_elapsed         | 126        |
|    total_timesteps      | 73728      

-----------------------------------------
| time/                   |             |
|    fps                  | 581         |
|    iterations           | 46          |
|    time_elapsed         | 162         |
|    total_timesteps      | 94208       |
| train/                  |             |
|    approx_kl            | 0.019200068 |
|    clip_fraction        | 0.175       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.28       |
|    explained_variance   | -4.5        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.129       |
|    n_updates            | 450         |
|    policy_gradient_loss | -0.0453     |
|    value_loss           | 2.08        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 581         |
|    iterations           | 47          |
|    time_elapsed         | 165         |
|    total_timesteps      | 96256 

------------------------------------------
| time/                   |              |
|    fps                  | 581          |
|    iterations           | 57           |
|    time_elapsed         | 200          |
|    total_timesteps      | 116736       |
| train/                  |              |
|    approx_kl            | 2.052082e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -4.2         |
|    explained_variance   | 0.0316       |
|    learning_rate        | 0.0003       |
|    loss                 | 1.48e+04     |
|    n_updates            | 560          |
|    policy_gradient_loss | -0.000372    |
|    value_loss           | 2.68e+04     |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 580           |
|    iterations           | 58            |
|    time_elapsed         | 204           |
|    t

-----------------------------------------
| time/                   |             |
|    fps                  | 580         |
|    iterations           | 68          |
|    time_elapsed         | 239         |
|    total_timesteps      | 139264      |
| train/                  |             |
|    approx_kl            | 0.027332913 |
|    clip_fraction        | 0.263       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.09       |
|    explained_variance   | -8.82       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0834     |
|    n_updates            | 670         |
|    policy_gradient_loss | -0.0547     |
|    value_loss           | 0.434       |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 580          |
|    iterations           | 69           |
|    time_elapsed         | 243          |
|    total_timesteps      | 1

-----------------------------------------
| time/                   |             |
|    fps                  | 581         |
|    iterations           | 79          |
|    time_elapsed         | 278         |
|    total_timesteps      | 161792      |
| train/                  |             |
|    approx_kl            | 0.017710153 |
|    clip_fraction        | 0.244       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.96       |
|    explained_variance   | -8.51       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00637     |
|    n_updates            | 780         |
|    policy_gradient_loss | -0.0293     |
|    value_loss           | 1.78        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 581         |
|    iterations           | 80          |
|    time_elapsed         | 281         |
|    total_timesteps      | 163840

-------------------------------------------
| time/                   |               |
|    fps                  | 581           |
|    iterations           | 90            |
|    time_elapsed         | 316           |
|    total_timesteps      | 184320        |
| train/                  |               |
|    approx_kl            | 0.00032652495 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -3.79         |
|    explained_variance   | 0.137         |
|    learning_rate        | 0.0003        |
|    loss                 | 3.28e+04      |
|    n_updates            | 890           |
|    policy_gradient_loss | -0.00041      |
|    value_loss           | 5.42e+04      |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 582         |
|    iterations           | 91          |
|    time_elapsed         | 320         

-----------------------------------------
| time/                   |             |
|    fps                  | 582         |
|    iterations           | 101         |
|    time_elapsed         | 354         |
|    total_timesteps      | 206848      |
| train/                  |             |
|    approx_kl            | 0.022907479 |
|    clip_fraction        | 0.221       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.79       |
|    explained_variance   | -6.07       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.27        |
|    n_updates            | 1000        |
|    policy_gradient_loss | -0.0364     |
|    value_loss           | 15.5        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 582        |
|    iterations           | 102        |
|    time_elapsed         | 358        |
|    total_timesteps      | 208896     

-----------------------------------------
| time/                   |             |
|    fps                  | 583         |
|    iterations           | 112         |
|    time_elapsed         | 393         |
|    total_timesteps      | 229376      |
| train/                  |             |
|    approx_kl            | 0.014663544 |
|    clip_fraction        | 0.143       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.65       |
|    explained_variance   | -20.2       |
|    learning_rate        | 0.0003      |
|    loss                 | 206         |
|    n_updates            | 1110        |
|    policy_gradient_loss | -0.0319     |
|    value_loss           | 28.6        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 582         |
|    iterations           | 113         |
|    time_elapsed         | 396         |
|    total_timesteps      | 231424

-----------------------------------------
| time/                   |             |
|    fps                  | 579         |
|    iterations           | 123         |
|    time_elapsed         | 434         |
|    total_timesteps      | 251904      |
| train/                  |             |
|    approx_kl            | 0.016029939 |
|    clip_fraction        | 0.201       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.4        |
|    explained_variance   | -24.5       |
|    learning_rate        | 0.0003      |
|    loss                 | 204         |
|    n_updates            | 1220        |
|    policy_gradient_loss | -0.0217     |
|    value_loss           | 24          |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 579        |
|    iterations           | 124        |
|    time_elapsed         | 437        |
|    total_timesteps      | 253952     

-----------------------------------------
| time/                   |             |
|    fps                  | 578         |
|    iterations           | 134         |
|    time_elapsed         | 474         |
|    total_timesteps      | 274432      |
| train/                  |             |
|    approx_kl            | 0.014546348 |
|    clip_fraction        | 0.0985      |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.45       |
|    explained_variance   | -8.67       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.01        |
|    n_updates            | 1330        |
|    policy_gradient_loss | -0.0246     |
|    value_loss           | 70.9        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 578        |
|    iterations           | 135        |
|    time_elapsed         | 477        |
|    total_timesteps      | 276480     

-----------------------------------------
| time/                   |             |
|    fps                  | 576         |
|    iterations           | 145         |
|    time_elapsed         | 514         |
|    total_timesteps      | 296960      |
| train/                  |             |
|    approx_kl            | 0.041183855 |
|    clip_fraction        | 0.394       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.15       |
|    explained_variance   | 0.362       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0776     |
|    n_updates            | 1440        |
|    policy_gradient_loss | -0.067      |
|    value_loss           | 0.00532     |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 576         |
|    iterations           | 146         |
|    time_elapsed         | 518         |
|    total_timesteps      | 299008

------------------------------------------
| time/                   |              |
|    fps                  | 575          |
|    iterations           | 156          |
|    time_elapsed         | 554          |
|    total_timesteps      | 319488       |
| train/                  |              |
|    approx_kl            | 4.208414e-08 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -3.32        |
|    explained_variance   | 0.0321       |
|    learning_rate        | 0.0003       |
|    loss                 | 5.06e+06     |
|    n_updates            | 1550         |
|    policy_gradient_loss | -4.38e-05    |
|    value_loss           | 2.76e+07     |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 575           |
|    iterations           | 157           |
|    time_elapsed         | 558           |
|    t

----------------------------------------
| time/                   |            |
|    fps                  | 573        |
|    iterations           | 167        |
|    time_elapsed         | 596        |
|    total_timesteps      | 342016     |
| train/                  |            |
|    approx_kl            | 0.09409399 |
|    clip_fraction        | 0.401      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.99      |
|    explained_variance   | -105       |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0794    |
|    n_updates            | 1660       |
|    policy_gradient_loss | -0.0628    |
|    value_loss           | 0.115      |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 573         |
|    iterations           | 168         |
|    time_elapsed         | 599         |
|    total_timesteps      | 344064      |
| train/  

-----------------------------------------
| time/                   |             |
|    fps                  | 573         |
|    iterations           | 178         |
|    time_elapsed         | 635         |
|    total_timesteps      | 364544      |
| train/                  |             |
|    approx_kl            | 0.014067211 |
|    clip_fraction        | 0.0927      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.88       |
|    explained_variance   | -4.28       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0667      |
|    n_updates            | 1770        |
|    policy_gradient_loss | -0.0265     |
|    value_loss           | 5.58        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 573         |
|    iterations           | 179         |
|    time_elapsed         | 639         |
|    total_timesteps      | 366592

-----------------------------------------
| time/                   |             |
|    fps                  | 573         |
|    iterations           | 189         |
|    time_elapsed         | 675         |
|    total_timesteps      | 387072      |
| train/                  |             |
|    approx_kl            | 0.032050036 |
|    clip_fraction        | 0.293       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.06       |
|    explained_variance   | 0.184       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0902     |
|    n_updates            | 1880        |
|    policy_gradient_loss | -0.0369     |
|    value_loss           | 0.0936      |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 573         |
|    iterations           | 190         |
|    time_elapsed         | 678         |
|    total_timesteps      | 389120

------------------------------------------
| time/                   |              |
|    fps                  | 573          |
|    iterations           | 200          |
|    time_elapsed         | 714          |
|    total_timesteps      | 409600       |
| train/                  |              |
|    approx_kl            | 8.032657e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -3.77        |
|    explained_variance   | 0.00321      |
|    learning_rate        | 0.0003       |
|    loss                 | 3.12e+07     |
|    n_updates            | 1990         |
|    policy_gradient_loss | -2.23e-05    |
|    value_loss           | 5.91e+07     |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 573           |
|    iterations           | 201           |
|    time_elapsed         | 717           |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 573          |
|    iterations           | 211          |
|    time_elapsed         | 753          |
|    total_timesteps      | 432128       |
| train/                  |              |
|    approx_kl            | 2.203451e-07 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -3.75        |
|    explained_variance   | 0.00251      |
|    learning_rate        | 0.0003       |
|    loss                 | 4.39e+07     |
|    n_updates            | 2100         |
|    policy_gradient_loss | -0.000103    |
|    value_loss           | 7.93e+07     |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 573           |
|    iterations           | 212           |
|    time_elapsed         | 756           |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 573          |
|    iterations           | 222          |
|    time_elapsed         | 792          |
|    total_timesteps      | 454656       |
| train/                  |              |
|    approx_kl            | 1.514313e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -3.71        |
|    explained_variance   | 0.0131       |
|    learning_rate        | 0.0003       |
|    loss                 | 1.48e+07     |
|    n_updates            | 2210         |
|    policy_gradient_loss | -0.00063     |
|    value_loss           | 2.66e+07     |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 573          |
|    iterations           | 223          |
|    time_elapsed         | 795          |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 573           |
|    iterations           | 233           |
|    time_elapsed         | 831           |
|    total_timesteps      | 477184        |
| train/                  |               |
|    approx_kl            | 0.00023333126 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -3.53         |
|    explained_variance   | 0.0304        |
|    learning_rate        | 0.0003        |
|    loss                 | 5.21e+06      |
|    n_updates            | 2320          |
|    policy_gradient_loss | -0.00297      |
|    value_loss           | 1.46e+07      |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 573           |
|    iterations           | 234           |
|    time_elapsed         | 835 

-----------------------------------------
| time/                   |             |
|    fps                  | 573         |
|    iterations           | 244         |
|    time_elapsed         | 870         |
|    total_timesteps      | 499712      |
| train/                  |             |
|    approx_kl            | 0.003928326 |
|    clip_fraction        | 0.0225      |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.29       |
|    explained_variance   | 0.246       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.87e+04    |
|    n_updates            | 2430        |
|    policy_gradient_loss | -0.0128     |
|    value_loss           | 3.06e+05    |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 573           |
|    iterations           | 245           |
|    time_elapsed         | 874           |
|    total_timesteps    

-----------------------------------------
| time/                   |             |
|    fps                  | 573         |
|    iterations           | 255         |
|    time_elapsed         | 910         |
|    total_timesteps      | 522240      |
| train/                  |             |
|    approx_kl            | 0.033526774 |
|    clip_fraction        | 0.279       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.81       |
|    explained_variance   | -0.0881     |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0758     |
|    n_updates            | 2540        |
|    policy_gradient_loss | -0.033      |
|    value_loss           | 0.19        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 573         |
|    iterations           | 256         |
|    time_elapsed         | 913         |
|    total_timesteps      | 524288

------------------------------------------
| time/                   |              |
|    fps                  | 573          |
|    iterations           | 266          |
|    time_elapsed         | 949          |
|    total_timesteps      | 544768       |
| train/                  |              |
|    approx_kl            | 0.0002551125 |
|    clip_fraction        | 0.000146     |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.73        |
|    explained_variance   | 0.622        |
|    learning_rate        | 0.0003       |
|    loss                 | 401          |
|    n_updates            | 2650         |
|    policy_gradient_loss | -0.00195     |
|    value_loss           | 1.97e+04     |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 573          |
|    iterations           | 267          |
|    time_elapsed         | 953          |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 573           |
|    iterations           | 277           |
|    time_elapsed         | 988           |
|    total_timesteps      | 567296        |
| train/                  |               |
|    approx_kl            | 5.5209966e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.94         |
|    explained_variance   | 0.0598        |
|    learning_rate        | 0.0003        |
|    loss                 | 1.29e+07      |
|    n_updates            | 2760          |
|    policy_gradient_loss | -0.000146     |
|    value_loss           | 2.33e+07      |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 573           |
|    iterations           | 278           |
|    time_elapsed         | 992 

------------------------------------------
| time/                   |              |
|    fps                  | 573          |
|    iterations           | 288          |
|    time_elapsed         | 1028         |
|    total_timesteps      | 589824       |
| train/                  |              |
|    approx_kl            | 0.0007562538 |
|    clip_fraction        | 0.00298      |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.62        |
|    explained_variance   | 0.595        |
|    learning_rate        | 0.0003       |
|    loss                 | 4.92e+04     |
|    n_updates            | 2870         |
|    policy_gradient_loss | -0.000395    |
|    value_loss           | 6.75e+04     |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 573         |
|    iterations           | 289         |
|    time_elapsed         | 1032        |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 573         |
|    iterations           | 299         |
|    time_elapsed         | 1067        |
|    total_timesteps      | 612352      |
| train/                  |             |
|    approx_kl            | 0.013130031 |
|    clip_fraction        | 0.0746      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.99       |
|    explained_variance   | -0.0697     |
|    learning_rate        | 0.0003      |
|    loss                 | 1.15        |
|    n_updates            | 2980        |
|    policy_gradient_loss | -0.0234     |
|    value_loss           | 7.56        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 573        |
|    iterations           | 300        |
|    time_elapsed         | 1071       |
|    total_timesteps      | 614400     

-----------------------------------------
| time/                   |             |
|    fps                  | 573         |
|    iterations           | 310         |
|    time_elapsed         | 1107        |
|    total_timesteps      | 634880      |
| train/                  |             |
|    approx_kl            | 0.035061732 |
|    clip_fraction        | 0.261       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.21       |
|    explained_variance   | -22.8       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0741     |
|    n_updates            | 3090        |
|    policy_gradient_loss | -0.0433     |
|    value_loss           | 0.129       |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 573          |
|    iterations           | 311          |
|    time_elapsed         | 1110         |
|    total_timesteps      | 6

--------------------------------------------
| time/                   |                |
|    fps                  | 573            |
|    iterations           | 321            |
|    time_elapsed         | 1146           |
|    total_timesteps      | 657408         |
| train/                  |                |
|    approx_kl            | 0.000119640696 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -2.4           |
|    explained_variance   | 0.646          |
|    learning_rate        | 0.0003         |
|    loss                 | 3.5e+04        |
|    n_updates            | 3200           |
|    policy_gradient_loss | -0.000964      |
|    value_loss           | 6.6e+04        |
--------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 573        |
|    iterations           | 322        |
|    time_elapsed         

-------------------------------------------
| time/                   |               |
|    fps                  | 573           |
|    iterations           | 332           |
|    time_elapsed         | 1186          |
|    total_timesteps      | 679936        |
| train/                  |               |
|    approx_kl            | 0.00024042689 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.5          |
|    explained_variance   | 0.0717        |
|    learning_rate        | 0.0003        |
|    loss                 | 2.58e+07      |
|    n_updates            | 3310          |
|    policy_gradient_loss | -1.42e-05     |
|    value_loss           | 4.71e+07      |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 573           |
|    iterations           | 333           |
|    time_elapsed         | 1190

------------------------------------------
| time/                   |              |
|    fps                  | 573          |
|    iterations           | 343          |
|    time_elapsed         | 1225         |
|    total_timesteps      | 702464       |
| train/                  |              |
|    approx_kl            | 6.322807e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.51        |
|    explained_variance   | 0.414        |
|    learning_rate        | 0.0003       |
|    loss                 | 2.3e+05      |
|    n_updates            | 3420         |
|    policy_gradient_loss | -0.000344    |
|    value_loss           | 4.85e+05     |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 573           |
|    iterations           | 344           |
|    time_elapsed         | 1229          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 573           |
|    iterations           | 354           |
|    time_elapsed         | 1265          |
|    total_timesteps      | 724992        |
| train/                  |               |
|    approx_kl            | 2.5503105e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.69         |
|    explained_variance   | 0.111         |
|    learning_rate        | 0.0003        |
|    loss                 | 7.25e+06      |
|    n_updates            | 3530          |
|    policy_gradient_loss | -0.000289     |
|    value_loss           | 1.81e+07      |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 573           |
|    iterations           | 355           |
|    time_elapsed         | 1268

------------------------------------------
| time/                   |              |
|    fps                  | 572          |
|    iterations           | 365          |
|    time_elapsed         | 1304         |
|    total_timesteps      | 747520       |
| train/                  |              |
|    approx_kl            | 3.412724e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.63        |
|    explained_variance   | 0.134        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.01e+07     |
|    n_updates            | 3640         |
|    policy_gradient_loss | -0.000844    |
|    value_loss           | 1.78e+07     |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 572           |
|    iterations           | 366           |
|    time_elapsed         | 1308          |
|    t

-----------------------------------------
| time/                   |             |
|    fps                  | 572         |
|    iterations           | 376         |
|    time_elapsed         | 1344        |
|    total_timesteps      | 770048      |
| train/                  |             |
|    approx_kl            | 0.014165103 |
|    clip_fraction        | 0.0878      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.27       |
|    explained_variance   | -17.9       |
|    learning_rate        | 0.0003      |
|    loss                 | 894         |
|    n_updates            | 3750        |
|    policy_gradient_loss | -0.0191     |
|    value_loss           | 5.33e+03    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 572        |
|    iterations           | 377        |
|    time_elapsed         | 1347       |
|    total_timesteps      | 772096     

----------------------------------------
| time/                   |            |
|    fps                  | 572        |
|    iterations           | 387        |
|    time_elapsed         | 1383       |
|    total_timesteps      | 792576     |
| train/                  |            |
|    approx_kl            | 0.03882616 |
|    clip_fraction        | 0.246      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.31      |
|    explained_variance   | -1.66      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0624    |
|    n_updates            | 3860       |
|    policy_gradient_loss | -0.0378    |
|    value_loss           | 0.305      |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 572         |
|    iterations           | 388         |
|    time_elapsed         | 1387        |
|    total_timesteps      | 794624      |
| train/  

----------------------------------------
| time/                   |            |
|    fps                  | 572        |
|    iterations           | 398        |
|    time_elapsed         | 1423       |
|    total_timesteps      | 815104     |
| train/                  |            |
|    approx_kl            | 0.05813299 |
|    clip_fraction        | 0.356      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.07      |
|    explained_variance   | -0.387     |
|    learning_rate        | 0.0003     |
|    loss                 | -0.102     |
|    n_updates            | 3970       |
|    policy_gradient_loss | -0.0633    |
|    value_loss           | 0.00593    |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 572          |
|    iterations           | 399          |
|    time_elapsed         | 1426         |
|    total_timesteps      | 817152       |
| tr

-----------------------------------------
| time/                   |             |
|    fps                  | 572         |
|    iterations           | 409         |
|    time_elapsed         | 1462        |
|    total_timesteps      | 837632      |
| train/                  |             |
|    approx_kl            | 9.04256e-08 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.78       |
|    explained_variance   | 0.0507      |
|    learning_rate        | 0.0003      |
|    loss                 | 4.23e+07    |
|    n_updates            | 4080        |
|    policy_gradient_loss | -5.12e-05   |
|    value_loss           | 7.45e+07    |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 572           |
|    iterations           | 410           |
|    time_elapsed         | 1466          |
|    total_timesteps    

-------------------------------------------
| time/                   |               |
|    fps                  | 572           |
|    iterations           | 420           |
|    time_elapsed         | 1501          |
|    total_timesteps      | 860160        |
| train/                  |               |
|    approx_kl            | 7.0127426e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.56         |
|    explained_variance   | 0.0915        |
|    learning_rate        | 0.0003        |
|    loss                 | 9.94e+06      |
|    n_updates            | 4190          |
|    policy_gradient_loss | -0.000408     |
|    value_loss           | 5.4e+07       |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 572           |
|    iterations           | 421           |
|    time_elapsed         | 1505

------------------------------------------
| time/                   |              |
|    fps                  | 572          |
|    iterations           | 431          |
|    time_elapsed         | 1541         |
|    total_timesteps      | 882688       |
| train/                  |              |
|    approx_kl            | 0.0020473138 |
|    clip_fraction        | 0.0148       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.11        |
|    explained_variance   | 0.64         |
|    learning_rate        | 0.0003       |
|    loss                 | 3.18e+04     |
|    n_updates            | 4300         |
|    policy_gradient_loss | -0.00944     |
|    value_loss           | 1.26e+05     |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 572          |
|    iterations           | 432          |
|    time_elapsed         | 1544         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 572          |
|    iterations           | 442          |
|    time_elapsed         | 1580         |
|    total_timesteps      | 905216       |
| train/                  |              |
|    approx_kl            | 0.0006188791 |
|    clip_fraction        | 0.00342      |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.32        |
|    explained_variance   | 0.577        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.44e+05     |
|    n_updates            | 4410         |
|    policy_gradient_loss | -0.00377     |
|    value_loss           | 2.01e+05     |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 572           |
|    iterations           | 443           |
|    time_elapsed         | 1584          |
|    t

----------------------------------------
| time/                   |            |
|    fps                  | 572        |
|    iterations           | 453        |
|    time_elapsed         | 1620       |
|    total_timesteps      | 927744     |
| train/                  |            |
|    approx_kl            | 0.05896184 |
|    clip_fraction        | 0.345      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.92      |
|    explained_variance   | -0.684     |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0826    |
|    n_updates            | 4520       |
|    policy_gradient_loss | -0.0536    |
|    value_loss           | 0.0306     |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 572        |
|    iterations           | 454        |
|    time_elapsed         | 1623       |
|    total_timesteps      | 929792     |
| train/        

-----------------------------------------
| time/                   |             |
|    fps                  | 572         |
|    iterations           | 464         |
|    time_elapsed         | 1659        |
|    total_timesteps      | 950272      |
| train/                  |             |
|    approx_kl            | 0.025692526 |
|    clip_fraction        | 0.251       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.73       |
|    explained_variance   | -0.11       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0804      |
|    n_updates            | 4630        |
|    policy_gradient_loss | -0.0213     |
|    value_loss           | 0.263       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 572         |
|    iterations           | 465         |
|    time_elapsed         | 1663        |
|    total_timesteps      | 952320

-------------------------------------------
| time/                   |               |
|    fps                  | 572           |
|    iterations           | 475           |
|    time_elapsed         | 1699          |
|    total_timesteps      | 972800        |
| train/                  |               |
|    approx_kl            | 1.4260877e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.04         |
|    explained_variance   | 0.13          |
|    learning_rate        | 0.0003        |
|    loss                 | 2.17e+07      |
|    n_updates            | 4740          |
|    policy_gradient_loss | -1.1e-05      |
|    value_loss           | 4.51e+07      |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 572          |
|    iterations           | 476          |
|    time_elapsed         | 1703    

----------------------------------------
| time/                   |            |
|    fps                  | 572        |
|    iterations           | 486        |
|    time_elapsed         | 1739       |
|    total_timesteps      | 995328     |
| train/                  |            |
|    approx_kl            | 0.07381343 |
|    clip_fraction        | 0.298      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.84      |
|    explained_variance   | -162       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0362     |
|    n_updates            | 4850       |
|    policy_gradient_loss | -0.0284    |
|    value_loss           | 2.98       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 572        |
|    iterations           | 487        |
|    time_elapsed         | 1742       |
|    total_timesteps      | 997376     |
| train/        



TypeError: unsupported format string passed to numpy.ndarray.__format__

In [5]:
pip install stable-baselines3[extra]


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


