# finrl_train (SAC)
Переработанная версия тренировки под SAC.

In [2]:
!pip install tensorboard

Collecting tensorboard
  Using cached tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)
Collecting absl-py>=0.4 (from tensorboard)
  Using cached absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting grpcio>=1.48.2 (from tensorboard)
  Downloading grpcio-1.76.0-cp313-cp313-win_amd64.whl.metadata (3.8 kB)
Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard)
  Using cached tensorboard_data_server-0.7.2-py3-none-any.whl.metadata (1.1 kB)
Downloading tensorboard-2.20.0-py3-none-any.whl (5.5 MB)
   ---------------------------------------- 0.0/5.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/5.5 MB ? eta -:--:--
   ------- -------------------------------- 1.0/5.5 MB 5.8 MB/s eta 0:00:01
   -------------------- ------------------- 2.9/5.5 MB 7.5 MB/s eta 0:00:01
   ------------------------------ --------- 4.2/5.5 MB 7.1 MB/s eta 0:00:01
   ---------------------------------------- 5.5/5.5 MB 8.2 MB/s eta 0:00:00
Using cached tensorboard_data_server-

In [1]:
import pandas as pd
import numpy as np
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv
import os
from pathlib import Path
import torch

# ✅ ПРОВЕРКА CUDA
print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# ✅ ПАПКИ
TRAINED_MODELS_DIR = Path("trained_models")
RESULTS_DIR = Path("results/sac_multi_stock")
TRAINED_MODELS_DIR.mkdir(exist_ok=True)
RESULTS_DIR.mkdir(exist_ok=True)

# 1) Загрузка CSV
def load_stock_data(folder='prepared_csv'):
    all_files = []
    for root, dirs, files in os.walk(folder):
        for file in files:
            if file.endswith('.csv'):
                all_files.append(os.path.join(root, file))
    
    print(f"Found {len(all_files)} ")
    if not all_files:
        return None
    
    dfs = []
    for file_path in all_files:
        ticker = os.path.basename(file_path).replace('.csv', '')
        print(f" {ticker}")
        
        df = pd.read_csv(file_path)
        date_col = next((c for c in ['date', 'Date'] if c in df.columns), None)
        close_col = next((c for c in ['close', 'Close', 'adj close'] if c in df.columns), None)
        
        if date_col and close_col:
            df = df[[date_col, close_col]].copy()
            df['date'] = pd.to_datetime(df[date_col])
            df['tic'] = ticker
            df['close'] = df[close_col].astype(float)
            df = df[['date', 'tic', 'close']].dropna()
            dfs.append(df)
    
    df_all = pd.concat(dfs, ignore_index=True).sort_values(['date', 'tic']).reset_index(drop=True)
    print(f" {df_all['tic'].nunique()} stocks, {len(df_all):,} strings")
    return df_all

# 2) SAC Stock Env
class StockTradingEnv(gym.Env):
    def __init__(self, df):
        super().__init__()
        self.df = df.sort_values(['date', 'tic']).reset_index(drop=True)
        self.tickers = sorted(self.df['tic'].unique())
        self.stock_dim = len(self.tickers)
        self.dates = sorted(self.df['date'].unique())
        self.max_days = len(self.dates)
        
        self.current_day = 0
        self.initial_cash = 1000000
        self.cash = self.initial_cash
        self.holdings = np.zeros(self.stock_dim)
        
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(1 + self.stock_dim,), dtype=np.float32)
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(self.stock_dim,), dtype=np.float32)
    
    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_day = 0
        self.cash = self.initial_cash
        self.holdings = np.zeros(self.stock_dim)
        return self._get_obs(), {}
    
    def _get_obs(self):
        day_data = self.df[self.df['date'] == self.dates[self.current_day]]
        prices = day_data.set_index('tic')['close'].reindex(self.tickers).fillna(method='ffill').values
        prices = prices / (prices.mean() + 1e-6)
        return np.concatenate([[self.cash / self.initial_cash], prices]).astype(np.float32)
    
    def step(self, action):
        self.current_day += 1
        terminated = self.current_day >= self.max_days - 1
        truncated = False
        
        day_data = self.df[self.df['date'] == self.dates[self.current_day]]
        prices = day_data.set_index('tic')['close'].reindex(self.tickers).fillna(method='ffill').values
        
        portfolio_value = self.cash + np.sum(self.holdings * prices)
        for i in range(self.stock_dim):
            target_weight = np.clip((action[i] + 1) / 2, 0, 1)
            target_value = portfolio_value * target_weight * 0.1
            target_shares = target_value / (prices[i] + 1e-6)
            self.holdings[i] = target_shares * 0.1
        
        new_portfolio_value = self.cash + np.sum(self.holdings * prices)
        reward = np.log(new_portfolio_value / self.initial_cash)
        
        return self._get_obs(), reward, terminated, truncated, {'portfolio_value': new_portfolio_value}

# 3) 🚀 SAC ОБУЧЕНИЕ НА GPU
print(" SAC Stock Trading (GPU)")
df = load_stock_data('prepared_csv')
if df is not None:
    env_fn = lambda: StockTradingEnv(df)
    env_train = DummyVecEnv([env_fn])
    
    # ✅ CUDA GPU!
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Device: {device}")
    
    model = SAC(
        "MlpPolicy", 
        env_train,
        verbose=1,
        learning_rate=3e-4,
        buffer_size=100000,
        batch_size=256,
        device=device  # ✅ GPU ускорение!
    )
    
    model.learn(total_timesteps=100000, progress_bar=True)
    
    model_path = TRAINED_MODELS_DIR / "sac_stocks.zip"
    model.save(model_path)
    print(f"SAC : {model_path}")
    
    print(f"Results: {RESULTS_DIR}")



Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


CUDA Available: False
 SAC Stock Trading (GPU)
Found 12 
 AAPL_prepared
 AMZN_prepared
 BAC_prepared
 DIS_prepared
 GOOG_prepared
 JPM_prepared
 META_prepared
 MSFT_prepared
 NFLX_prepared
 NVDA_prepared
 TSLA_prepared
 WMT_prepared
 12 stocks, 2,400 strings
Device: cpu
Using cpu device


Output()

  prices = day_data.set_index('tic')['close'].reindex(self.tickers).fillna(method='ffill').values


---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 59       |
|    time_elapsed    | 13       |
|    total_timesteps | 796      |
| train/             |          |
|    actor_loss      | -30.6    |
|    critic_loss     | 4.6      |
|    ent_coef        | 0.812    |
|    ent_coef_loss   | -4.19    |
|    learning_rate   | 0.0003   |
|    n_updates       | 695      |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 51       |
|    time_elapsed    | 30       |
|    total_timesteps | 1592     |
| train/             |          |
|    actor_loss      | -47.8    |
|    critic_loss     | 6.15     |
|    ent_coef        | 0.64     |
|    ent_coef_loss   | -8.95    |
|    learning_rate   | 0.0003   |
|    n_updates       | 1491     |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 12       |
|    fps             | 39       |
|    time_elapsed    | 60       |
|    total_timesteps | 2388     |
| train/             |          |
|    actor_loss      | -60.1    |
|    critic_loss     | 2.14     |
|    ent_coef        | 0.504    |
|    ent_coef_loss   | -13.6    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2287     |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 16       |
|    fps             | 38       |
|    time_elapsed    | 82       |
|    total_timesteps | 3184     |
| train/             |          |
|    actor_loss      | -67.9    |
|    critic_loss     | 1.74     |
|    ent_coef        | 0.398    |
|    ent_coef_loss   | -18.3    |
|    learning_rate   | 0.0003   |
|    n_updates       | 3083     |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 20       |
|    fps             | 39       |
|    time_elapsed    | 101      |
|    total_timesteps | 3980     |
| train/             |          |
|    actor_loss      | -73.7    |
|    critic_loss     | 11.5     |
|    ent_coef        | 0.314    |
|    ent_coef_loss   | -22.9    |
|    learning_rate   | 0.0003   |
|    n_updates       | 3879     |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 24       |
|    fps             | 38       |
|    time_elapsed    | 122      |
|    total_timesteps | 4776     |
| train/             |          |
|    actor_loss      | -78.5    |
|    critic_loss     | 10.2     |
|    ent_coef        | 0.249    |
|    ent_coef_loss   | -8.63    |
|    learning_rate   | 0.0003   |
|    n_updates       | 4675     |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 28       |
|    fps             | 38       |
|    time_elapsed    | 144      |
|    total_timesteps | 5572     |
| train/             |          |
|    actor_loss      | -77.8    |
|    critic_loss     | 17       |
|    ent_coef        | 0.208    |
|    ent_coef_loss   | -29      |
|    learning_rate   | 0.0003   |
|    n_updates       | 5471     |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 32       |
|    fps             | 38       |
|    time_elapsed    | 164      |
|    total_timesteps | 6368     |
| train/             |          |
|    actor_loss      | -78.1    |
|    critic_loss     | 17       |
|    ent_coef        | 0.164    |
|    ent_coef_loss   | -32.4    |
|    learning_rate   | 0.0003   |
|    n_updates       | 6267     |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 36       |
|    fps             | 37       |
|    time_elapsed    | 191      |
|    total_timesteps | 7164     |
| train/             |          |
|    actor_loss      | -76.5    |
|    critic_loss     | 11.6     |
|    ent_coef        | 0.129    |
|    ent_coef_loss   | -34.8    |
|    learning_rate   | 0.0003   |
|    n_updates       | 7063     |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 40       |
|    fps             | 37       |
|    time_elapsed    | 212      |
|    total_timesteps | 7960     |
| train/             |          |
|    actor_loss      | -80.8    |
|    critic_loss     | 4.41     |
|    ent_coef        | 0.122    |
|    ent_coef_loss   | -14.9    |
|    learning_rate   | 0.0003   |
|    n_updates       | 7859     |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 44       |
|    fps             | 37       |
|    time_elapsed    | 232      |
|    total_timesteps | 8756     |
| train/             |          |
|    actor_loss      | -82.4    |
|    critic_loss     | 9.42     |
|    ent_coef        | 0.11     |
|    ent_coef_loss   | -11.7    |
|    learning_rate   | 0.0003   |
|    n_updates       | 8655     |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 48       |
|    fps             | 37       |
|    time_elapsed    | 251      |
|    total_timesteps | 9552     |
| train/             |          |
|    actor_loss      | -83.3    |
|    critic_loss     | 10.2     |
|    ent_coef        | 0.0942   |
|    ent_coef_loss   | -39.1    |
|    learning_rate   | 0.0003   |
|    n_updates       | 9451     |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 52       |
|    fps             | 37       |
|    time_elapsed    | 274      |
|    total_timesteps | 10348    |
| train/             |          |
|    actor_loss      | -83.1    |
|    critic_loss     | 14.4     |
|    ent_coef        | 0.0829   |
|    ent_coef_loss   | -15      |
|    learning_rate   | 0.0003   |
|    n_updates       | 10247    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 56       |
|    fps             | 36       |
|    time_elapsed    | 301      |
|    total_timesteps | 11144    |
| train/             |          |
|    actor_loss      | -83.5    |
|    critic_loss     | 3.99     |
|    ent_coef        | 0.0701   |
|    ent_coef_loss   | -12.6    |
|    learning_rate   | 0.0003   |
|    n_updates       | 11043    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 60       |
|    fps             | 35       |
|    time_elapsed    | 333      |
|    total_timesteps | 11940    |
| train/             |          |
|    actor_loss      | -81.4    |
|    critic_loss     | 12.1     |
|    ent_coef        | 0.0596   |
|    ent_coef_loss   | 14.6     |
|    learning_rate   | 0.0003   |
|    n_updates       | 11839    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 64       |
|    fps             | 35       |
|    time_elapsed    | 354      |
|    total_timesteps | 12736    |
| train/             |          |
|    actor_loss      | -89.7    |
|    critic_loss     | 15.9     |
|    ent_coef        | 0.0707   |
|    ent_coef_loss   | 21.2     |
|    learning_rate   | 0.0003   |
|    n_updates       | 12635    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 68       |
|    fps             | 36       |
|    time_elapsed    | 374      |
|    total_timesteps | 13532    |
| train/             |          |
|    actor_loss      | -88.1    |
|    critic_loss     | 4.97     |
|    ent_coef        | 0.0771   |
|    ent_coef_loss   | 0.0882   |
|    learning_rate   | 0.0003   |
|    n_updates       | 13431    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 72       |
|    fps             | 36       |
|    time_elapsed    | 394      |
|    total_timesteps | 14328    |
| train/             |          |
|    actor_loss      | -90.7    |
|    critic_loss     | 16.7     |
|    ent_coef        | 0.0673   |
|    ent_coef_loss   | 9.84     |
|    learning_rate   | 0.0003   |
|    n_updates       | 14227    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 76       |
|    fps             | 36       |
|    time_elapsed    | 414      |
|    total_timesteps | 15124    |
| train/             |          |
|    actor_loss      | -91.1    |
|    critic_loss     | 4.02     |
|    ent_coef        | 0.0722   |
|    ent_coef_loss   | 7.64     |
|    learning_rate   | 0.0003   |
|    n_updates       | 15023    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 80       |
|    fps             | 36       |
|    time_elapsed    | 434      |
|    total_timesteps | 15920    |
| train/             |          |
|    actor_loss      | -87.5    |
|    critic_loss     | 4.56     |
|    ent_coef        | 0.0613   |
|    ent_coef_loss   | -8.46    |
|    learning_rate   | 0.0003   |
|    n_updates       | 15819    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 84       |
|    fps             | 36       |
|    time_elapsed    | 454      |
|    total_timesteps | 16716    |
| train/             |          |
|    actor_loss      | -89.6    |
|    critic_loss     | 5.31     |
|    ent_coef        | 0.0517   |
|    ent_coef_loss   | 2.89     |
|    learning_rate   | 0.0003   |
|    n_updates       | 16615    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 88       |
|    fps             | 36       |
|    time_elapsed    | 474      |
|    total_timesteps | 17512    |
| train/             |          |
|    actor_loss      | -89.9    |
|    critic_loss     | 4.22     |
|    ent_coef        | 0.0482   |
|    ent_coef_loss   | 20.8     |
|    learning_rate   | 0.0003   |
|    n_updates       | 17411    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 92       |
|    fps             | 36       |
|    time_elapsed    | 495      |
|    total_timesteps | 18308    |
| train/             |          |
|    actor_loss      | -87.4    |
|    critic_loss     | 12.9     |
|    ent_coef        | 0.0595   |
|    ent_coef_loss   | 3.5      |
|    learning_rate   | 0.0003   |
|    n_updates       | 18207    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 96       |
|    fps             | 37       |
|    time_elapsed    | 515      |
|    total_timesteps | 19104    |
| train/             |          |
|    actor_loss      | -85.2    |
|    critic_loss     | 14.8     |
|    ent_coef        | 0.059    |
|    ent_coef_loss   | 1.96     |
|    learning_rate   | 0.0003   |
|    n_updates       | 19003    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 100      |
|    fps             | 37       |
|    time_elapsed    | 535      |
|    total_timesteps | 19900    |
| train/             |          |
|    actor_loss      | -86.2    |
|    critic_loss     | 4.66     |
|    ent_coef        | 0.0568   |
|    ent_coef_loss   | 2.89     |
|    learning_rate   | 0.0003   |
|    n_updates       | 19799    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 104      |
|    fps             | 37       |
|    time_elapsed    | 555      |
|    total_timesteps | 20696    |
| train/             |          |
|    actor_loss      | -84.3    |
|    critic_loss     | 10.2     |
|    ent_coef        | 0.0516   |
|    ent_coef_loss   | -23.4    |
|    learning_rate   | 0.0003   |
|    n_updates       | 20595    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 108      |
|    fps             | 36       |
|    time_elapsed    | 583      |
|    total_timesteps | 21492    |
| train/             |          |
|    actor_loss      | -83.4    |
|    critic_loss     | 7.94     |
|    ent_coef        | 0.0463   |
|    ent_coef_loss   | -4.99    |
|    learning_rate   | 0.0003   |
|    n_updates       | 21391    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 112      |
|    fps             | 36       |
|    time_elapsed    | 610      |
|    total_timesteps | 22288    |
| train/             |          |
|    actor_loss      | -80.8    |
|    critic_loss     | 5.93     |
|    ent_coef        | 0.0482   |
|    ent_coef_loss   | 3.43     |
|    learning_rate   | 0.0003   |
|    n_updates       | 22187    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 116      |
|    fps             | 36       |
|    time_elapsed    | 637      |
|    total_timesteps | 23084    |
| train/             |          |
|    actor_loss      | -78.6    |
|    critic_loss     | 5.94     |
|    ent_coef        | 0.0371   |
|    ent_coef_loss   | -11.2    |
|    learning_rate   | 0.0003   |
|    n_updates       | 22983    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 120      |
|    fps             | 36       |
|    time_elapsed    | 663      |
|    total_timesteps | 23880    |
| train/             |          |
|    actor_loss      | -73      |
|    critic_loss     | 11       |
|    ent_coef        | 0.0371   |
|    ent_coef_loss   | 4.84     |
|    learning_rate   | 0.0003   |
|    n_updates       | 23779    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 124      |
|    fps             | 35       |
|    time_elapsed    | 689      |
|    total_timesteps | 24676    |
| train/             |          |
|    actor_loss      | -72.3    |
|    critic_loss     | 4.42     |
|    ent_coef        | 0.0399   |
|    ent_coef_loss   | -9.44    |
|    learning_rate   | 0.0003   |
|    n_updates       | 24575    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 128      |
|    fps             | 35       |
|    time_elapsed    | 715      |
|    total_timesteps | 25472    |
| train/             |          |
|    actor_loss      | -74.5    |
|    critic_loss     | 2.12     |
|    ent_coef        | 0.048    |
|    ent_coef_loss   | 2.96     |
|    learning_rate   | 0.0003   |
|    n_updates       | 25371    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 132      |
|    fps             | 35       |
|    time_elapsed    | 742      |
|    total_timesteps | 26268    |
| train/             |          |
|    actor_loss      | -72.7    |
|    critic_loss     | 7.47     |
|    ent_coef        | 0.0418   |
|    ent_coef_loss   | -12.6    |
|    learning_rate   | 0.0003   |
|    n_updates       | 26167    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 136      |
|    fps             | 35       |
|    time_elapsed    | 768      |
|    total_timesteps | 27064    |
| train/             |          |
|    actor_loss      | -69.1    |
|    critic_loss     | 11       |
|    ent_coef        | 0.0352   |
|    ent_coef_loss   | -20.1    |
|    learning_rate   | 0.0003   |
|    n_updates       | 26963    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 140      |
|    fps             | 35       |
|    time_elapsed    | 793      |
|    total_timesteps | 27860    |
| train/             |          |
|    actor_loss      | -70.4    |
|    critic_loss     | 14.1     |
|    ent_coef        | 0.0325   |
|    ent_coef_loss   | 13.6     |
|    learning_rate   | 0.0003   |
|    n_updates       | 27759    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 144      |
|    fps             | 34       |
|    time_elapsed    | 819      |
|    total_timesteps | 28656    |
| train/             |          |
|    actor_loss      | -66.1    |
|    critic_loss     | 4.72     |
|    ent_coef        | 0.0311   |
|    ent_coef_loss   | -16.2    |
|    learning_rate   | 0.0003   |
|    n_updates       | 28555    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 148      |
|    fps             | 34       |
|    time_elapsed    | 845      |
|    total_timesteps | 29452    |
| train/             |          |
|    actor_loss      | -65.3    |
|    critic_loss     | 4.69     |
|    ent_coef        | 0.0258   |
|    ent_coef_loss   | -7.78    |
|    learning_rate   | 0.0003   |
|    n_updates       | 29351    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 152      |
|    fps             | 34       |
|    time_elapsed    | 872      |
|    total_timesteps | 30248    |
| train/             |          |
|    actor_loss      | -61.2    |
|    critic_loss     | 4.91     |
|    ent_coef        | 0.024    |
|    ent_coef_loss   | 20       |
|    learning_rate   | 0.0003   |
|    n_updates       | 30147    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 156      |
|    fps             | 34       |
|    time_elapsed    | 901      |
|    total_timesteps | 31044    |
| train/             |          |
|    actor_loss      | -59.2    |
|    critic_loss     | 4.6      |
|    ent_coef        | 0.0316   |
|    ent_coef_loss   | 0.491    |
|    learning_rate   | 0.0003   |
|    n_updates       | 30943    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 160      |
|    fps             | 34       |
|    time_elapsed    | 921      |
|    total_timesteps | 31840    |
| train/             |          |
|    actor_loss      | -56.2    |
|    critic_loss     | 3.28     |
|    ent_coef        | 0.0267   |
|    ent_coef_loss   | -1.32    |
|    learning_rate   | 0.0003   |
|    n_updates       | 31739    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 164      |
|    fps             | 34       |
|    time_elapsed    | 942      |
|    total_timesteps | 32636    |
| train/             |          |
|    actor_loss      | -54.4    |
|    critic_loss     | 3.45     |
|    ent_coef        | 0.023    |
|    ent_coef_loss   | -2.1     |
|    learning_rate   | 0.0003   |
|    n_updates       | 32535    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 168      |
|    fps             | 34       |
|    time_elapsed    | 963      |
|    total_timesteps | 33432    |
| train/             |          |
|    actor_loss      | -55.1    |
|    critic_loss     | 2.34     |
|    ent_coef        | 0.0273   |
|    ent_coef_loss   | 27.9     |
|    learning_rate   | 0.0003   |
|    n_updates       | 33331    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 172      |
|    fps             | 34       |
|    time_elapsed    | 988      |
|    total_timesteps | 34228    |
| train/             |          |
|    actor_loss      | -51.4    |
|    critic_loss     | 3.14     |
|    ent_coef        | 0.0313   |
|    ent_coef_loss   | 5.99     |
|    learning_rate   | 0.0003   |
|    n_updates       | 34127    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 176      |
|    fps             | 34       |
|    time_elapsed    | 1013     |
|    total_timesteps | 35024    |
| train/             |          |
|    actor_loss      | -50.2    |
|    critic_loss     | 0.871    |
|    ent_coef        | 0.0341   |
|    ent_coef_loss   | 2.16     |
|    learning_rate   | 0.0003   |
|    n_updates       | 34923    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 180      |
|    fps             | 34       |
|    time_elapsed    | 1034     |
|    total_timesteps | 35820    |
| train/             |          |
|    actor_loss      | -46.6    |
|    critic_loss     | 1.98     |
|    ent_coef        | 0.0308   |
|    ent_coef_loss   | 4.73     |
|    learning_rate   | 0.0003   |
|    n_updates       | 35719    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 184      |
|    fps             | 34       |
|    time_elapsed    | 1054     |
|    total_timesteps | 36616    |
| train/             |          |
|    actor_loss      | -45.6    |
|    critic_loss     | 2.35     |
|    ent_coef        | 0.0263   |
|    ent_coef_loss   | 14.3     |
|    learning_rate   | 0.0003   |
|    n_updates       | 36515    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 188      |
|    fps             | 34       |
|    time_elapsed    | 1074     |
|    total_timesteps | 37412    |
| train/             |          |
|    actor_loss      | -45.4    |
|    critic_loss     | 1.66     |
|    ent_coef        | 0.0318   |
|    ent_coef_loss   | -5.93    |
|    learning_rate   | 0.0003   |
|    n_updates       | 37311    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 192      |
|    fps             | 34       |
|    time_elapsed    | 1094     |
|    total_timesteps | 38208    |
| train/             |          |
|    actor_loss      | -43.8    |
|    critic_loss     | 1.41     |
|    ent_coef        | 0.0242   |
|    ent_coef_loss   | -16.2    |
|    learning_rate   | 0.0003   |
|    n_updates       | 38107    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 196      |
|    fps             | 34       |
|    time_elapsed    | 1115     |
|    total_timesteps | 39004    |
| train/             |          |
|    actor_loss      | -40.9    |
|    critic_loss     | 1.74     |
|    ent_coef        | 0.0173   |
|    ent_coef_loss   | -19.8    |
|    learning_rate   | 0.0003   |
|    n_updates       | 38903    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 200      |
|    fps             | 35       |
|    time_elapsed    | 1135     |
|    total_timesteps | 39800    |
| train/             |          |
|    actor_loss      | -39.5    |
|    critic_loss     | 1.92     |
|    ent_coef        | 0.0135   |
|    ent_coef_loss   | -34.4    |
|    learning_rate   | 0.0003   |
|    n_updates       | 39699    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 204      |
|    fps             | 35       |
|    time_elapsed    | 1155     |
|    total_timesteps | 40596    |
| train/             |          |
|    actor_loss      | -38      |
|    critic_loss     | 1.46     |
|    ent_coef        | 0.011    |
|    ent_coef_loss   | 1.62     |
|    learning_rate   | 0.0003   |
|    n_updates       | 40495    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 208      |
|    fps             | 35       |
|    time_elapsed    | 1175     |
|    total_timesteps | 41392    |
| train/             |          |
|    actor_loss      | -34.9    |
|    critic_loss     | 1.8      |
|    ent_coef        | 0.0102   |
|    ent_coef_loss   | -13.3    |
|    learning_rate   | 0.0003   |
|    n_updates       | 41291    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 212      |
|    fps             | 35       |
|    time_elapsed    | 1195     |
|    total_timesteps | 42188    |
| train/             |          |
|    actor_loss      | -36.2    |
|    critic_loss     | 1.06     |
|    ent_coef        | 0.0112   |
|    ent_coef_loss   | 32.1     |
|    learning_rate   | 0.0003   |
|    n_updates       | 42087    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 216      |
|    fps             | 35       |
|    time_elapsed    | 1216     |
|    total_timesteps | 42984    |
| train/             |          |
|    actor_loss      | -35.2    |
|    critic_loss     | 1.21     |
|    ent_coef        | 0.0156   |
|    ent_coef_loss   | 29.8     |
|    learning_rate   | 0.0003   |
|    n_updates       | 42883    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 220      |
|    fps             | 35       |
|    time_elapsed    | 1236     |
|    total_timesteps | 43780    |
| train/             |          |
|    actor_loss      | -34.4    |
|    critic_loss     | 0.707    |
|    ent_coef        | 0.0178   |
|    ent_coef_loss   | 9.06     |
|    learning_rate   | 0.0003   |
|    n_updates       | 43679    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 224      |
|    fps             | 35       |
|    time_elapsed    | 1256     |
|    total_timesteps | 44576    |
| train/             |          |
|    actor_loss      | -32.3    |
|    critic_loss     | 1.03     |
|    ent_coef        | 0.018    |
|    ent_coef_loss   | 1.66     |
|    learning_rate   | 0.0003   |
|    n_updates       | 44475    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 228      |
|    fps             | 35       |
|    time_elapsed    | 1282     |
|    total_timesteps | 45372    |
| train/             |          |
|    actor_loss      | -32      |
|    critic_loss     | 0.523    |
|    ent_coef        | 0.0142   |
|    ent_coef_loss   | 4.35     |
|    learning_rate   | 0.0003   |
|    n_updates       | 45271    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 232      |
|    fps             | 35       |
|    time_elapsed    | 1308     |
|    total_timesteps | 46168    |
| train/             |          |
|    actor_loss      | -29.2    |
|    critic_loss     | 1.17     |
|    ent_coef        | 0.012    |
|    ent_coef_loss   | -32.8    |
|    learning_rate   | 0.0003   |
|    n_updates       | 46067    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 236      |
|    fps             | 35       |
|    time_elapsed    | 1335     |
|    total_timesteps | 46964    |
| train/             |          |
|    actor_loss      | -27.4    |
|    critic_loss     | 0.916    |
|    ent_coef        | 0.00862  |
|    ent_coef_loss   | -34      |
|    learning_rate   | 0.0003   |
|    n_updates       | 46863    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 240      |
|    fps             | 35       |
|    time_elapsed    | 1362     |
|    total_timesteps | 47760    |
| train/             |          |
|    actor_loss      | -28.1    |
|    critic_loss     | 0.478    |
|    ent_coef        | 0.0076   |
|    ent_coef_loss   | 6.66     |
|    learning_rate   | 0.0003   |
|    n_updates       | 47659    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 244      |
|    fps             | 34       |
|    time_elapsed    | 1388     |
|    total_timesteps | 48556    |
| train/             |          |
|    actor_loss      | -26.5    |
|    critic_loss     | 0.7      |
|    ent_coef        | 0.0077   |
|    ent_coef_loss   | -9.24    |
|    learning_rate   | 0.0003   |
|    n_updates       | 48455    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 248      |
|    fps             | 34       |
|    time_elapsed    | 1416     |
|    total_timesteps | 49352    |
| train/             |          |
|    actor_loss      | -24.1    |
|    critic_loss     | 0.821    |
|    ent_coef        | 0.00706  |
|    ent_coef_loss   | -3.89    |
|    learning_rate   | 0.0003   |
|    n_updates       | 49251    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 252      |
|    fps             | 34       |
|    time_elapsed    | 1443     |
|    total_timesteps | 50148    |
| train/             |          |
|    actor_loss      | -22.8    |
|    critic_loss     | 0.782    |
|    ent_coef        | 0.00861  |
|    ent_coef_loss   | 1.09     |
|    learning_rate   | 0.0003   |
|    n_updates       | 50047    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 256      |
|    fps             | 34       |
|    time_elapsed    | 1471     |
|    total_timesteps | 50944    |
| train/             |          |
|    actor_loss      | -22.4    |
|    critic_loss     | 1.22     |
|    ent_coef        | 0.0118   |
|    ent_coef_loss   | 9.64     |
|    learning_rate   | 0.0003   |
|    n_updates       | 50843    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 260      |
|    fps             | 34       |
|    time_elapsed    | 1497     |
|    total_timesteps | 51740    |
| train/             |          |
|    actor_loss      | -23.5    |
|    critic_loss     | 0.584    |
|    ent_coef        | 0.0127   |
|    ent_coef_loss   | 0.981    |
|    learning_rate   | 0.0003   |
|    n_updates       | 51639    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 264      |
|    fps             | 34       |
|    time_elapsed    | 1524     |
|    total_timesteps | 52536    |
| train/             |          |
|    actor_loss      | -22.3    |
|    critic_loss     | 0.592    |
|    ent_coef        | 0.0138   |
|    ent_coef_loss   | 1.09     |
|    learning_rate   | 0.0003   |
|    n_updates       | 52435    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 268      |
|    fps             | 34       |
|    time_elapsed    | 1551     |
|    total_timesteps | 53332    |
| train/             |          |
|    actor_loss      | -21.2    |
|    critic_loss     | 0.62     |
|    ent_coef        | 0.00986  |
|    ent_coef_loss   | -27.1    |
|    learning_rate   | 0.0003   |
|    n_updates       | 53231    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 272      |
|    fps             | 34       |
|    time_elapsed    | 1578     |
|    total_timesteps | 54128    |
| train/             |          |
|    actor_loss      | -20.4    |
|    critic_loss     | 0.567    |
|    ent_coef        | 0.00759  |
|    ent_coef_loss   | -44.9    |
|    learning_rate   | 0.0003   |
|    n_updates       | 54027    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 276      |
|    fps             | 34       |
|    time_elapsed    | 1608     |
|    total_timesteps | 54924    |
| train/             |          |
|    actor_loss      | -19      |
|    critic_loss     | 0.731    |
|    ent_coef        | 0.00598  |
|    ent_coef_loss   | -36.1    |
|    learning_rate   | 0.0003   |
|    n_updates       | 54823    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 280      |
|    fps             | 34       |
|    time_elapsed    | 1629     |
|    total_timesteps | 55720    |
| train/             |          |
|    actor_loss      | -18.2    |
|    critic_loss     | 0.715    |
|    ent_coef        | 0.00489  |
|    ent_coef_loss   | -22.7    |
|    learning_rate   | 0.0003   |
|    n_updates       | 55619    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 284      |
|    fps             | 34       |
|    time_elapsed    | 1650     |
|    total_timesteps | 56516    |
| train/             |          |
|    actor_loss      | -17.3    |
|    critic_loss     | 0.703    |
|    ent_coef        | 0.0043   |
|    ent_coef_loss   | -3.68    |
|    learning_rate   | 0.0003   |
|    n_updates       | 56415    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 288      |
|    fps             | 34       |
|    time_elapsed    | 1671     |
|    total_timesteps | 57312    |
| train/             |          |
|    actor_loss      | -17      |
|    critic_loss     | 0.431    |
|    ent_coef        | 0.00488  |
|    ent_coef_loss   | 30.1     |
|    learning_rate   | 0.0003   |
|    n_updates       | 57211    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 292      |
|    fps             | 34       |
|    time_elapsed    | 1692     |
|    total_timesteps | 58108    |
| train/             |          |
|    actor_loss      | -15.3    |
|    critic_loss     | 0.62     |
|    ent_coef        | 0.00681  |
|    ent_coef_loss   | 21.5     |
|    learning_rate   | 0.0003   |
|    n_updates       | 58007    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 296      |
|    fps             | 34       |
|    time_elapsed    | 1713     |
|    total_timesteps | 58904    |
| train/             |          |
|    actor_loss      | -14.8    |
|    critic_loss     | 0.363    |
|    ent_coef        | 0.00753  |
|    ent_coef_loss   | -1.03    |
|    learning_rate   | 0.0003   |
|    n_updates       | 58803    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 300      |
|    fps             | 34       |
|    time_elapsed    | 1734     |
|    total_timesteps | 59700    |
| train/             |          |
|    actor_loss      | -15      |
|    critic_loss     | 0.436    |
|    ent_coef        | 0.00928  |
|    ent_coef_loss   | 35.1     |
|    learning_rate   | 0.0003   |
|    n_updates       | 59599    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 304      |
|    fps             | 34       |
|    time_elapsed    | 1755     |
|    total_timesteps | 60496    |
| train/             |          |
|    actor_loss      | -13.4    |
|    critic_loss     | 0.466    |
|    ent_coef        | 0.00893  |
|    ent_coef_loss   | -35.3    |
|    learning_rate   | 0.0003   |
|    n_updates       | 60395    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 308      |
|    fps             | 34       |
|    time_elapsed    | 1776     |
|    total_timesteps | 61292    |
| train/             |          |
|    actor_loss      | -14.4    |
|    critic_loss     | 0.274    |
|    ent_coef        | 0.00892  |
|    ent_coef_loss   | 44.7     |
|    learning_rate   | 0.0003   |
|    n_updates       | 61191    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 312      |
|    fps             | 34       |
|    time_elapsed    | 1797     |
|    total_timesteps | 62088    |
| train/             |          |
|    actor_loss      | -14.2    |
|    critic_loss     | 0.182    |
|    ent_coef        | 0.0107   |
|    ent_coef_loss   | 34.6     |
|    learning_rate   | 0.0003   |
|    n_updates       | 61987    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 316      |
|    fps             | 34       |
|    time_elapsed    | 1818     |
|    total_timesteps | 62884    |
| train/             |          |
|    actor_loss      | -13.4    |
|    critic_loss     | 0.21     |
|    ent_coef        | 0.0101   |
|    ent_coef_loss   | -61.9    |
|    learning_rate   | 0.0003   |
|    n_updates       | 62783    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 320      |
|    fps             | 34       |
|    time_elapsed    | 1842     |
|    total_timesteps | 63680    |
| train/             |          |
|    actor_loss      | -12.9    |
|    critic_loss     | 0.207    |
|    ent_coef        | 0.00871  |
|    ent_coef_loss   | -7.1     |
|    learning_rate   | 0.0003   |
|    n_updates       | 63579    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 324      |
|    fps             | 34       |
|    time_elapsed    | 1871     |
|    total_timesteps | 64476    |
| train/             |          |
|    actor_loss      | -12.5    |
|    critic_loss     | 0.223    |
|    ent_coef        | 0.00752  |
|    ent_coef_loss   | -38.8    |
|    learning_rate   | 0.0003   |
|    n_updates       | 64375    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 328      |
|    fps             | 34       |
|    time_elapsed    | 1899     |
|    total_timesteps | 65272    |
| train/             |          |
|    actor_loss      | -11.6    |
|    critic_loss     | 0.22     |
|    ent_coef        | 0.00602  |
|    ent_coef_loss   | -11.5    |
|    learning_rate   | 0.0003   |
|    n_updates       | 65171    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 332      |
|    fps             | 34       |
|    time_elapsed    | 1928     |
|    total_timesteps | 66068    |
| train/             |          |
|    actor_loss      | -11.5    |
|    critic_loss     | 0.133    |
|    ent_coef        | 0.00519  |
|    ent_coef_loss   | -14      |
|    learning_rate   | 0.0003   |
|    n_updates       | 65967    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 336      |
|    fps             | 34       |
|    time_elapsed    | 1956     |
|    total_timesteps | 66864    |
| train/             |          |
|    actor_loss      | -10.7    |
|    critic_loss     | 0.35     |
|    ent_coef        | 0.00421  |
|    ent_coef_loss   | -27.7    |
|    learning_rate   | 0.0003   |
|    n_updates       | 66763    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 340      |
|    fps             | 34       |
|    time_elapsed    | 1984     |
|    total_timesteps | 67660    |
| train/             |          |
|    actor_loss      | -10.3    |
|    critic_loss     | 0.208    |
|    ent_coef        | 0.00323  |
|    ent_coef_loss   | -16.1    |
|    learning_rate   | 0.0003   |
|    n_updates       | 67559    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 344      |
|    fps             | 33       |
|    time_elapsed    | 2013     |
|    total_timesteps | 68456    |
| train/             |          |
|    actor_loss      | -9.84    |
|    critic_loss     | 0.266    |
|    ent_coef        | 0.00297  |
|    ent_coef_loss   | -5.32    |
|    learning_rate   | 0.0003   |
|    n_updates       | 68355    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 348      |
|    fps             | 33       |
|    time_elapsed    | 2041     |
|    total_timesteps | 69252    |
| train/             |          |
|    actor_loss      | -9.43    |
|    critic_loss     | 0.151    |
|    ent_coef        | 0.00325  |
|    ent_coef_loss   | -7.44    |
|    learning_rate   | 0.0003   |
|    n_updates       | 69151    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 352      |
|    fps             | 33       |
|    time_elapsed    | 2069     |
|    total_timesteps | 70048    |
| train/             |          |
|    actor_loss      | -9.39    |
|    critic_loss     | 0.188    |
|    ent_coef        | 0.00319  |
|    ent_coef_loss   | -0.381   |
|    learning_rate   | 0.0003   |
|    n_updates       | 69947    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 356      |
|    fps             | 33       |
|    time_elapsed    | 2097     |
|    total_timesteps | 70844    |
| train/             |          |
|    actor_loss      | -8.48    |
|    critic_loss     | 0.196    |
|    ent_coef        | 0.00299  |
|    ent_coef_loss   | 18.7     |
|    learning_rate   | 0.0003   |
|    n_updates       | 70743    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 360      |
|    fps             | 33       |
|    time_elapsed    | 2126     |
|    total_timesteps | 71640    |
| train/             |          |
|    actor_loss      | -8.41    |
|    critic_loss     | 0.139    |
|    ent_coef        | 0.00397  |
|    ent_coef_loss   | -3.35    |
|    learning_rate   | 0.0003   |
|    n_updates       | 71539    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 364      |
|    fps             | 33       |
|    time_elapsed    | 2154     |
|    total_timesteps | 72436    |
| train/             |          |
|    actor_loss      | -7.9     |
|    critic_loss     | 0.0974   |
|    ent_coef        | 0.00318  |
|    ent_coef_loss   | -23.4    |
|    learning_rate   | 0.0003   |
|    n_updates       | 72335    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 368      |
|    fps             | 33       |
|    time_elapsed    | 2183     |
|    total_timesteps | 73232    |
| train/             |          |
|    actor_loss      | -8.16    |
|    critic_loss     | 0.109    |
|    ent_coef        | 0.00294  |
|    ent_coef_loss   | 29.8     |
|    learning_rate   | 0.0003   |
|    n_updates       | 73131    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 372      |
|    fps             | 33       |
|    time_elapsed    | 2212     |
|    total_timesteps | 74028    |
| train/             |          |
|    actor_loss      | -8.25    |
|    critic_loss     | 0.107    |
|    ent_coef        | 0.00414  |
|    ent_coef_loss   | 54.4     |
|    learning_rate   | 0.0003   |
|    n_updates       | 73927    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 376      |
|    fps             | 33       |
|    time_elapsed    | 2241     |
|    total_timesteps | 74824    |
| train/             |          |
|    actor_loss      | -8.9     |
|    critic_loss     | 0.0794   |
|    ent_coef        | 0.00551  |
|    ent_coef_loss   | 74.5     |
|    learning_rate   | 0.0003   |
|    n_updates       | 74723    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 380      |
|    fps             | 33       |
|    time_elapsed    | 2270     |
|    total_timesteps | 75620    |
| train/             |          |
|    actor_loss      | -8.43    |
|    critic_loss     | 0.0917   |
|    ent_coef        | 0.00619  |
|    ent_coef_loss   | -5.18    |
|    learning_rate   | 0.0003   |
|    n_updates       | 75519    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 384      |
|    fps             | 33       |
|    time_elapsed    | 2299     |
|    total_timesteps | 76416    |
| train/             |          |
|    actor_loss      | -8.14    |
|    critic_loss     | 0.0732   |
|    ent_coef        | 0.00667  |
|    ent_coef_loss   | -38      |
|    learning_rate   | 0.0003   |
|    n_updates       | 76315    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 388      |
|    fps             | 33       |
|    time_elapsed    | 2329     |
|    total_timesteps | 77212    |
| train/             |          |
|    actor_loss      | -8.18    |
|    critic_loss     | 0.0635   |
|    ent_coef        | 0.00452  |
|    ent_coef_loss   | -86.7    |
|    learning_rate   | 0.0003   |
|    n_updates       | 77111    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 392      |
|    fps             | 33       |
|    time_elapsed    | 2359     |
|    total_timesteps | 78008    |
| train/             |          |
|    actor_loss      | -7.44    |
|    critic_loss     | 0.0989   |
|    ent_coef        | 0.00348  |
|    ent_coef_loss   | -76.4    |
|    learning_rate   | 0.0003   |
|    n_updates       | 77907    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 396      |
|    fps             | 33       |
|    time_elapsed    | 2384     |
|    total_timesteps | 78804    |
| train/             |          |
|    actor_loss      | -7.61    |
|    critic_loss     | 0.0705   |
|    ent_coef        | 0.00277  |
|    ent_coef_loss   | -61      |
|    learning_rate   | 0.0003   |
|    n_updates       | 78703    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 400      |
|    fps             | 33       |
|    time_elapsed    | 2405     |
|    total_timesteps | 79600    |
| train/             |          |
|    actor_loss      | -7.29    |
|    critic_loss     | 0.0704   |
|    ent_coef        | 0.00256  |
|    ent_coef_loss   | -2.06    |
|    learning_rate   | 0.0003   |
|    n_updates       | 79499    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 404      |
|    fps             | 33       |
|    time_elapsed    | 2427     |
|    total_timesteps | 80396    |
| train/             |          |
|    actor_loss      | -7.08    |
|    critic_loss     | 0.0659   |
|    ent_coef        | 0.00256  |
|    ent_coef_loss   | -10      |
|    learning_rate   | 0.0003   |
|    n_updates       | 80295    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 408      |
|    fps             | 33       |
|    time_elapsed    | 2449     |
|    total_timesteps | 81192    |
| train/             |          |
|    actor_loss      | -6.92    |
|    critic_loss     | 0.101    |
|    ent_coef        | 0.00209  |
|    ent_coef_loss   | -26.8    |
|    learning_rate   | 0.0003   |
|    n_updates       | 81091    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 412      |
|    fps             | 33       |
|    time_elapsed    | 2471     |
|    total_timesteps | 81988    |
| train/             |          |
|    actor_loss      | -6.66    |
|    critic_loss     | 0.13     |
|    ent_coef        | 0.00167  |
|    ent_coef_loss   | -39.9    |
|    learning_rate   | 0.0003   |
|    n_updates       | 81887    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 416      |
|    fps             | 33       |
|    time_elapsed    | 2492     |
|    total_timesteps | 82784    |
| train/             |          |
|    actor_loss      | -6.66    |
|    critic_loss     | 0.0828   |
|    ent_coef        | 0.00126  |
|    ent_coef_loss   | -49.3    |
|    learning_rate   | 0.0003   |
|    n_updates       | 82683    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 420      |
|    fps             | 33       |
|    time_elapsed    | 2515     |
|    total_timesteps | 83580    |
| train/             |          |
|    actor_loss      | -6.1     |
|    critic_loss     | 0.0563   |
|    ent_coef        | 0.00103  |
|    ent_coef_loss   | -15.3    |
|    learning_rate   | 0.0003   |
|    n_updates       | 83479    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 424      |
|    fps             | 33       |
|    time_elapsed    | 2537     |
|    total_timesteps | 84376    |
| train/             |          |
|    actor_loss      | -5.97    |
|    critic_loss     | 0.0764   |
|    ent_coef        | 0.000981 |
|    ent_coef_loss   | -6.55    |
|    learning_rate   | 0.0003   |
|    n_updates       | 84275    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 428      |
|    fps             | 33       |
|    time_elapsed    | 2559     |
|    total_timesteps | 85172    |
| train/             |          |
|    actor_loss      | -6.01    |
|    critic_loss     | 0.0473   |
|    ent_coef        | 0.00103  |
|    ent_coef_loss   | 1.94     |
|    learning_rate   | 0.0003   |
|    n_updates       | 85071    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 432      |
|    fps             | 33       |
|    time_elapsed    | 2583     |
|    total_timesteps | 85968    |
| train/             |          |
|    actor_loss      | -5.83    |
|    critic_loss     | 0.0603   |
|    ent_coef        | 0.000858 |
|    ent_coef_loss   | -19.6    |
|    learning_rate   | 0.0003   |
|    n_updates       | 85867    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 436      |
|    fps             | 33       |
|    time_elapsed    | 2604     |
|    total_timesteps | 86764    |
| train/             |          |
|    actor_loss      | -5.88    |
|    critic_loss     | 0.0469   |
|    ent_coef        | 0.000795 |
|    ent_coef_loss   | 7.79     |
|    learning_rate   | 0.0003   |
|    n_updates       | 86663    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 440      |
|    fps             | 33       |
|    time_elapsed    | 2631     |
|    total_timesteps | 87560    |
| train/             |          |
|    actor_loss      | -5.87    |
|    critic_loss     | 0.0458   |
|    ent_coef        | 0.000657 |
|    ent_coef_loss   | -22.9    |
|    learning_rate   | 0.0003   |
|    n_updates       | 87459    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 444      |
|    fps             | 33       |
|    time_elapsed    | 2661     |
|    total_timesteps | 88356    |
| train/             |          |
|    actor_loss      | -5.74    |
|    critic_loss     | 0.0662   |
|    ent_coef        | 0.000531 |
|    ent_coef_loss   | 0.0678   |
|    learning_rate   | 0.0003   |
|    n_updates       | 88255    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 448      |
|    fps             | 33       |
|    time_elapsed    | 2690     |
|    total_timesteps | 89152    |
| train/             |          |
|    actor_loss      | -5.14    |
|    critic_loss     | 0.0544   |
|    ent_coef        | 0.000469 |
|    ent_coef_loss   | -5.34    |
|    learning_rate   | 0.0003   |
|    n_updates       | 89051    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 452      |
|    fps             | 33       |
|    time_elapsed    | 2720     |
|    total_timesteps | 89948    |
| train/             |          |
|    actor_loss      | -5.41    |
|    critic_loss     | 0.0542   |
|    ent_coef        | 0.000458 |
|    ent_coef_loss   | -0.139   |
|    learning_rate   | 0.0003   |
|    n_updates       | 89847    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 456      |
|    fps             | 33       |
|    time_elapsed    | 2749     |
|    total_timesteps | 90744    |
| train/             |          |
|    actor_loss      | -5.48    |
|    critic_loss     | 0.0563   |
|    ent_coef        | 0.000399 |
|    ent_coef_loss   | 11.3     |
|    learning_rate   | 0.0003   |
|    n_updates       | 90643    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 460      |
|    fps             | 32       |
|    time_elapsed    | 2779     |
|    total_timesteps | 91540    |
| train/             |          |
|    actor_loss      | -5.25    |
|    critic_loss     | 0.0504   |
|    ent_coef        | 0.000467 |
|    ent_coef_loss   | 16.7     |
|    learning_rate   | 0.0003   |
|    n_updates       | 91439    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 464      |
|    fps             | 32       |
|    time_elapsed    | 2809     |
|    total_timesteps | 92336    |
| train/             |          |
|    actor_loss      | -5.19    |
|    critic_loss     | 0.0404   |
|    ent_coef        | 0.000666 |
|    ent_coef_loss   | 17.1     |
|    learning_rate   | 0.0003   |
|    n_updates       | 92235    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 468      |
|    fps             | 32       |
|    time_elapsed    | 2839     |
|    total_timesteps | 93132    |
| train/             |          |
|    actor_loss      | -5.24    |
|    critic_loss     | 0.0481   |
|    ent_coef        | 0.000755 |
|    ent_coef_loss   | 0.701    |
|    learning_rate   | 0.0003   |
|    n_updates       | 93031    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 472      |
|    fps             | 32       |
|    time_elapsed    | 2868     |
|    total_timesteps | 93928    |
| train/             |          |
|    actor_loss      | -4.94    |
|    critic_loss     | 0.0547   |
|    ent_coef        | 0.000709 |
|    ent_coef_loss   | -18.1    |
|    learning_rate   | 0.0003   |
|    n_updates       | 93827    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 476      |
|    fps             | 32       |
|    time_elapsed    | 2897     |
|    total_timesteps | 94724    |
| train/             |          |
|    actor_loss      | -5       |
|    critic_loss     | 0.0558   |
|    ent_coef        | 0.000547 |
|    ent_coef_loss   | -10.9    |
|    learning_rate   | 0.0003   |
|    n_updates       | 94623    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 480      |
|    fps             | 32       |
|    time_elapsed    | 2927     |
|    total_timesteps | 95520    |
| train/             |          |
|    actor_loss      | -4.92    |
|    critic_loss     | 0.0465   |
|    ent_coef        | 0.000533 |
|    ent_coef_loss   | -11.2    |
|    learning_rate   | 0.0003   |
|    n_updates       | 95419    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 484      |
|    fps             | 32       |
|    time_elapsed    | 2956     |
|    total_timesteps | 96316    |
| train/             |          |
|    actor_loss      | -4.95    |
|    critic_loss     | 0.0411   |
|    ent_coef        | 0.000587 |
|    ent_coef_loss   | -3.9     |
|    learning_rate   | 0.0003   |
|    n_updates       | 96215    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 488      |
|    fps             | 32       |
|    time_elapsed    | 2986     |
|    total_timesteps | 97112    |
| train/             |          |
|    actor_loss      | -4.63    |
|    critic_loss     | 0.0367   |
|    ent_coef        | 0.000592 |
|    ent_coef_loss   | 1.13     |
|    learning_rate   | 0.0003   |
|    n_updates       | 97011    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 492      |
|    fps             | 32       |
|    time_elapsed    | 3016     |
|    total_timesteps | 97908    |
| train/             |          |
|    actor_loss      | -4.61    |
|    critic_loss     | 0.0552   |
|    ent_coef        | 0.000563 |
|    ent_coef_loss   | -15.8    |
|    learning_rate   | 0.0003   |
|    n_updates       | 97807    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 496      |
|    fps             | 32       |
|    time_elapsed    | 3046     |
|    total_timesteps | 98704    |
| train/             |          |
|    actor_loss      | -4.73    |
|    critic_loss     | 0.0409   |
|    ent_coef        | 0.000753 |
|    ent_coef_loss   | 43.2     |
|    learning_rate   | 0.0003   |
|    n_updates       | 98603    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 500      |
|    fps             | 32       |
|    time_elapsed    | 3076     |
|    total_timesteps | 99500    |
| train/             |          |
|    actor_loss      | -4.53    |
|    critic_loss     | 0.0345   |
|    ent_coef        | 0.000945 |
|    ent_coef_loss   | 17       |
|    learning_rate   | 0.0003   |
|    n_updates       | 99399    |
---------------------------------


SAC : trained_models\sac_stocks.zip
Results: results\sac_multi_stock
