In [1]:
import gym
from gym import spaces
import pybullet as p
import pybullet_data
import numpy as np


In [2]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import SAC

In [3]:
import gymnasium as gym
from gymnasium import spaces
import pybullet as p
import pybullet_data
import numpy as np
import time

class RoboticDogEnv(gym.Env):
    def __init__(self):
        super(RoboticDogEnv, self).__init__()
        self.total_reward=0
        self.counter=0
        self.steps=0
        self.total_steps=0
        # Connect to PyBullet
        self.physicsClient = p.connect(p.GUI)

        # Set the simulation environment
        p.setAdditionalSearchPath(pybullet_data.getDataPath())

        # Load the plane and URDF
        self.plane_id = p.loadURDF("plane.urdf")

        # Define the URDF path as an instance attribute
        self.urdf_path = "C:/Users/youss/OneDrive - aucegypt.edu/Desktop/bolt/spot.urdf"
        self.robot_id = p.loadURDF(self.urdf_path, [0, 0, 0.15], useFixedBase=False)
        self.num_joints = p.getNumJoints(self.robot_id)

        # Ensure the robot is correctly initialized
        if self.robot_id < 0:
            raise ValueError("Failed to initialize the robot.")
        
        if self.num_joints == 0:
            raise ValueError("No joints found in the robot.")
        
        print(f"Robot ID: {self.robot_id}")
        print(f"Number of joints: {self.num_joints}")
        
        # Set gravity
        p.setGravity(0, 0, -9.8)
        
        # Get joint limits and ensure joints are valid
        self.joint_indices = []
        
        joint_limits = []
        for joint_index in range(self.num_joints):
            joint_info = p.getJointInfo(self.robot_id, joint_index)
            joint_name = joint_info[1].decode('utf-8')
            joint_lower_limit = joint_info[8]
            joint_upper_limit = joint_info[9]
            
            # Print joint information for debugging
           # print(f"Joint {joint_index}: Name: {joint_name}, Lower limit: {joint_lower_limit}, Upper limit: {joint_upper_limit}")
            
            if "coxa" in joint_name or "femur" in joint_name or "tibia" in joint_name:
                # Add fallback default limits if the provided limits are invalid
                if joint_lower_limit >= joint_upper_limit:
                    joint_lower_limit, joint_upper_limit = -1.0, 1.0
                
                joint_limits.append((joint_lower_limit, joint_upper_limit))
                self.joint_indices.append(joint_index)

                
        
                
        # Define the action and observation space
        self.action_space = spaces.Box(
            low=np.array([limit[0] for limit in joint_limits]),
            high=np.array([limit[1] for limit in joint_limits]),
            dtype=np.float32
        )
        
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(len(self.joint_indices) * 2 + 6,),  # Joint positions + velocities + base orientation (3) + base angular velocity (3)
            dtype=np.float32
        )
        
        self.fall_start_time = None  # Initialize the fall start time
        self.leg_indices = self.joint_indices   
        self.noise_counter = 0 
        
        self.forward=0
        self.stable=0
        self.energy=0
        self.velocity_pen=0
        self.fall=0
        self.smooth=0
        self.previous_joint_velocities = None
        self.sym=0
        
       
        
        # Store the link indices for the feet
        self.front_left_foot = self.get_link_index('foot_FL')
        self.front_right_foot = self.get_link_index('foot_FR')
        self.back_left_foot = self.get_link_index('foot_BL')
        self.back_right_foot = self.get_link_index('foot_BR')
        
        
        
        
    def get_link_index(self, link_name):
        for joint_index in range(self.num_joints):
            joint_info = p.getJointInfo(self.robot_id, joint_index)
            joint_name = joint_info[12].decode('utf-8')  # Link name is at index 12
            if link_name in joint_name:
                return joint_info[0]  # Return the link index
        raise ValueError(f"Link {link_name} not found")
        
        
    def reset(self, **kwargs):
        self.counter=self.counter+1
        self.total_steps=self.steps+self.total_steps
        if self.steps!=0: 
            average = self.total_reward/self.steps
        if self.total_reward != 0:
            print(f"Episode {self.counter}  finished with cumulative reward: {self.total_reward} and ")
            print(f"with an average reward of: {average}")
            print(f"number of steps in this episode: {self.steps}")
            print(f"total steps till now: {self.total_steps}")
            print(f"..........................................")
        self.total_reward=0
        self.steps=0
        # Reset the simulation environment
        p.resetSimulation()
        p.setAdditionalSearchPath(pybullet_data.getDataPath())
        self.plane_id = p.loadURDF("plane.urdf")
        self.robot_id = p.loadURDF(self.urdf_path, [0, 0, 0.15], useFixedBase=False)
        p.setGravity(0, 0, -9.8)
        
        # Wait for the simulation to stabilize
        time.sleep(1)
        
        # Ensure the robot is correctly initialized
        if self.robot_id < 0:
            print("Error: Robot failed to load.")
            return None, {}
        
        # Check if joints are correctly initialized
        if p.getNumJoints(self.robot_id) == 0:
            print("Error: No joints found in the robot.")
            return None, {}
        
        # Return the initial state
        initial_state = self.get_state()
       # print("Initial state:", initial_state)  # Debugging statement
        self.fall_start_time = None  # Reset the fall start time
        
        
        print(f"forward : {self.forward/2500} \n velocity penalty:{self.velocity_pen/2500} \n stability : {self.stable/2500}")
        print(f"energy : {self.energy/2500} \n  fall: {self.fall/2500} \n smooth: {self.smooth/2500} \n symmetry: {self.sym/2500}")      
                 
                 
                 
                 
                 
        self.forward=0
        self.stable=0
        self.energy=0 
        self.velocity_pen=0
        self.fall=0
        self.smooth=0
        self.sym=0
       
        
        return initial_state, {}
    
    def step(self, action):
        # Apply action to the robot
        for i, joint_index in enumerate(self.joint_indices):
            p.setJointMotorControl2(self.robot_id, joint_index, p.POSITION_CONTROL, targetPosition=action[i])
        
        # Step the simulation
        p.stepSimulation()
        time.sleep(1. / 240.)  # Adjust sleep time to control simulation speed
        
        # Get the next state, reward, and check if done
        state = self.get_state()
        reward = self.compute_reward()
        terminated = self.is_done(state)
        truncated = False  # This can be set to True based on some logic, e.g., max steps reached
        self.total_reward=self.total_reward+reward
        self.steps=self.steps+1
        return state, reward, terminated, truncated, {}
    
    def get_state(self):
        # Debugging: Check joint indices
       # print("Joint indices:", self.joint_indices)
        
        joint_states = p.getJointStates(self.robot_id, self.joint_indices)
        if joint_states is None or len(joint_states) == 0:
           # print("Error: p.getJointStates returned None or an empty list")  # Debugging statement
            return None
        
        joint_positions = [state[0] for state in joint_states]
        joint_velocities = [state[1] for state in joint_states]
        
        base_position, base_orientation = p.getBasePositionAndOrientation(self.robot_id)
        base_velocity, base_angular_velocity = p.getBaseVelocity(self.robot_id)
        
        base_orientation_euler = p.getEulerFromQuaternion(base_orientation)
        
        state = np.concatenate([joint_positions, joint_velocities, base_orientation_euler, base_angular_velocity])
        
        # Check for nan values in the state
        if np.isnan(state).any():
            print("Error: State contains NaN values")
            print("Joint positions:", joint_positions)
            print("Joint velocities:", joint_velocities)
            print("Base orientation (Euler):", base_orientation_euler)
            print("Base angular velocity:", base_angular_velocity)
        
        return state
    
    
    def compute_reward(self): #under testing 
        base_position, base_orientation = p.getBasePositionAndOrientation(self.robot_id)
        base_velocity, base_angular_velocity = p.getBaseVelocity(self.robot_id)
        
        
    
        # Calculate the forward vector from the orientation
        forward_vector = p.getMatrixFromQuaternion(base_orientation)[0:3:2]
        forward_vector = np.array(forward_vector)  # Forward direction in the world frame
        forward_velocity = np.dot(forward_vector, base_velocity[:2])
        forward_reward = forward_velocity
        
       
         # Penalize high velocities
        max_velocity_threshold = 3.0  # Set the maximum desired velocity
        if forward_velocity > max_velocity_threshold:
            velocity_penalty = (forward_velocity - max_velocity_threshold) ** 2
        else:
            velocity_penalty = -1
            
        
        
        
        base_orientation_euler = p.getEulerFromQuaternion(base_orientation)
        pitch, roll = base_orientation_euler[1], base_orientation_euler[0]
        # Set acceptable thresholds for instability
        acceptable_pitch_threshold = 0.05  # Adjust as necessary
        acceptable_roll_threshold = 0.05  # Adjust as necessary
 
        stability_penalty = 0.0
        stability_penalty += abs(pitch) - acceptable_pitch_threshold
        stability_penalty += abs(roll) - acceptable_roll_threshold
        # Penalize instability
        
       
        """"
        noise = 0.0
        self.noise_counter += 1
        if self.noise_counter % 100 == 0:  # Add noise every 100 steps
            noise = np.random.normal(0, 1)  # Adjust the standard deviation as needed
        """
        
        joint_states = p.getJointStates(self.robot_id, self.joint_indices)
        joint_angles = [state[0] for state in joint_states]
        joint_velocities = [state[1] for state in joint_states]
        # Apply dynamic energy penalty
        energy_penalty = 0.0
        joint_velocity_threshold = 10  # Threshold for penalizing high joint velocities
        for velocity in joint_velocities:
            if abs(velocity) > joint_velocity_threshold:
                energy_penalty += (abs(velocity) - joint_velocity_threshold) ** 2 
            else: energy_penalty= -100
        # Penalize high joint velocities
        
        
        
    
        fall_penalty = 0.0
       
        if base_position[2] < 0.12:
         # If the robot has fallen (height threshold)
            fall_penalty = 50.0 
        else: fall_penalty= -1
        
        
        smoothness_penalty = self.compute_smoothness_reward(joint_velocities)
        
        symmetry_penalty= self.compute_symmetry_reward()
        
        
        # Scaling factors
        forward_scale = 0#700
        stability_penalty_scale =  0#-15 #was 0.2
        energy_penalty_scale =  0#-0.1
        fall_penalty_scale =  -500.0
        
        smoothness_scale= 0# -0.05
        symmetry_penalty_scale = 0#5.0
        velocity_penalty_scale = 0
        
        
        reward = (forward_reward * forward_scale) + \
                 (velocity_penalty * velocity_penalty_scale)+ \
                 (energy_penalty*energy_penalty_scale) +\
                 (stability_penalty * stability_penalty_scale) + \
                 (fall_penalty * fall_penalty_scale) + \
                 (smoothness_penalty*smoothness_scale) +\
                 (symmetry_penalty * symmetry_penalty_scale)
               
                 
            
            
            
            
        self.forward=self.forward+(forward_reward*forward_scale)
        self.velocity_pen=self.velocity_pen+(velocity_penalty * velocity_penalty_scale)
        self.stable=self.stable+(stability_penalty*stability_penalty_scale)
        self.energy=(energy_penalty*energy_penalty_scale)+self.energy
        self.fall=self.fall+(fall_penalty * fall_penalty_scale)
        self.smooth=self.smooth+(smoothness_penalty*smoothness_scale)
        self.sym=self.sym+(symmetry_penalty * symmetry_penalty_scale)  
            
        
        return reward
    
    
    def compute_symmetry_reward(self):
        # Get the height of each foot joint
        front_left_height = p.getLinkState(self.robot_id, self.front_left_foot)[0][2]
        front_right_height = p.getLinkState(self.robot_id, self.front_right_foot)[0][2]
        back_left_height = p.getLinkState(self.robot_id, self.back_left_foot)[0][2]
        back_right_height = p.getLinkState(self.robot_id, self.back_right_foot)[0][2]
        
        # Reward for diagonal leg coordination
        symmetry_reward = 0.0
        if abs(front_left_height - back_right_height) < 0.015:  # Adjust threshold as needed
            symmetry_reward += 1.0
        if abs(front_right_height - back_left_height) < 0.015:  # Adjust threshold as needed
            symmetry_reward += 1.0

        return symmetry_reward

    
    def compute_smoothness_reward(self, joint_velocities):
        velocity_change_threshold = 2
        smoothness_penalty=0
        if self.previous_joint_velocities is None:
            smoothness_reward = 0.0
        else:
            velocity_differences = np.abs(np.array(joint_velocities) - np.array(self.previous_joint_velocities))
            velocity_differences = np.clip(velocity_differences - velocity_change_threshold, 0, None)
            smoothness_penalty = np.sum(velocity_differences)
            

        self.previous_joint_velocities = joint_velocities
        return smoothness_penalty

   


    
    
    def is_done(self, state):
        """"
        base_position, base_orientation = p.getBasePositionAndOrientation(self.robot_id)
        
        if base_position[2] < 0.2:  # If the robot has fallen
            if self.fall_start_time is None:
                self.fall_start_time = time.time()  # Start the timer
               # print("Robot has started falling")
            elif time.time() - self.fall_start_time > 2:  # Wait for 2 seconds before deciding to reset
               # print("Robot has fallen for 2 seconds, resetting...")
                return True
        else:
            self.fall_start_time = None  # Reset the timer if the robot is not fallen
        """
         
        if self.steps == 2500:
            return True 
        
        else:
            return False
    
    def render(self, mode='human'):
        pass
    
    def close(self):
        p.disconnect()

    


In [4]:
# Register the environment
gym.envs.registration.register(id='RoboticDog-v0', entry_point='__main__:RoboticDogEnv')


In [5]:

# Create the environment
env = RoboticDogEnv()


Robot ID: 1
Number of joints: 16


In [6]:

# Instantiate the agent
#model = PPO('MlpPolicy', env, verbose=1)
# for more exploration learning rate could co down till 1e-4 and entropy could go up till 0.1

model = PPO('MlpPolicy', env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.



# Train the agent
model.learn(total_timesteps=1000000000)


# Load the previously trained model
model = PPO.load("C:\\Users\\youss\\OneDrive - aucegypt.edu\\Desktop\\bolt\\ppo_spot_trial_basic.zip", env=env)


In [7]:
# Train the agent
model.learn(total_timesteps=5000000)

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: 0.0 
 smooth: 0.0 
 symmetry: 0.0
-----------------------------
| time/              |      |
|    fps             | 89   |
|    iterations      | 1    |
|    time_elapsed    | 22   |
|    total_timesteps | 2048 |
-----------------------------
Episode 2  finished with cumulative reward: -3441500.0 and 
with an average reward of: -1376.0495801679328
number of steps in this episode: 2501
total steps till now: 2501
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1376.6 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -3.44e+06     |
| time/                   |               |
|    fps                  | 80            |
|    iterations           | 2             |
|    time_elapsed         | 50           

Episode 8  finished with cumulative reward: -5966000.0 and 
with an average reward of: -2385.4458216713315
number of steps in this episode: 2501
total steps till now: 17507
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2386.4 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -7.3e+06     |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 9            |
|    time_elapsed         | 249          |
|    total_timesteps      | 18432        |
| train/                  |              |
|    approx_kl            | 9.654323e-07 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 1.03e-05     |
|    learning_rate      

Episode 14  finished with cumulative reward: -1605500.0 and 
with an average reward of: -641.9432227109156
number of steps in this episode: 2501
total steps till now: 32513
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -642.2 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -6.79e+06     |
| time/                   |               |
|    fps                  | 71            |
|    iterations           | 16            |
|    time_elapsed         | 458           |
|    total_timesteps      | 32768         |
| train/                  |               |
|    approx_kl            | 7.7550067e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 1.65e-05      |
|    learn

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -6.57e+06     |
| time/                   |               |
|    fps                  | 71            |
|    iterations           | 23            |
|    time_elapsed         | 661           |
|    total_timesteps      | 47104         |
| train/                  |               |
|    approx_kl            | 1.5369442e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 1.51e-05      |
|    learning_rate        | 0.0003        |
|    loss                 | 3.39e+09      |
|    n_updates            | 220           |
|    policy_gradient_loss | -0.000266     |
|    std                  | 1             |
|    value_loss           | 9.94e+09      |
-------------------------------------------
Episode 20  finished with cumula

Episode 25  finished with cumulative reward: -4793000.0 and 
with an average reward of: -1916.4334266293483
number of steps in this episode: 2501
total steps till now: 60024
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1917.2 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.33e+06    |
| time/                   |              |
|    fps                  | 70           |
|    iterations           | 30           |
|    time_elapsed         | 873          |
|    total_timesteps      | 61440        |
| train/                  |              |
|    approx_kl            | 6.923801e-08 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | -1.93e-05    |
|    learning_rate     

Episode 31  finished with cumulative reward: -8694500.0 and 
with an average reward of: -3476.4094362255096
number of steps in this episode: 2501
total steps till now: 75030
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3477.8 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.08e+06    |
| time/                   |              |
|    fps                  | 70           |
|    iterations           | 37           |
|    time_elapsed         | 1081         |
|    total_timesteps      | 75776        |
| train/                  |              |
|    approx_kl            | 1.511944e-07 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.000126     |
|    learning_rate     

Episode 37  finished with cumulative reward: -4487000.0 and 
with an average reward of: -1794.0823670531788
number of steps in this episode: 2501
total steps till now: 90036
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1794.8 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -6.13e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 44            |
|    time_elapsed         | 1289          |
|    total_timesteps      | 90112         |
| train/                  |               |
|    approx_kl            | 1.2223609e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.000305      |
|    lea

Episode 42  finished with cumulative reward: -713000.0 and 
with an average reward of: -285.0859656137545
number of steps in this episode: 2501
total steps till now: 102541
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -285.2 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.97e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 51            |
|    time_elapsed         | 1492          |
|    total_timesteps      | 104448        |
| train/                  |               |
|    approx_kl            | 3.0180672e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | -4.61e-05     |
|    learn

Episode 48  finished with cumulative reward: -126500.0 and 
with an average reward of: -50.5797680927629
number of steps in this episode: 2501
total steps till now: 117547
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -50.6 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.66e+06    |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 58           |
|    time_elapsed         | 1700         |
|    total_timesteps      | 118784       |
| train/                  |              |
|    approx_kl            | 7.796916e-08 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.000196     |
|    learning_rate        |

Episode 54  finished with cumulative reward: -5889500.0 and 
with an average reward of: -2354.858056777289
number of steps in this episode: 2501
total steps till now: 132553
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2355.8 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.76e+06    |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 65           |
|    time_elapsed         | 1910         |
|    total_timesteps      | 133120       |
| train/                  |              |
|    approx_kl            | 6.504706e-08 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.000158     |
|    learning_rate     

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.96e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 72            |
|    time_elapsed         | 2110          |
|    total_timesteps      | 147456        |
| train/                  |               |
|    approx_kl            | 3.6379788e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.000265      |
|    learning_rate        | 0.0003        |
|    loss                 | 4.64e+09      |
|    n_updates            | 710           |
|    policy_gradient_loss | -3.96e-05     |
|    std                  | 1             |
|    value_loss           | 1.01e+10      |
-------------------------------------------
Episode 60  finished with cumula

Episode 65  finished with cumulative reward: -2396000.0 and 
with an average reward of: -958.0167932826869
number of steps in this episode: 2501
total steps till now: 160064
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -958.4 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.79e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 79            |
|    time_elapsed         | 2320          |
|    total_timesteps      | 161792        |
| train/                  |               |
|    approx_kl            | 3.6670826e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.000861      |
|    lear

Episode 71  finished with cumulative reward: -7521500.0 and 
with an average reward of: -3007.3970411835267
number of steps in this episode: 2501
total steps till now: 175070
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3008.6 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.84e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 86            |
|    time_elapsed         | 2530          |
|    total_timesteps      | 176128        |
| train/                  |               |
|    approx_kl            | 1.2252713e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00126       |
|    le

Episode 77  finished with cumulative reward: -4053500.0 and 
with an average reward of: -1620.7516993202719
number of steps in this episode: 2501
total steps till now: 190076
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1621.4 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.76e+06    |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 93           |
|    time_elapsed         | 2738         |
|    total_timesteps      | 190464       |
| train/                  |              |
|    approx_kl            | 4.688627e-08 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.000204     |
|    learning_rate    

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.74e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 100           |
|    time_elapsed         | 2939          |
|    total_timesteps      | 204800        |
| train/                  |               |
|    approx_kl            | 7.2759576e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00187       |
|    learning_rate        | 0.0003        |
|    loss                 | 1.23e+10      |
|    n_updates            | 990           |
|    policy_gradient_loss | -1.14e-05     |
|    std                  | 1             |
|    value_loss           | 2.27e+10      |
-------------------------------------------
Episode 83  finished with cumula

Episode 88  finished with cumulative reward: -3594500.0 and 
with an average reward of: -1437.2251099560176
number of steps in this episode: 2501
total steps till now: 217587
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1437.8 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.83e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 107           |
|    time_elapsed         | 3148          |
|    total_timesteps      | 219136        |
| train/                  |               |
|    approx_kl            | 6.6065695e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00183       |
|    le

Episode 94  finished with cumulative reward: -6068000.0 and 
with an average reward of: -2426.2295081967213
number of steps in this episode: 2501
total steps till now: 232593
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2427.2 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.83e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 114           |
|    time_elapsed         | 3358          |
|    total_timesteps      | 233472        |
| train/                  |               |
|    approx_kl            | 2.2700988e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00102       |
|    le

Episode 100  finished with cumulative reward: -6246500.0 and 
with an average reward of: -2497.6009596161534
number of steps in this episode: 2501
total steps till now: 247599
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2498.6 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.79e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 121           |
|    time_elapsed         | 3566          |
|    total_timesteps      | 247808        |
| train/                  |               |
|    approx_kl            | 4.7148205e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00103       |
|    l

Episode 105  finished with cumulative reward: -4410500.0 and 
with an average reward of: -1763.4946021591363
number of steps in this episode: 2501
total steps till now: 260104
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1764.2 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.62e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 128           |
|    time_elapsed         | 3767          |
|    total_timesteps      | 262144        |
| train/                  |               |
|    approx_kl            | 1.8044375e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.000886      |
|    l

Episode 111  finished with cumulative reward: -5864000.0 and 
with an average reward of: -2344.6621351459416
number of steps in this episode: 2501
total steps till now: 275110
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2345.6 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.64e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 135           |
|    time_elapsed         | 3975          |
|    total_timesteps      | 276480        |
| train/                  |               |
|    approx_kl            | 1.9354047e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0014        |
|    l

Episode 117  finished with cumulative reward: -4563500.0 and 
with an average reward of: -1824.670131947221
number of steps in this episode: 2501
total steps till now: 290116
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1825.4 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.64e+06    |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 142          |
|    time_elapsed         | 4183         |
|    total_timesteps      | 290816       |
| train/                  |              |
|    approx_kl            | 2.910383e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.000206     |
|    learning_rate    

Episode 123  finished with cumulative reward: -4589000.0 and 
with an average reward of: -1834.8660535785687
number of steps in this episode: 2501
total steps till now: 305122
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1835.6 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.67e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 149           |
|    time_elapsed         | 4388          |
|    total_timesteps      | 305152        |
| train/                  |               |
|    approx_kl            | 5.5588316e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00247       |
|    l

Episode 128  finished with cumulative reward: -3569000.0 and 
with an average reward of: -1427.02918832467
number of steps in this episode: 2501
total steps till now: 317627
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1427.6 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.68e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 156           |
|    time_elapsed         | 4587          |
|    total_timesteps      | 319488        |
| train/                  |               |
|    approx_kl            | 6.1118044e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00245       |
|    lea

Episode 134  finished with cumulative reward: -9587000.0 and 
with an average reward of: -3833.266693322671
number of steps in this episode: 2501
total steps till now: 332633
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3834.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.53e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 163       |
|    time_elapsed         | 4795      |
|    total_timesteps      | 333824    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.00104   |
|    learning_rate        | 0.0003    |
|    loss                 |

Episode 140  finished with cumulative reward: -8082500.0 and 
with an average reward of: -3231.7073170731705
number of steps in this episode: 2501
total steps till now: 347639
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3233.0 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.43e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 170           |
|    time_elapsed         | 5003          |
|    total_timesteps      | 348160        |
| train/                  |               |
|    approx_kl            | 1.4260877e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.000758      |
|    l

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.62e+06    |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 177          |
|    time_elapsed         | 5203         |
|    total_timesteps      | 362496       |
| train/                  |              |
|    approx_kl            | 2.240995e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.00523      |
|    learning_rate        | 0.0003       |
|    loss                 | 1.17e+09     |
|    n_updates            | 1760         |
|    policy_gradient_loss | -1.45e-05    |
|    std                  | 1            |
|    value_loss           | 3.43e+09     |
------------------------------------------
Episode 146  finished with cumulative reward: -6221000

Episode 151  finished with cumulative reward: -2013500.0 and 
with an average reward of: -805.077968812475
number of steps in this episode: 2501
total steps till now: 375150
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -805.4 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.63e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 184           |
|    time_elapsed         | 5411          |
|    total_timesteps      | 376832        |
| train/                  |               |
|    approx_kl            | 2.0372681e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00178       |
|    lear

Episode 157  finished with cumulative reward: -8516000.0 and 
with an average reward of: -3405.0379848060775
number of steps in this episode: 2501
total steps till now: 390156
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3406.4 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.62e+06    |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 191          |
|    time_elapsed         | 5623         |
|    total_timesteps      | 391168       |
| train/                  |              |
|    approx_kl            | 3.085006e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.00124      |
|    learning_rate   

Episode 163  finished with cumulative reward: -11984000.0 and 
with an average reward of: -4791.683326669332
number of steps in this episode: 2501
total steps till now: 405162
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -4793.6 
 smooth: 0.0 
 symmetry: 0.0
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.5e+03     |
|    ep_rew_mean          | -5.68e+06   |
| time/                   |             |
|    fps                  | 69          |
|    iterations           | 198         |
|    time_elapsed         | 5832        |
|    total_timesteps      | 405504      |
| train/                  |             |
|    approx_kl            | 1.36788e-09 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -17         |
|    explained_variance   | 0.00187     |
|    learning_rate        | 0.0003  

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.76e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 205           |
|    time_elapsed         | 6030          |
|    total_timesteps      | 419840        |
| train/                  |               |
|    approx_kl            | 4.7148205e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00313       |
|    learning_rate        | 0.0003        |
|    loss                 | 4.69e+09      |
|    n_updates            | 2040          |
|    policy_gradient_loss | -2.12e-05     |
|    std                  | 1             |
|    value_loss           | 7.23e+09      |
-------------------------------------------
Episode 169  finished with cumul

Episode 174  finished with cumulative reward: 358000.0 and 
with an average reward of: 143.14274290283888
number of steps in this episode: 2501
total steps till now: 432673
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: 143.2 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.62e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 212           |
|    time_elapsed         | 6240          |
|    total_timesteps      | 434176        |
| train/                  |               |
|    approx_kl            | 5.7625584e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | -0.617        |
|    learni

Episode 180  finished with cumulative reward: -4461500.0 and 
with an average reward of: -1783.8864454218312
number of steps in this episode: 2501
total steps till now: 447679
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1784.6 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.82e+06    |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 219          |
|    time_elapsed         | 6450         |
|    total_timesteps      | 448512       |
| train/                  |              |
|    approx_kl            | 7.625204e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.00432      |
|    learning_rate   

Episode 186  finished with cumulative reward: -5048000.0 and 
with an average reward of: -2018.392642942823
number of steps in this episode: 2501
total steps till now: 462685
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2019.2 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.8e+06      |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 226           |
|    time_elapsed         | 6658          |
|    total_timesteps      | 462848        |
| train/                  |               |
|    approx_kl            | 1.4551915e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00221       |
|    le

Episode 191  finished with cumulative reward: -7496000.0 and 
with an average reward of: -2997.201119552179
number of steps in this episode: 2501
total steps till now: 475190
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2998.4 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.75e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 233           |
|    time_elapsed         | 6867          |
|    total_timesteps      | 477184        |
| train/                  |               |
|    approx_kl            | 1.7462298e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00207       |
|    le

Episode 197  finished with cumulative reward: -7929500.0 and 
with an average reward of: -3170.531787285086
number of steps in this episode: 2501
total steps till now: 490196
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3171.8 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.92e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 240           |
|    time_elapsed         | 7080          |
|    total_timesteps      | 491520        |
| train/                  |               |
|    approx_kl            | 6.1118044e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00187       |
|    le

Episode 203  finished with cumulative reward: -11525000.0 and 
with an average reward of: -4608.156737305078
number of steps in this episode: 2501
total steps till now: 505202
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -4610.0 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -6.05e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 247           |
|    time_elapsed         | 7292          |
|    total_timesteps      | 505856        |
| train/                  |               |
|    approx_kl            | 2.6193447e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00548       |
|    l

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -6.05e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 254           |
|    time_elapsed         | 7498          |
|    total_timesteps      | 520192        |
| train/                  |               |
|    approx_kl            | 3.8504368e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00267       |
|    learning_rate        | 0.0003        |
|    loss                 | 3.45e+09      |
|    n_updates            | 2530          |
|    policy_gradient_loss | -3.64e-05     |
|    std                  | 1             |
|    value_loss           | 1.52e+10      |
-------------------------------------------
Episode 209  finished with cumul

Episode 214  finished with cumulative reward: -10224500.0 and 
with an average reward of: -4088.1647341063576
number of steps in this episode: 2501
total steps till now: 532713
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -4089.8 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.12e+06    |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 261          |
|    time_elapsed         | 7708         |
|    total_timesteps      | 534528       |
| train/                  |              |
|    approx_kl            | 5.820766e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.00333      |
|    learning_rate  

Episode 220  finished with cumulative reward: -7113500.0 and 
with an average reward of: -2844.2622950819673
number of steps in this episode: 2501
total steps till now: 547719
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2845.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.22e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 268       |
|    time_elapsed         | 7915      |
|    total_timesteps      | 548864    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.00358   |
|    learning_rate        | 0.0003    |
|    loss                 

Episode 226  finished with cumulative reward: -11474000.0 and 
with an average reward of: -4587.764894042383
number of steps in this episode: 2501
total steps till now: 562725
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -4589.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.34e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 275       |
|    time_elapsed         | 8126      |
|    total_timesteps      | 563200    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.00179   |
|    learning_rate        | 0.0003    |
|    loss                 

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.46e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 282       |
|    time_elapsed         | 8326      |
|    total_timesteps      | 577536    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.00253   |
|    learning_rate        | 0.0003    |
|    loss                 | 1.6e+10   |
|    n_updates            | 2810      |
|    policy_gradient_loss | -4.69e-06 |
|    std                  | 1         |
|    value_loss           | 3.3e+10   |
---------------------------------------
Episode 232  finished with cumulative reward: -8363000.0 and 
with an average reward of: -3343.862455017993
number of st

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -4344.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.63e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 289       |
|    time_elapsed         | 8538      |
|    total_timesteps      | 591872    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.00321   |
|    learning_rate        | 0.0003    |
|    loss                 | 1.07e+10  |
|    n_updates            | 2880      |
|    policy_gradient_loss | -4.93e-06 |
|    std                  | 1         |
|    value_loss           | 2.07e+10  |
---------------------------------------
Episo

Episode 243  finished with cumulative reward: -5252000.0 and 
with an average reward of: -2099.9600159936026
number of steps in this episode: 2501
total steps till now: 605242
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2100.8 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -6.58e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 296           |
|    time_elapsed         | 8748          |
|    total_timesteps      | 606208        |
| train/                  |               |
|    approx_kl            | 1.7462298e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00225       |
|    l

Episode 249  finished with cumulative reward: -3390500.0 and 
with an average reward of: -1355.657736905238
number of steps in this episode: 2501
total steps till now: 620248
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1356.2 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.71e+06    |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 303          |
|    time_elapsed         | 8956         |
|    total_timesteps      | 620544       |
| train/                  |              |
|    approx_kl            | 9.313226e-10 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.00274      |
|    learning_rate    

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.71e+06    |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 310          |
|    time_elapsed         | 9156         |
|    total_timesteps      | 634880       |
| train/                  |              |
|    approx_kl            | 5.820766e-10 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.00502      |
|    learning_rate        | 0.0003       |
|    loss                 | 3.96e+09     |
|    n_updates            | 3090         |
|    policy_gradient_loss | -1.31e-05    |
|    std                  | 1            |
|    value_loss           | 9.03e+09     |
------------------------------------------
Episode 255  finished with cumulative reward: -5430500

Episode 260  finished with cumulative reward: -3467000.0 and 
with an average reward of: -1386.2455017992802
number of steps in this episode: 2501
total steps till now: 647759
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1386.8 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.54e+06    |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 317          |
|    time_elapsed         | 9362         |
|    total_timesteps      | 649216       |
| train/                  |              |
|    approx_kl            | 2.910383e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.00285      |
|    learning_rate   

Episode 266  finished with cumulative reward: -2855000.0 and 
with an average reward of: -1141.5433826469412
number of steps in this episode: 2501
total steps till now: 662765
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1142.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.53e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 324       |
|    time_elapsed         | 9569      |
|    total_timesteps      | 663552    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.00359   |
|    learning_rate        | 0.0003    |
|    loss                 

Episode 272  finished with cumulative reward: -5838500.0 and 
with an average reward of: -2334.466213514594
number of steps in this episode: 2501
total steps till now: 677771
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2335.4 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.62e+06    |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 331          |
|    time_elapsed         | 9776         |
|    total_timesteps      | 677888       |
| train/                  |              |
|    approx_kl            | 8.731149e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.00497      |
|    learning_rate    

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1080.8 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -6.51e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 338           |
|    time_elapsed         | 9975          |
|    total_timesteps      | 692224        |
| train/                  |               |
|    approx_kl            | 2.3283064e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0037        |
|    learning_rate        | 0.0003        |
|    loss                 | 6.34e+09      |
|    n_updates            | 3370          |
|    policy_gradient_loss | -9.75e-06     |
|    std                  | 1             |
|    

Episode 283  finished with cumulative reward: -6731000.0 and 
with an average reward of: -2691.3234706117555
number of steps in this episode: 2501
total steps till now: 705282
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2692.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.44e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 345       |
|    time_elapsed         | 10180     |
|    total_timesteps      | 706560    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.00278   |
|    learning_rate        | 0.0003    |
|    loss                 

Episode 289  finished with cumulative reward: -5532500.0 and 
with an average reward of: -2212.1151539384246
number of steps in this episode: 2501
total steps till now: 720288
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2213.0 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.5e+06     |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 352          |
|    time_elapsed         | 10388        |
|    total_timesteps      | 720896       |
| train/                  |              |
|    approx_kl            | 3.783498e-10 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.00217      |
|    learning_rate   

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.39e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 359       |
|    time_elapsed         | 10585     |
|    total_timesteps      | 735232    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.00354   |
|    learning_rate        | 0.0003    |
|    loss                 | 8.94e+09  |
|    n_updates            | 3580      |
|    policy_gradient_loss | -5.65e-06 |
|    std                  | 1         |
|    value_loss           | 1.5e+10   |
---------------------------------------
Episode 295  finished with cumulative reward: -2727500.0 and 
with an average reward of: -1090.563774490204
number of st

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.32e+06    |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 366          |
|    time_elapsed         | 10793        |
|    total_timesteps      | 749568       |
| train/                  |              |
|    approx_kl            | 5.820766e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.00562      |
|    learning_rate        | 0.0003       |
|    loss                 | 5.07e+09     |
|    n_updates            | 3650         |
|    policy_gradient_loss | -7.66e-06    |
|    std                  | 1            |
|    value_loss           | 8.47e+09     |
------------------------------------------
Episode 301  finished with cumulative reward: -1962500

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -6.19e+06     |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 373           |
|    time_elapsed         | 11001         |
|    total_timesteps      | 763904        |
| train/                  |               |
|    approx_kl            | 5.2677933e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.00853       |
|    learning_rate        | 0.0003        |
|    loss                 | 2.11e+08      |
|    n_updates            | 3720          |
|    policy_gradient_loss | -1.36e-05     |
|    std                  | 1             |
|    value_loss           | 1.5e+09       |
-------------------------------------------
Episode 307  finished with cumul

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.15e+06    |
| time/                   |              |
|    fps                  | 69           |
|    iterations           | 380          |
|    time_elapsed         | 11208        |
|    total_timesteps      | 778240       |
| train/                  |              |
|    approx_kl            | 8.731149e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0084       |
|    learning_rate        | 0.0003       |
|    loss                 | 4.87e+09     |
|    n_updates            | 3790         |
|    policy_gradient_loss | -7.87e-06    |
|    std                  | 1            |
|    value_loss           | 5.66e+09     |
------------------------------------------
---------------------------------------
| rollout/    

Episode 318  finished with cumulative reward: -10275500.0 and 
with an average reward of: -4108.556577369052
number of steps in this episode: 2501
total steps till now: 792817
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -4110.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.88e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 388       |
|    time_elapsed         | 11438     |
|    total_timesteps      | 794624    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.00461   |
|    learning_rate        | 0.0003    |
|    loss                 

Episode 324  finished with cumulative reward: -6119000.0 and 
with an average reward of: -2446.621351459416
number of steps in this episode: 2501
total steps till now: 807823
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2447.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.96e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 395       |
|    time_elapsed         | 11647     |
|    total_timesteps      | 808960    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.00546   |
|    learning_rate        | 0.0003    |
|    loss                 |

Episode 330  finished with cumulative reward: -9102500.0 and 
with an average reward of: -3639.5441823270694
number of steps in this episode: 2501
total steps till now: 822829
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3641.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.92e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 402       |
|    time_elapsed         | 11855     |
|    total_timesteps      | 823296    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0117    |
|    learning_rate        | 0.0003    |
|    loss                 

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.88e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 409       |
|    time_elapsed         | 12051     |
|    total_timesteps      | 837632    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.00922   |
|    learning_rate        | 0.0003    |
|    loss                 | 4.71e+09  |
|    n_updates            | 4080      |
|    policy_gradient_loss | -5.78e-06 |
|    std                  | 1         |
|    value_loss           | 1.59e+10  |
---------------------------------------
Episode 336  finished with cumulative reward: -6399500.0 and 
with an average reward of: -2558.7764894042384
number of s

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.73e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 416       |
|    time_elapsed         | 12261     |
|    total_timesteps      | 851968    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0103    |
|    learning_rate        | 0.0003    |
|    loss                 | 7.69e+08  |
|    n_updates            | 4150      |
|    policy_gradient_loss | -3.06e-06 |
|    std                  | 1         |
|    value_loss           | 5.08e+09  |
---------------------------------------
Episode 342  finished with cumulative reward: -10071500.0 and 
with an average reward of: -4026.9892043182726
number of 

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.57e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 423       |
|    time_elapsed         | 12457     |
|    total_timesteps      | 866304    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.011     |
|    learning_rate        | 0.0003    |
|    loss                 | 3.87e+09  |
|    n_updates            | 4220      |
|    policy_gradient_loss | -4.03e-06 |
|    std                  | 1         |
|    value_loss           | 6.9e+09   |
---------------------------------------
Episode 348  finished with cumulative reward: -3671000.0 and 
with an average reward of: -1467.8128748500599
number of s

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.57e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 430       |
|    time_elapsed         | 12642     |
|    total_timesteps      | 880640    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0108    |
|    learning_rate        | 0.0003    |
|    loss                 | 4.48e+09  |
|    n_updates            | 4290      |
|    policy_gradient_loss | -3.25e-06 |
|    std                  | 1         |
|    value_loss           | 1.12e+10  |
---------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e

Episode 359  finished with cumulative reward: -5099000.0 and 
with an average reward of: -2038.7844862055179
number of steps in this episode: 2501
total steps till now: 895358
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2039.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.54e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 438       |
|    time_elapsed         | 12850     |
|    total_timesteps      | 897024    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0117    |
|    learning_rate        | 0.0003    |
|    loss                 

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -285.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.55e+06 |
| time/                   |           |
|    fps                  | 69        |
|    iterations           | 445       |
|    time_elapsed         | 13035     |
|    total_timesteps      | 911360    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.012     |
|    learning_rate        | 0.0003    |
|    loss                 | 3.67e+09  |
|    n_updates            | 4440      |
|    policy_gradient_loss | -5.69e-06 |
|    std                  | 1         |
|    value_loss           | 1.07e+10  |
---------------------------------------
Episod

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3763.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.61e+06 |
| time/                   |           |
|    fps                  | 70        |
|    iterations           | 452       |
|    time_elapsed         | 13220     |
|    total_timesteps      | 925696    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.00876   |
|    learning_rate        | 0.0003    |
|    loss                 | 7.43e+09  |
|    n_updates            | 4510      |
|    policy_gradient_loss | -2.35e-06 |
|    std                  | 1         |
|    value_loss           | 1.41e+10  |
---------------------------------------
-----

Episode 377  finished with cumulative reward: -6093500.0 and 
with an average reward of: -2436.425429828069
number of steps in this episode: 2501
total steps till now: 940376
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2437.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.67e+06 |
| time/                   |           |
|    fps                  | 70        |
|    iterations           | 460       |
|    time_elapsed         | 13428     |
|    total_timesteps      | 942080    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0108    |
|    learning_rate        | 0.0003    |
|    loss                 |

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -591.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.56e+06 |
| time/                   |           |
|    fps                  | 70        |
|    iterations           | 467       |
|    time_elapsed         | 13614     |
|    total_timesteps      | 956416    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0156    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.84e+08  |
|    n_updates            | 4660      |
|    policy_gradient_loss | -3.12e-06 |
|    std                  | 1         |
|    value_loss           | 3.69e+09  |
---------------------------------------
Episod

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1682.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.46e+06 |
| time/                   |           |
|    fps                  | 70        |
|    iterations           | 474       |
|    time_elapsed         | 13802     |
|    total_timesteps      | 970752    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0113    |
|    learning_rate        | 0.0003    |
|    loss                 | 5.82e+09  |
|    n_updates            | 4730      |
|    policy_gradient_loss | -2.1e-06  |
|    std                  | 1         |
|    value_loss           | 1.52e+10  |
---------------------------------------
-----

Episode 395  finished with cumulative reward: -3212000.0 and 
with an average reward of: -1284.2862854858056
number of steps in this episode: 2501
total steps till now: 985394
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1284.8 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.36e+06     |
| time/                   |               |
|    fps                  | 70            |
|    iterations           | 482           |
|    time_elapsed         | 14011         |
|    total_timesteps      | 987136        |
| train/                  |               |
|    approx_kl            | 1.4551915e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0162        |
|    l

Episode 401  finished with cumulative reward: -9969500.0 and 
with an average reward of: -3986.205517792883
number of steps in this episode: 2501
total steps till now: 1000400
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3987.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.44e+06 |
| time/                   |           |
|    fps                  | 70        |
|    iterations           | 489       |
|    time_elapsed         | 14200     |
|    total_timesteps      | 1001472   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0122    |
|    learning_rate        | 0.0003    |
|    loss                 

Episode 407  finished with cumulative reward: -6680000.0 and 
with an average reward of: -2670.9316273490604
number of steps in this episode: 2501
total steps till now: 1015406
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2672.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.51e+06 |
| time/                   |           |
|    fps                  | 70        |
|    iterations           | 496       |
|    time_elapsed         | 14392     |
|    total_timesteps      | 1015808   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0117    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 413  finished with cumulative reward: -6603500.0 and 
with an average reward of: -2640.343862455018
number of steps in this episode: 2501
total steps till now: 1030412
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2641.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.44e+06 |
| time/                   |           |
|    fps                  | 70        |
|    iterations           | 504       |
|    time_elapsed         | 14601     |
|    total_timesteps      | 1032192   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.00982   |
|    learning_rate        | 0.0003    |
|    loss                 

Episode 419  finished with cumulative reward: -7878500.0 and 
with an average reward of: -3150.139944022391
number of steps in this episode: 2501
total steps till now: 1045418
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3151.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.47e+06 |
| time/                   |           |
|    fps                  | 70        |
|    iterations           | 511       |
|    time_elapsed         | 14787     |
|    total_timesteps      | 1046528   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0113    |
|    learning_rate        | 0.0003    |
|    loss                 

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -683.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.36e+06 |
| time/                   |           |
|    fps                  | 70        |
|    iterations           | 518       |
|    time_elapsed         | 14976     |
|    total_timesteps      | 1060864   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0138    |
|    learning_rate        | 0.0003    |
|    loss                 | 6.21e+09  |
|    n_updates            | 5170      |
|    policy_gradient_loss | -2.47e-06 |
|    std                  | 1         |
|    value_loss           | 1.39e+10  |
---------------------------------------
------

Episode 431  finished with cumulative reward: -11244500.0 and 
with an average reward of: -4496.001599360256
number of steps in this episode: 2501
total steps till now: 1075430
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -4497.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.42e+06 |
| time/                   |           |
|    fps                  | 70        |
|    iterations           | 526       |
|    time_elapsed         | 15185     |
|    total_timesteps      | 1077248   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.011     |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1560.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.35e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 533       |
|    time_elapsed         | 15373     |
|    total_timesteps      | 1091584   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0152    |
|    learning_rate        | 0.0003    |
|    loss                 | 2.45e+09  |
|    n_updates            | 5320      |
|    policy_gradient_loss | -3.66e-06 |
|    std                  | 1         |
|    value_loss           | 1.03e+10  |
---------------------------------------
Episo

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -795.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.4e+06  |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 540       |
|    time_elapsed         | 15561     |
|    total_timesteps      | 1105920   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0104    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.14e+09  |
|    n_updates            | 5390      |
|    policy_gradient_loss | -2.41e-06 |
|    std                  | 1         |
|    value_loss           | 6.36e+09  |
---------------------------------------
Episod

Episode 449  finished with cumulative reward: -14559500.0 and 
with an average reward of: -5821.471411435426
number of steps in this episode: 2501
total steps till now: 1120448
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -5823.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.47e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 548       |
|    time_elapsed         | 15771     |
|    total_timesteps      | 1122304   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0104    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2651.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.55e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 555       |
|    time_elapsed         | 15960     |
|    total_timesteps      | 1136640   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0137    |
|    learning_rate        | 0.0003    |
|    loss                 | 4.78e+09  |
|    n_updates            | 5540      |
|    policy_gradient_loss | -3.41e-06 |
|    std                  | 1         |
|    value_loss           | 9.07e+09  |
---------------------------------------
Episo

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2519.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.62e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 562       |
|    time_elapsed         | 16148     |
|    total_timesteps      | 1150976   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0112    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.43e+10  |
|    n_updates            | 5610      |
|    policy_gradient_loss | -3.54e-06 |
|    std                  | 1         |
|    value_loss           | 2.8e+10   |
---------------------------------------
Episo

Episode 467  finished with cumulative reward: -3977000.0 and 
with an average reward of: -1590.1639344262296
number of steps in this episode: 2501
total steps till now: 1165466
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1590.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.55e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 570       |
|    time_elapsed         | 16359     |
|    total_timesteps      | 1167360   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0148    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 473  finished with cumulative reward: -3237500.0 and 
with an average reward of: -1294.4822071171532
number of steps in this episode: 2501
total steps till now: 1180472
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1295.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.56e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 577       |
|    time_elapsed         | 16547     |
|    total_timesteps      | 1181696   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.016     |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1723.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.55e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 584       |
|    time_elapsed         | 16734     |
|    total_timesteps      | 1196032   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0151    |
|    learning_rate        | 0.0003    |
|    loss                 | 8.14e+09  |
|    n_updates            | 5830      |
|    policy_gradient_loss | -2.21e-06 |
|    std                  | 1         |
|    value_loss           | 1.04e+10  |
---------------------------------------
Episo

Episode 485  finished with cumulative reward: -6629000.0 and 
with an average reward of: -2650.5397840863657
number of steps in this episode: 2501
total steps till now: 1210484
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2651.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.65e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 592       |
|    time_elapsed         | 16945     |
|    total_timesteps      | 1212416   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0178    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 491  finished with cumulative reward: -7980500.0 and 
with an average reward of: -3190.9236305477807
number of steps in this episode: 2501
total steps till now: 1225490
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3192.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.83e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 599       |
|    time_elapsed         | 17133     |
|    total_timesteps      | 1226752   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0165    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -4344.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.93e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 606       |
|    time_elapsed         | 17322     |
|    total_timesteps      | 1241088   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0166    |
|    learning_rate        | 0.0003    |
|    loss                 | 7.23e+09  |
|    n_updates            | 6050      |
|    policy_gradient_loss | -2.34e-06 |
|    std                  | 1         |
|    value_loss           | 1.26e+10  |
---------------------------------------
Episo

Episode 503  finished with cumulative reward: -9408500.0 and 
with an average reward of: -3761.895241903239
number of steps in this episode: 2501
total steps till now: 1255502
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3763.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.95e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 614       |
|    time_elapsed         | 17533     |
|    total_timesteps      | 1257472   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0171    |
|    learning_rate        | 0.0003    |
|    loss                 

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2151.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.93e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 621       |
|    time_elapsed         | 17722     |
|    total_timesteps      | 1271808   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0153    |
|    learning_rate        | 0.0003    |
|    loss                 | 2.36e+09  |
|    n_updates            | 6200      |
|    policy_gradient_loss | -1.61e-06 |
|    std                  | 1         |
|    value_loss           | 6.18e+09  |
---------------------------------------
Episo

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.92e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 628       |
|    time_elapsed         | 17908     |
|    total_timesteps      | 1286144   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0164    |
|    learning_rate        | 0.0003    |
|    loss                 | 5.31e+09  |
|    n_updates            | 6270      |
|    policy_gradient_loss | -1.61e-06 |
|    std                  | 1         |
|    value_loss           | 1.39e+10  |
---------------------------------------
Episode 516  finished with cumulative reward: -1835000.0 and 
with an average reward of: -733.7065173930428
number of st

Episode 521  finished with cumulative reward: -7343000.0 and 
with an average reward of: -2936.0255897640945
number of steps in this episode: 2501
total steps till now: 1300520
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2937.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.86e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 636       |
|    time_elapsed         | 18119     |
|    total_timesteps      | 1302528   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0174    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 527  finished with cumulative reward: -1452500.0 and 
with an average reward of: -580.7676929228309
number of steps in this episode: 2501
total steps till now: 1315526
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -581.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6e+06    |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 643       |
|    time_elapsed         | 18306     |
|    total_timesteps      | 1316864   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.027     |
|    learning_rate        | 0.0003    |
|    loss                 |

Episode 533  finished with cumulative reward: -6807500.0 and 
with an average reward of: -2721.911235505798
number of steps in this episode: 2501
total steps till now: 1330532
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2723.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.89e+06 |
| time/                   |           |
|    fps                  | 71        |
|    iterations           | 650       |
|    time_elapsed         | 18495     |
|    total_timesteps      | 1331200   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0177    |
|    learning_rate        | 0.0003    |
|    loss                 

Episode 539  finished with cumulative reward: -4869500.0 and 
with an average reward of: -1947.0211915233906
number of steps in this episode: 2501
total steps till now: 1345538
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1947.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.91e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 658       |
|    time_elapsed         | 18705     |
|    total_timesteps      | 1347584   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0161    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2029.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.16e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 665       |
|    time_elapsed         | 18894     |
|    total_timesteps      | 1361920   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.023     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.99e+09  |
|    n_updates            | 6640      |
|    policy_gradient_loss | -2.16e-06 |
|    std                  | 1         |
|    value_loss           | 5.52e+09  |
---------------------------------------
Episo

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6e+06    |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 672       |
|    time_elapsed         | 19082     |
|    total_timesteps      | 1376256   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0174    |
|    learning_rate        | 0.0003    |
|    loss                 | 2.54e+09  |
|    n_updates            | 6710      |
|    policy_gradient_loss | -4.36e-06 |
|    std                  | 1         |
|    value_loss           | 7.42e+09  |
---------------------------------------
Episode 552  finished with cumulative reward: -6017000.0 and 
with an average reward of: -2405.837664934026
number of st

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.03e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 680       |
|    time_elapsed         | 19291     |
|    total_timesteps      | 1392640   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0259    |
|    learning_rate        | 0.0003    |
|    loss                 | 8.92e+08  |
|    n_updates            | 6790      |
|    policy_gradient_loss | -2.8e-06  |
|    std                  | 1         |
|    value_loss           | 2.21e+09  |
---------------------------------------
Episode 558  finished with cumulative reward: -7394000.0 and 
with an average reward of: -2956.4174330267892
number of s

Episode 564  finished with cumulative reward: -3722000.0 and 
with an average reward of: -1488.2047181127548
number of steps in this episode: 2501
total steps till now: 1408063
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1488.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.97e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 688       |
|    time_elapsed         | 19508     |
|    total_timesteps      | 1409024   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0192    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2845.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.97e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 695       |
|    time_elapsed         | 19698     |
|    total_timesteps      | 1423360   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0183    |
|    learning_rate        | 0.0003    |
|    loss                 | 8.6e+09   |
|    n_updates            | 6940      |
|    policy_gradient_loss | -1.37e-06 |
|    std                  | 1         |
|    value_loss           | 1.61e+10  |
---------------------------------------
-----

Episode 576  finished with cumulative reward: -3467000.0 and 
with an average reward of: -1386.2455017992802
number of steps in this episode: 2501
total steps till now: 1438075
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1386.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.91e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 703       |
|    time_elapsed         | 19906     |
|    total_timesteps      | 1439744   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0285    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 582  finished with cumulative reward: -3926000.0 and 
with an average reward of: -1569.7720911635347
number of steps in this episode: 2501
total steps till now: 1453081
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1570.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.92e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 710       |
|    time_elapsed         | 20093     |
|    total_timesteps      | 1454080   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0215    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 588  finished with cumulative reward: -8516000.0 and 
with an average reward of: -3405.0379848060775
number of steps in this episode: 2501
total steps till now: 1468087
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3406.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.91e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 717       |
|    time_elapsed         | 20283     |
|    total_timesteps      | 1468416   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0132    |
|    learning_rate        | 0.0003    |
|    loss                

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.92e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 724       |
|    time_elapsed         | 20466     |
|    total_timesteps      | 1482752   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0168    |
|    learning_rate        | 0.0003    |
|    loss                 | 4.77e+09  |
|    n_updates            | 7230      |
|    policy_gradient_loss | -1.8e-06  |
|    std                  | 1         |
|    value_loss           | 7.31e+09  |
---------------------------------------
Episode 594  finished with cumulative reward: -4487000.0 and 
with an average reward of: -1794.0823670531788
number of s

Episode 600  finished with cumulative reward: -7827500.0 and 
with an average reward of: -3129.748100759696
number of steps in this episode: 2501
total steps till now: 1498099
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3131.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.96e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 732       |
|    time_elapsed         | 20684     |
|    total_timesteps      | 1499136   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0158    |
|    learning_rate        | 0.0003    |
|    loss                 

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3355.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.94e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 739       |
|    time_elapsed         | 20871     |
|    total_timesteps      | 1513472   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0156    |
|    learning_rate        | 0.0003    |
|    loss                 | 5.97e+09  |
|    n_updates            | 7380      |
|    policy_gradient_loss | -1.13e-06 |
|    std                  | 1         |
|    value_loss           | 1.48e+10  |
---------------------------------------
-----

Episode 612  finished with cumulative reward: -6246500.0 and 
with an average reward of: -2497.6009596161534
number of steps in this episode: 2501
total steps till now: 1528111
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2498.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.13e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 747       |
|    time_elapsed         | 21079     |
|    total_timesteps      | 1529856   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0181    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -856.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.09e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 754       |
|    time_elapsed         | 21266     |
|    total_timesteps      | 1544192   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0253    |
|    learning_rate        | 0.0003    |
|    loss                 | 3.68e+09  |
|    n_updates            | 7530      |
|    policy_gradient_loss | -3.42e-06 |
|    std                  | 1         |
|    value_loss           | 4.87e+09  |
---------------------------------------
Episod

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.08e+06    |
| time/                   |              |
|    fps                  | 72           |
|    iterations           | 761          |
|    time_elapsed         | 21452        |
|    total_timesteps      | 1558528      |
| train/                  |              |
|    approx_kl            | 2.910383e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0163       |
|    learning_rate        | 0.0003       |
|    loss                 | 1.56e+10     |
|    n_updates            | 7600         |
|    policy_gradient_loss | -5.5e-06     |
|    std                  | 1            |
|    value_loss           | 3.7e+10      |
------------------------------------------
---------------------------------------
| rollout/    

Episode 630  finished with cumulative reward: -3824000.0 and 
with an average reward of: -1528.9884046381446
number of steps in this episode: 2501
total steps till now: 1573129
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1529.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.22e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 769       |
|    time_elapsed         | 21660     |
|    total_timesteps      | 1574912   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0254    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 636  finished with cumulative reward: -8414000.0 and 
with an average reward of: -3364.2542982806876
number of steps in this episode: 2501
total steps till now: 1588135
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3365.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.2e+06  |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 776       |
|    time_elapsed         | 21857     |
|    total_timesteps      | 1589248   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0222    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2468.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.1e+06  |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 783       |
|    time_elapsed         | 22047     |
|    total_timesteps      | 1603584   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0228    |
|    learning_rate        | 0.0003    |
|    loss                 | 4.1e+09   |
|    n_updates            | 7820      |
|    policy_gradient_loss | -1.52e-06 |
|    std                  | 1         |
|    value_loss           | 1.11e+10  |
---------------------------------------
-----

Episode 648  finished with cumulative reward: -6935000.0 and 
with an average reward of: -2772.890843662535
number of steps in this episode: 2501
total steps till now: 1618147
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2774.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.94e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 791       |
|    time_elapsed         | 22257     |
|    total_timesteps      | 1619968   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0184    |
|    learning_rate        | 0.0003    |
|    loss                 

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2876.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.87e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 798       |
|    time_elapsed         | 22443     |
|    total_timesteps      | 1634304   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0226    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.03e+10  |
|    n_updates            | 7970      |
|    policy_gradient_loss | -1.78e-06 |
|    std                  | 1         |
|    value_loss           | 1.74e+10  |
---------------------------------------
Episo

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.83e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 805       |
|    time_elapsed         | 22632     |
|    total_timesteps      | 1648640   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.025     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.94e+09  |
|    n_updates            | 8040      |
|    policy_gradient_loss | -2.71e-06 |
|    std                  | 1         |
|    value_loss           | 5.72e+09  |
---------------------------------------
Episode 661  finished with cumulative reward: -8720000.0 and 
with an average reward of: -3486.605357856857
number of st

Episode 666  finished with cumulative reward: -8618000.0 and 
with an average reward of: -3445.8216713314673
number of steps in this episode: 2501
total steps till now: 1663165
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3447.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.95e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 813       |
|    time_elapsed         | 22843     |
|    total_timesteps      | 1665024   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0186    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3273.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.01e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 820       |
|    time_elapsed         | 23032     |
|    total_timesteps      | 1679360   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0189    |
|    learning_rate        | 0.0003    |
|    loss                 | 5.32e+09  |
|    n_updates            | 8190      |
|    policy_gradient_loss | -1.67e-06 |
|    std                  | 1         |
|    value_loss           | 1.01e+10  |
---------------------------------------
Episo

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.15e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 827       |
|    time_elapsed         | 23220     |
|    total_timesteps      | 1693696   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0207    |
|    learning_rate        | 0.0003    |
|    loss                 | 6.23e+09  |
|    n_updates            | 8260      |
|    policy_gradient_loss | -3.4e-06  |
|    std                  | 1         |
|    value_loss           | 1.58e+10  |
---------------------------------------
Episode 679  finished with cumulative reward: -2982500.0 and 
with an average reward of: -1192.5229908036786
number of s

Episode 684  finished with cumulative reward: -6042500.0 and 
with an average reward of: -2416.0335865653738
number of steps in this episode: 2501
total steps till now: 1708183
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2417.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.18e+06 |
| time/                   |           |
|    fps                  | 72        |
|    iterations           | 835       |
|    time_elapsed         | 23429     |
|    total_timesteps      | 1710080   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0202    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 690  finished with cumulative reward: -3186500.0 and 
with an average reward of: -1274.0903638544582
number of steps in this episode: 2501
total steps till now: 1723189
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1274.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.06e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 842       |
|    time_elapsed         | 23617     |
|    total_timesteps      | 1724416   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0206    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3182.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.09e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 849       |
|    time_elapsed         | 23808     |
|    total_timesteps      | 1738752   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0299    |
|    learning_rate        | 0.0003    |
|    loss                 | 2.92e+09  |
|    n_updates            | 8480      |
|    policy_gradient_loss | -2.63e-06 |
|    std                  | 1         |
|    value_loss           | 5.58e+09  |
---------------------------------------
Episo

Episode 702  finished with cumulative reward: -2829500.0 and 
with an average reward of: -1131.3474610155938
number of steps in this episode: 2501
total steps till now: 1753201
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1131.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.96e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 857       |
|    time_elapsed         | 24018     |
|    total_timesteps      | 1755136   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0228    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3579.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.07e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 864       |
|    time_elapsed         | 24208     |
|    total_timesteps      | 1769472   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0226    |
|    learning_rate        | 0.0003    |
|    loss                 | 8.44e+09  |
|    n_updates            | 8630      |
|    policy_gradient_loss | -3.48e-06 |
|    std                  | 1         |
|    value_loss           | 2.2e+10   |
---------------------------------------
Episo

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.05e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 871       |
|    time_elapsed         | 24398     |
|    total_timesteps      | 1783808   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0187    |
|    learning_rate        | 0.0003    |
|    loss                 | 3.73e+09  |
|    n_updates            | 8700      |
|    policy_gradient_loss | -1.94e-06 |
|    std                  | 1         |
|    value_loss           | 1.23e+10  |
---------------------------------------
Episode 715  finished with cumulative reward: -3824000.0 and 
with an average reward of: -1528.9884046381446
number of s

Episode 720  finished with cumulative reward: -3416000.0 and 
with an average reward of: -1365.8536585365853
number of steps in this episode: 2501
total steps till now: 1798219
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1366.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.07e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 879       |
|    time_elapsed         | 24610     |
|    total_timesteps      | 1800192   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0242    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1407.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.96e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 886       |
|    time_elapsed         | 24799     |
|    total_timesteps      | 1814528   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0184    |
|    learning_rate        | 0.0003    |
|    loss                 | 3.59e+09  |
|    n_updates            | 8850      |
|    policy_gradient_loss | -1.69e-06 |
|    std                  | 1         |
|    value_loss           | 9.66e+09  |
---------------------------------------
Episo

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.94e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 893       |
|    time_elapsed         | 24990     |
|    total_timesteps      | 1828864   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0186    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.12e+10  |
|    n_updates            | 8920      |
|    policy_gradient_loss | -1.69e-06 |
|    std                  | 1         |
|    value_loss           | 2.46e+10  |
---------------------------------------
Episode 733  finished with cumulative reward: -3671000.0 and 
with an average reward of: -1467.8128748500599
number of s

Episode 738  finished with cumulative reward: -3696500.0 and 
with an average reward of: -1478.0087964814074
number of steps in this episode: 2501
total steps till now: 1843237
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1478.6 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6e+06       |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 901          |
|    time_elapsed         | 25201        |
|    total_timesteps      | 1845248      |
| train/                  |              |
|    approx_kl            | 4.947651e-10 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0305       |
|    learning_rate  

Episode 744  finished with cumulative reward: -6756500.0 and 
with an average reward of: -2701.5193922431026
number of steps in this episode: 2501
total steps till now: 1858243
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2702.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.02e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 908       |
|    time_elapsed         | 25391     |
|    total_timesteps      | 1859584   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0197    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 750  finished with cumulative reward: -2600000.0 and 
with an average reward of: -1039.5841663334666
number of steps in this episode: 2501
total steps till now: 1873249
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1040.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.94e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 915       |
|    time_elapsed         | 25580     |
|    total_timesteps      | 1873920   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0273    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1784.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.92e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 922       |
|    time_elapsed         | 25771     |
|    total_timesteps      | 1888256   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0232    |
|    learning_rate        | 0.0003    |
|    loss                 | 8.46e+09  |
|    n_updates            | 9210      |
|    policy_gradient_loss | -2.11e-06 |
|    std                  | 1         |
|    value_loss           | 1.46e+10  |
---------------------------------------
-----

Episode 762  finished with cumulative reward: -13590500.0 and 
with an average reward of: -5434.026389444222
number of steps in this episode: 2501
total steps till now: 1903261
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -5436.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6e+06    |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 930       |
|    time_elapsed         | 25983     |
|    total_timesteps      | 1904640   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0221    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 768  finished with cumulative reward: -11499500.0 and 
with an average reward of: -4597.96081567373
number of steps in this episode: 2501
total steps till now: 1918267
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -4599.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.96e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 937       |
|    time_elapsed         | 26173     |
|    total_timesteps      | 1918976   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0208    |
|    learning_rate        | 0.0003    |
|    loss                 

Episode 774  finished with cumulative reward: -5022500.0 and 
with an average reward of: -2008.1967213114754
number of steps in this episode: 2501
total steps till now: 1933273
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2009.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.78e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 944       |
|    time_elapsed         | 26362     |
|    total_timesteps      | 1933312   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0224    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1896.8 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.71e+06     |
| time/                   |               |
|    fps                  | 73            |
|    iterations           | 951           |
|    time_elapsed         | 26545         |
|    total_timesteps      | 1947648       |
| train/                  |               |
|    approx_kl            | 5.7625584e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0303        |
|    learning_rate        | 0.0003        |
|    loss                 | 1.73e+09      |
|    n_updates            | 9500          |
|    policy_gradient_loss | -1.66e-05     |
|    std                  | 1             |
|    

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2559.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.68e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 958       |
|    time_elapsed         | 26735     |
|    total_timesteps      | 1961984   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0176    |
|    learning_rate        | 0.0003    |
|    loss                 | 6.98e+09  |
|    n_updates            | 9570      |
|    policy_gradient_loss | -1.12e-06 |
|    std                  | 1         |
|    value_loss           | 1.56e+10  |
---------------------------------------
Episo

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3049.4 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.69e+06    |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 965          |
|    time_elapsed         | 26925        |
|    total_timesteps      | 1976320      |
| train/                  |              |
|    approx_kl            | 8.032657e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.027        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.29e+09     |
|    n_updates            | 9640         |
|    policy_gradient_loss | -1.4e-05     |
|    std                  | 1            |
|    value_loss          

Episode 797  finished with cumulative reward: -6221000.0 and 
with an average reward of: -2487.405037984806
number of steps in this episode: 2501
total steps till now: 1990796
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2488.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.64e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 973       |
|    time_elapsed         | 27137     |
|    total_timesteps      | 1992704   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0211    |
|    learning_rate        | 0.0003    |
|    loss                 

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1917.2 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.68e+06     |
| time/                   |               |
|    fps                  | 73            |
|    iterations           | 980           |
|    time_elapsed         | 27327         |
|    total_timesteps      | 2007040       |
| train/                  |               |
|    approx_kl            | 1.1641532e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0266        |
|    learning_rate        | 0.0003        |
|    loss                 | 4.37e+09      |
|    n_updates            | 9790          |
|    policy_gradient_loss | -4.32e-06     |
|    std                  | 1             |
|    

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1621.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.4e+06  |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 987       |
|    time_elapsed         | 27519     |
|    total_timesteps      | 2021376   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0267    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.03e+09  |
|    n_updates            | 9860      |
|    policy_gradient_loss | -1.55e-06 |
|    std                  | 1         |
|    value_loss           | 5.23e+09  |
---------------------------------------
Episo

Episode 815  finished with cumulative reward: -9357500.0 and 
with an average reward of: -3741.5033986405438
number of steps in this episode: 2501
total steps till now: 2035814
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3743.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.58e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 995       |
|    time_elapsed         | 27728     |
|    total_timesteps      | 2037760   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0189    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -4722.2 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.64e+06    |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 1002         |
|    time_elapsed         | 27917        |
|    total_timesteps      | 2052096      |
| train/                  |              |
|    approx_kl            | 5.820766e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0259       |
|    learning_rate        | 0.0003       |
|    loss                 | 5.86e+09     |
|    n_updates            | 10010        |
|    policy_gradient_loss | -4.46e-06    |
|    std                  | 1            |
|    value_loss          

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2202.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.77e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1009      |
|    time_elapsed         | 28105     |
|    total_timesteps      | 2066432   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0258    |
|    learning_rate        | 0.0003    |
|    loss                 | 6.05e+09  |
|    n_updates            | 10080     |
|    policy_gradient_loss | -2.05e-06 |
|    std                  | 1         |
|    value_loss           | 9.82e+09  |
---------------------------------------
Episo

Episode 833  finished with cumulative reward: -3849500.0 and 
with an average reward of: -1539.1843262694922
number of steps in this episode: 2501
total steps till now: 2080832
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1539.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.68e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1017      |
|    time_elapsed         | 28312     |
|    total_timesteps      | 2082816   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0265    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3365.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.73e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1024      |
|    time_elapsed         | 28501     |
|    total_timesteps      | 2097152   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0187    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.31e+10  |
|    n_updates            | 10230     |
|    policy_gradient_loss | -1.6e-06  |
|    std                  | 1         |
|    value_loss           | 2.35e+10  |
---------------------------------------
Episo

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.55e+06     |
| time/                   |               |
|    fps                  | 73            |
|    iterations           | 1031          |
|    time_elapsed         | 28693         |
|    total_timesteps      | 2111488       |
| train/                  |               |
|    approx_kl            | 9.0221874e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0373        |
|    learning_rate        | 0.0003        |
|    loss                 | 4.66e+07      |
|    n_updates            | 10300         |
|    policy_gradient_loss | -5.61e-06     |
|    std                  | 1             |
|    value_loss           | 1.21e+09      |
-------------------------------------------
Episode 846  finished with cumul

Episode 851  finished with cumulative reward: -7572500.0 and 
with an average reward of: -3027.7888844462213
number of steps in this episode: 2501
total steps till now: 2125850
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3029.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.77e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1039      |
|    time_elapsed         | 28903     |
|    total_timesteps      | 2127872   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0247    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 857  finished with cumulative reward: -3441500.0 and 
with an average reward of: -1376.0495801679328
number of steps in this episode: 2501
total steps till now: 2140856
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1376.6 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.86e+06    |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 1046         |
|    time_elapsed         | 29093        |
|    total_timesteps      | 2142208      |
| train/                  |              |
|    approx_kl            | 8.731149e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0341       |
|    learning_rate  

Episode 863  finished with cumulative reward: -7980500.0 and 
with an average reward of: -3190.9236305477807
number of steps in this episode: 2501
total steps till now: 2155862
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3192.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.83e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1053      |
|    time_elapsed         | 29283     |
|    total_timesteps      | 2156544   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0307    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 869  finished with cumulative reward: -4283000.0 and 
with an average reward of: -1712.5149940023991
number of steps in this episode: 2501
total steps till now: 2170868
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1713.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.95e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1060      |
|    time_elapsed         | 29472     |
|    total_timesteps      | 2170880   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0207    |
|    learning_rate        | 0.0003    |
|    loss                

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6e+06    |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1067      |
|    time_elapsed         | 29655     |
|    total_timesteps      | 2185216   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0181    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.05e+10  |
|    n_updates            | 10660     |
|    policy_gradient_loss | -1.81e-06 |
|    std                  | 1         |
|    value_loss           | 1.65e+10  |
---------------------------------------
Episode 875  finished with cumulative reward: -4971500.0 and 
with an average reward of: -1987.8048780487804
number of s

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.06e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1074      |
|    time_elapsed         | 29845     |
|    total_timesteps      | 2199552   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0269    |
|    learning_rate        | 0.0003    |
|    loss                 | 5.46e+09  |
|    n_updates            | 10730     |
|    policy_gradient_loss | -2.87e-06 |
|    std                  | 1         |
|    value_loss           | 1.39e+10  |
---------------------------------------
Episode 881  finished with cumulative reward: -7113500.0 and 
with an average reward of: -2844.2622950819673
number of s

Episode 887  finished with cumulative reward: -5226500.0 and 
with an average reward of: -2089.764094362255
number of steps in this episode: 2501
total steps till now: 2215886
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2090.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.1e+06  |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1082      |
|    time_elapsed         | 30060     |
|    total_timesteps      | 2215936   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0286    |
|    learning_rate        | 0.0003    |
|    loss                 

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.15e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1089      |
|    time_elapsed         | 30244     |
|    total_timesteps      | 2230272   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.022     |
|    learning_rate        | 0.0003    |
|    loss                 | 9.03e+09  |
|    n_updates            | 10880     |
|    policy_gradient_loss | -2.83e-06 |
|    std                  | 1         |
|    value_loss           | 1.73e+10  |
---------------------------------------
Episode 893  finished with cumulative reward: -6960500.0 and 
with an average reward of: -2783.0867652938823
number of s

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2478.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.11e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1096      |
|    time_elapsed         | 30433     |
|    total_timesteps      | 2244608   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.023     |
|    learning_rate        | 0.0003    |
|    loss                 | 7.2e+09   |
|    n_updates            | 10950     |
|    policy_gradient_loss | -9.71e-07 |
|    std                  | 1         |
|    value_loss           | 1.3e+10   |
---------------------------------------
Episo

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1886.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.18e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1103      |
|    time_elapsed         | 30622     |
|    total_timesteps      | 2258944   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0281    |
|    learning_rate        | 0.0003    |
|    loss                 | 6.52e+09  |
|    n_updates            | 11020     |
|    policy_gradient_loss | -3.04e-06 |
|    std                  | 1         |
|    value_loss           | 1.15e+10  |
---------------------------------------
Episo

Episode 910  finished with cumulative reward: -4130000.0 and 
with an average reward of: -1651.3394642143144
number of steps in this episode: 2501
total steps till now: 2273409
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1652.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.23e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1111      |
|    time_elapsed         | 30832     |
|    total_timesteps      | 2275328   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0248    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2345.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.16e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1118      |
|    time_elapsed         | 31020     |
|    total_timesteps      | 2289664   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0312    |
|    learning_rate        | 0.0003    |
|    loss                 | 7.02e+09  |
|    n_updates            | 11170     |
|    policy_gradient_loss | -4.35e-06 |
|    std                  | 1         |
|    value_loss           | 1.1e+10   |
---------------------------------------
Episo

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.11e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1125      |
|    time_elapsed         | 31210     |
|    total_timesteps      | 2304000   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0288    |
|    learning_rate        | 0.0003    |
|    loss                 | 8.26e+08  |
|    n_updates            | 11240     |
|    policy_gradient_loss | -1.25e-06 |
|    std                  | 1         |
|    value_loss           | 7.01e+09  |
---------------------------------------
Episode 923  finished with cumulative reward: -10887500.0 and 
with an average reward of: -4353.258696521391
number of s

Episode 928  finished with cumulative reward: -6578000.0 and 
with an average reward of: -2630.1479408236705
number of steps in this episode: 2501
total steps till now: 2318427
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2631.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.11e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1133      |
|    time_elapsed         | 31422     |
|    total_timesteps      | 2320384   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.022     |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2876.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.12e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1140      |
|    time_elapsed         | 31610     |
|    total_timesteps      | 2334720   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0266    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.29e+10  |
|    n_updates            | 11390     |
|    policy_gradient_loss | -1.53e-06 |
|    std                  | 1         |
|    value_loss           | 1.56e+10  |
---------------------------------------
Episo

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2978.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.08e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1147      |
|    time_elapsed         | 31800     |
|    total_timesteps      | 2349056   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0413    |
|    learning_rate        | 0.0003    |
|    loss                 | 3.58e+09  |
|    n_updates            | 11460     |
|    policy_gradient_loss | -1.41e-06 |
|    std                  | 1         |
|    value_loss           | 4.61e+09  |
---------------------------------------
Episo

Episode 946  finished with cumulative reward: -4691000.0 and 
with an average reward of: -1875.6497401039585
number of steps in this episode: 2501
total steps till now: 2363445
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1876.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.18e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1155      |
|    time_elapsed         | 32010     |
|    total_timesteps      | 2365440   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0273    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 952  finished with cumulative reward: -5507000.0 and 
with an average reward of: -2201.919232307077
number of steps in this episode: 2501
total steps till now: 2378451
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2202.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.06e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1162      |
|    time_elapsed         | 32200     |
|    total_timesteps      | 2379776   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0313    |
|    learning_rate        | 0.0003    |
|    loss                 

Episode 958  finished with cumulative reward: -1503500.0 and 
with an average reward of: -601.1595361855258
number of steps in this episode: 2501
total steps till now: 2393457
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -601.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.97e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1169      |
|    time_elapsed         | 32388     |
|    total_timesteps      | 2394112   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0276    |
|    learning_rate        | 0.0003    |
|    loss                 |

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.85e+06    |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 1176         |
|    time_elapsed         | 32571        |
|    total_timesteps      | 2408448      |
| train/                  |              |
|    approx_kl            | 7.124618e-08 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.035        |
|    learning_rate        | 0.0003       |
|    loss                 | 5.02e+09     |
|    n_updates            | 11750        |
|    policy_gradient_loss | -1.18e-05    |
|    std                  | 1            |
|    value_loss           | 7.29e+09     |
------------------------------------------
Episode 964  finished with cumulative reward: -3671000

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2519.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.72e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1183      |
|    time_elapsed         | 32763     |
|    total_timesteps      | 2422784   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0319    |
|    learning_rate        | 0.0003    |
|    loss                 | 5.53e+09  |
|    n_updates            | 11820     |
|    policy_gradient_loss | -2.48e-06 |
|    std                  | 1         |
|    value_loss           | 1.56e+10  |
---------------------------------------
Episo

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2019.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.63e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1190      |
|    time_elapsed         | 32954     |
|    total_timesteps      | 2437120   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0267    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.57e+09  |
|    n_updates            | 11890     |
|    policy_gradient_loss | -8.26e-07 |
|    std                  | 1         |
|    value_loss           | 9.24e+09  |
---------------------------------------
Episo

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.59e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1197      |
|    time_elapsed         | 33142     |
|    total_timesteps      | 2451456   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0251    |
|    learning_rate        | 0.0003    |
|    loss                 | 3.62e+09  |
|    n_updates            | 11960     |
|    policy_gradient_loss | -2.45e-06 |
|    std                  | 1         |
|    value_loss           | 1.28e+10  |
---------------------------------------
Episode 982  finished with cumulative reward: -5864000.0 and 
with an average reward of: -2344.6621351459416
number of s

Episode 987  finished with cumulative reward: -6731000.0 and 
with an average reward of: -2691.3234706117555
number of steps in this episode: 2501
total steps till now: 2465986
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2692.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.62e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1205      |
|    time_elapsed         | 33354     |
|    total_timesteps      | 2467840   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0269    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2712.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.56e+06 |
| time/                   |           |
|    fps                  | 73        |
|    iterations           | 1212      |
|    time_elapsed         | 33544     |
|    total_timesteps      | 2482176   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0268    |
|    learning_rate        | 0.0003    |
|    loss                 | 4.89e+09  |
|    n_updates            | 12110     |
|    policy_gradient_loss | -4.7e-06  |
|    std                  | 1         |
|    value_loss           | 1.55e+10  |
---------------------------------------
Episo

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.51e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1219      |
|    time_elapsed         | 33735     |
|    total_timesteps      | 2496512   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0225    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.04e+10  |
|    n_updates            | 12180     |
|    policy_gradient_loss | -1.91e-06 |
|    std                  | 1         |
|    value_loss           | 1.91e+10  |
---------------------------------------
Episode 1000  finished with cumulative reward: -4410500.0 and 
with an average reward of: -1763.4946021591363
number of 

Episode 1005  finished with cumulative reward: -6782000.0 and 
with an average reward of: -2711.71531387445
number of steps in this episode: 2501
total steps till now: 2511004
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2712.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.39e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1227      |
|    time_elapsed         | 33947     |
|    total_timesteps      | 2512896   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.017     |
|    learning_rate        | 0.0003    |
|    loss                 

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2957.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.44e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1234      |
|    time_elapsed         | 34134     |
|    total_timesteps      | 2527232   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0258    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.02e+10  |
|    n_updates            | 12330     |
|    policy_gradient_loss | -1.86e-06 |
|    std                  | 1         |
|    value_loss           | 1.9e+10   |
---------------------------------------
Episo

Episode 1017  finished with cumulative reward: -4614500.0 and 
with an average reward of: -1845.061975209916
number of steps in this episode: 2501
total steps till now: 2541016
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1845.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.36e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1241      |
|    time_elapsed         | 34324     |
|    total_timesteps      | 2541568   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0303    |
|    learning_rate        | 0.0003    |
|    loss                

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.26e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1248      |
|    time_elapsed         | 34506     |
|    total_timesteps      | 2555904   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0273    |
|    learning_rate        | 0.0003    |
|    loss                 | 5.7e+09   |
|    n_updates            | 12470     |
|    policy_gradient_loss | -1.55e-06 |
|    std                  | 1         |
|    value_loss           | 1.29e+10  |
---------------------------------------
Episode 1023  finished with cumulative reward: -5226500.0 and 
with an average reward of: -2089.764094362255
number of s

Episode 1029  finished with cumulative reward: -3722000.0 and 
with an average reward of: -1488.2047181127548
number of steps in this episode: 2501
total steps till now: 2571028
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1488.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.2e+06  |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1256      |
|    time_elapsed         | 34725     |
|    total_timesteps      | 2572288   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0261    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1035  finished with cumulative reward: -8720000.0 and 
with an average reward of: -3486.605357856857
number of steps in this episode: 2501
total steps till now: 2586034
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3488.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.24e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1263      |
|    time_elapsed         | 34915     |
|    total_timesteps      | 2586624   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0289    |
|    learning_rate        | 0.0003    |
|    loss                

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.27e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1270      |
|    time_elapsed         | 35098     |
|    total_timesteps      | 2600960   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0283    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.36e+10  |
|    n_updates            | 12690     |
|    policy_gradient_loss | -1.81e-06 |
|    std                  | 1         |
|    value_loss           | 2.01e+10  |
---------------------------------------
Episode 1041  finished with cumulative reward: -6068000.0 and 
with an average reward of: -2426.2295081967213
number of 

Episode 1047  finished with cumulative reward: -3186500.0 and 
with an average reward of: -1274.0903638544582
number of steps in this episode: 2501
total steps till now: 2616046
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1274.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.25e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1278      |
|    time_elapsed         | 35316     |
|    total_timesteps      | 2617344   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0306    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1053  finished with cumulative reward: -5150000.0 and 
with an average reward of: -2059.1763294682128
number of steps in this episode: 2501
total steps till now: 2631052
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2060.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.36e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1285      |
|    time_elapsed         | 35507     |
|    total_timesteps      | 2631680   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0282    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1059  finished with cumulative reward: -7623500.0 and 
with an average reward of: -3048.1807277089165
number of steps in this episode: 2501
total steps till now: 2646058
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3049.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.61e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1293      |
|    time_elapsed         | 35718     |
|    total_timesteps      | 2648064   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0213    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1065  finished with cumulative reward: -7700000.0 and 
with an average reward of: -3078.7684926029588
number of steps in this episode: 2501
total steps till now: 2661064
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3080.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.79e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1300      |
|    time_elapsed         | 35909     |
|    total_timesteps      | 2662400   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0318    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1071  finished with cumulative reward: -8516000.0 and 
with an average reward of: -3405.0379848060775
number of steps in this episode: 2501
total steps till now: 2676070
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3406.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.81e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1307      |
|    time_elapsed         | 36099     |
|    total_timesteps      | 2676736   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0246    |
|    learning_rate        | 0.0003    |
|    loss               

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.94e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1314      |
|    time_elapsed         | 36281     |
|    total_timesteps      | 2691072   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0228    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.41e+10  |
|    n_updates            | 13130     |
|    policy_gradient_loss | -1.43e-06 |
|    std                  | 1         |
|    value_loss           | 2.49e+10  |
---------------------------------------
Episode 1077  finished with cumulative reward: -9230000.0 and 
with an average reward of: -3690.5237904838064
number of 

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.99e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1321      |
|    time_elapsed         | 36471     |
|    total_timesteps      | 2705408   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0275    |
|    learning_rate        | 0.0003    |
|    loss                 | 6.01e+08  |
|    n_updates            | 13200     |
|    policy_gradient_loss | -2.73e-06 |
|    std                  | 1         |
|    value_loss           | 6.22e+09  |
---------------------------------------
Episode 1083  finished with cumulative reward: -5660000.0 and 
with an average reward of: -2263.094762095162
number of s

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.92e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1328      |
|    time_elapsed         | 36662     |
|    total_timesteps      | 2719744   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0258    |
|    learning_rate        | 0.0003    |
|    loss                 | 2.14e+09  |
|    n_updates            | 13270     |
|    policy_gradient_loss | -1.36e-06 |
|    std                  | 1         |
|    value_loss           | 9.23e+09  |
---------------------------------------
Episode 1089  finished with cumulative reward: -4793000.0 and 
with an average reward of: -1916.4334266293483
number of 

Episode 1095  finished with cumulative reward: -4538000.0 and 
with an average reward of: -1814.4742103158737
number of steps in this episode: 2501
total steps till now: 2736094
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1815.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.19e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1336      |
|    time_elapsed         | 36881     |
|    total_timesteps      | 2736128   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0287    |
|    learning_rate        | 0.0003    |
|    loss               

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -519.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.18e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1343      |
|    time_elapsed         | 37064     |
|    total_timesteps      | 2750464   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0317    |
|    learning_rate        | 0.0003    |
|    loss                 | 3.54e+09  |
|    n_updates            | 13420     |
|    policy_gradient_loss | -1.58e-06 |
|    std                  | 1         |
|    value_loss           | 4.29e+09  |
---------------------------------------
Episod

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1080.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.2e+06  |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1350      |
|    time_elapsed         | 37252     |
|    total_timesteps      | 2764800   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0267    |
|    learning_rate        | 0.0003    |
|    loss                 | 3.93e+09  |
|    n_updates            | 13490     |
|    policy_gradient_loss | -2.36e-06 |
|    std                  | 1         |
|    value_loss           | 1e+10     |
---------------------------------------
Episo

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1366.4 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -6.23e+06     |
| time/                   |               |
|    fps                  | 74            |
|    iterations           | 1357          |
|    time_elapsed         | 37443         |
|    total_timesteps      | 2779136       |
| train/                  |               |
|    approx_kl            | 6.1118044e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0393        |
|    learning_rate        | 0.0003        |
|    loss                 | 9.57e+08      |
|    n_updates            | 13560         |
|    policy_gradient_loss | -7.69e-06     |
|    std                  | 1             |
|    

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.19e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1364         |
|    time_elapsed         | 37627        |
|    total_timesteps      | 2793472      |
| train/                  |              |
|    approx_kl            | 2.910383e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0299       |
|    learning_rate        | 0.0003       |
|    loss                 | 4.25e+09     |
|    n_updates            | 13630        |
|    policy_gradient_loss | -1.88e-06    |
|    std                  | 1            |
|    value_loss           | 9.48e+09     |
------------------------------------------
Episode 1118  finished with cumulative reward: -356900

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.19e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1371      |
|    time_elapsed         | 37815     |
|    total_timesteps      | 2807808   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0302    |
|    learning_rate        | 0.0003    |
|    loss                 | 9.56e+09  |
|    n_updates            | 13700     |
|    policy_gradient_loss | -1.07e-06 |
|    std                  | 1         |
|    value_loss           | 1.31e+10  |
---------------------------------------
Episode 1124  finished with cumulative reward: -14381000.0 and 
with an average reward of: -5750.099960015994
number of 

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.24e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1378      |
|    time_elapsed         | 38006     |
|    total_timesteps      | 2822144   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0314    |
|    learning_rate        | 0.0003    |
|    loss                 | 2.89e+09  |
|    n_updates            | 13770     |
|    policy_gradient_loss | -1.81e-06 |
|    std                  | 1         |
|    value_loss           | 5.5e+09   |
---------------------------------------
Episode 1130  finished with cumulative reward: -6731000.0 and 
with an average reward of: -2691.3234706117555
number of 

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.19e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1386      |
|    time_elapsed         | 38216     |
|    total_timesteps      | 2838528   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.026     |
|    learning_rate        | 0.0003    |
|    loss                 | 3.8e+09   |
|    n_updates            | 13850     |
|    policy_gradient_loss | -3.41e-06 |
|    std                  | 1         |
|    value_loss           | 1.42e+10  |
---------------------------------------
Episode 1136  finished with cumulative reward: -2931500.0 and 
with an average reward of: -1172.1311475409836
number of 

Episode 1142  finished with cumulative reward: -4971500.0 and 
with an average reward of: -1987.8048780487804
number of steps in this episode: 2501
total steps till now: 2853641
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1988.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.1e+06  |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1394      |
|    time_elapsed         | 38435     |
|    total_timesteps      | 2854912   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0405    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1148  finished with cumulative reward: -1044500.0 and 
with an average reward of: -417.6329468212715
number of steps in this episode: 2501
total steps till now: 2868647
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -417.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.96e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1401      |
|    time_elapsed         | 38626     |
|    total_timesteps      | 2869248   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0381    |
|    learning_rate        | 0.0003    |
|    loss                 

Episode 1154  finished with cumulative reward: -7343000.0 and 
with an average reward of: -2936.0255897640945
number of steps in this episode: 2501
total steps till now: 2883653
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2937.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.89e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1409      |
|    time_elapsed         | 38837     |
|    total_timesteps      | 2885632   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0265    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1160  finished with cumulative reward: -5991500.0 and 
with an average reward of: -2395.641743302679
number of steps in this episode: 2501
total steps till now: 2898659
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2396.6 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.78e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1416         |
|    time_elapsed         | 39027        |
|    total_timesteps      | 2899968      |
| train/                  |              |
|    approx_kl            | 8.731149e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0315       |
|    learning_rate  

Episode 1166  finished with cumulative reward: -6323000.0 and 
with an average reward of: -2528.1887245101957
number of steps in this episode: 2501
total steps till now: 2913665
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2529.2 
 smooth: 0.0 
 symmetry: 0.0
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 2.5e+03  |
|    ep_rew_mean          | -5.6e+06 |
| time/                   |          |
|    fps                  | 74       |
|    iterations           | 1423     |
|    time_elapsed         | 39217    |
|    total_timesteps      | 2914304  |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -17      |
|    explained_variance   | 0.0318   |
|    learning_rate        | 0.0003   |
|    loss                 | 3.87e+09 |
|

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.61e+06     |
| time/                   |               |
|    fps                  | 74            |
|    iterations           | 1430          |
|    time_elapsed         | 39400         |
|    total_timesteps      | 2928640       |
| train/                  |               |
|    approx_kl            | 1.8335413e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0606        |
|    learning_rate        | 0.0003        |
|    loss                 | 1.45e+08      |
|    n_updates            | 14290         |
|    policy_gradient_loss | -4.78e-06     |
|    std                  | 1             |
|    value_loss           | 1.11e+09      |
-------------------------------------------
Episode 1172  finished with cumu

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -723.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.34e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1437      |
|    time_elapsed         | 39590     |
|    total_timesteps      | 2942976   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0493    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.25e+08  |
|    n_updates            | 14360     |
|    policy_gradient_loss | -1.67e-06 |
|    std                  | 1         |
|    value_loss           | 3.03e+09  |
---------------------------------------
Episod

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.48e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1444         |
|    time_elapsed         | 39781        |
|    total_timesteps      | 2957312      |
| train/                  |              |
|    approx_kl            | 2.910383e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0234       |
|    learning_rate        | 0.0003       |
|    loss                 | 5.67e+09     |
|    n_updates            | 14430        |
|    policy_gradient_loss | -1.38e-06    |
|    std                  | 1            |
|    value_loss           | 1.09e+10     |
------------------------------------------
Episode 1184  finished with cumulative reward: -747050

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.45e+06     |
| time/                   |               |
|    fps                  | 74            |
|    iterations           | 1451          |
|    time_elapsed         | 39971         |
|    total_timesteps      | 2971648       |
| train/                  |               |
|    approx_kl            | 3.7252903e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0368        |
|    learning_rate        | 0.0003        |
|    loss                 | 4.57e+09      |
|    n_updates            | 14500         |
|    policy_gradient_loss | -1.02e-05     |
|    std                  | 1             |
|    value_loss           | 7.02e+09      |
-------------------------------------------
Episode 1190  finished with cumu

Episode 1195  finished with cumulative reward: -7980500.0 and 
with an average reward of: -3190.9236305477807
number of steps in this episode: 2501
total steps till now: 2986194
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3192.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.2e+06  |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1459      |
|    time_elapsed         | 40183     |
|    total_timesteps      | 2988032   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.032     |
|    learning_rate        | 0.0003    |
|    loss               

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1040.0 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.31e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1466         |
|    time_elapsed         | 40373        |
|    total_timesteps      | 3002368      |
| train/                  |              |
|    approx_kl            | 5.820766e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0343       |
|    learning_rate        | 0.0003       |
|    loss                 | 1.17e+10     |
|    n_updates            | 14650        |
|    policy_gradient_loss | -4.53e-06    |
|    std                  | 1            |
|    value_loss          

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -4191.8 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.26e+06     |
| time/                   |               |
|    fps                  | 74            |
|    iterations           | 1473          |
|    time_elapsed         | 40564         |
|    total_timesteps      | 3016704       |
| train/                  |               |
|    approx_kl            | 1.7462298e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0405        |
|    learning_rate        | 0.0003        |
|    loss                 | 6.68e+09      |
|    n_updates            | 14720         |
|    policy_gradient_loss | -6.71e-06     |
|    std                  | 1             |
|    

Episode 1213  finished with cumulative reward: -2957000.0 and 
with an average reward of: -1182.327069172331
number of steps in this episode: 2501
total steps till now: 3031212
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1182.8 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.25e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1481         |
|    time_elapsed         | 40775        |
|    total_timesteps      | 3033088      |
| train/                  |              |
|    approx_kl            | 8.731149e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0346       |
|    learning_rate  

Episode 1219  finished with cumulative reward: -9587000.0 and 
with an average reward of: -3833.266693322671
number of steps in this episode: 2501
total steps till now: 3046218
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3834.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.31e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1488      |
|    time_elapsed         | 40964     |
|    total_timesteps      | 3047424   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0249    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 1225  finished with cumulative reward: -6476000.0 and 
with an average reward of: -2589.3642542982807
number of steps in this episode: 2501
total steps till now: 3061224
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2590.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.26e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1495      |
|    time_elapsed         | 41154     |
|    total_timesteps      | 3061760   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0377    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1231  finished with cumulative reward: -3569000.0 and 
with an average reward of: -1427.02918832467
number of steps in this episode: 2501
total steps till now: 3076230
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1427.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.4e+06  |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1503      |
|    time_elapsed         | 41364     |
|    total_timesteps      | 3078144   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0381    |
|    learning_rate        | 0.0003    |
|    loss                 

Episode 1237  finished with cumulative reward: -4869500.0 and 
with an average reward of: -1947.0211915233906
number of steps in this episode: 2501
total steps till now: 3091236
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1947.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.42e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1510      |
|    time_elapsed         | 41554     |
|    total_timesteps      | 3092480   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0241    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1243  finished with cumulative reward: -4793000.0 and 
with an average reward of: -1916.4334266293483
number of steps in this episode: 2501
total steps till now: 3106242
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1917.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.39e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1517      |
|    time_elapsed         | 41744     |
|    total_timesteps      | 3106816   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0344    |
|    learning_rate        | 0.0003    |
|    loss               

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.51e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1524      |
|    time_elapsed         | 41926     |
|    total_timesteps      | 3121152   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0305    |
|    learning_rate        | 0.0003    |
|    loss                 | 9.58e+09  |
|    n_updates            | 15230     |
|    policy_gradient_loss | -1.45e-06 |
|    std                  | 1         |
|    value_loss           | 2e+10     |
---------------------------------------
Episode 1249  finished with cumulative reward: -6374000.0 and 
with an average reward of: -2548.580567772891
number of s

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.59e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1531      |
|    time_elapsed         | 42115     |
|    total_timesteps      | 3135488   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0261    |
|    learning_rate        | 0.0003    |
|    loss                 | 8.42e+09  |
|    n_updates            | 15300     |
|    policy_gradient_loss | -7.74e-07 |
|    std                  | 1         |
|    value_loss           | 2.83e+10  |
---------------------------------------
Episode 1255  finished with cumulative reward: -6144500.0 and 
with an average reward of: -2456.8172730907636
number of 

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.53e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1538      |
|    time_elapsed         | 42306     |
|    total_timesteps      | 3149824   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0299    |
|    learning_rate        | 0.0003    |
|    loss                 | 2.35e+09  |
|    n_updates            | 15370     |
|    policy_gradient_loss | -1.45e-06 |
|    std                  | 1         |
|    value_loss           | 1.57e+10  |
---------------------------------------
Episode 1261  finished with cumulative reward: -6476000.0 and 
with an average reward of: -2589.3642542982807
number of 

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.62e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1545      |
|    time_elapsed         | 42495     |
|    total_timesteps      | 3164160   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0298    |
|    learning_rate        | 0.0003    |
|    loss                 | 5.22e+09  |
|    n_updates            | 15440     |
|    policy_gradient_loss | -1.21e-06 |
|    std                  | 1         |
|    value_loss           | 1.07e+10  |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |


Episode 1272  finished with cumulative reward: -2039000.0 and 
with an average reward of: -815.2738904438224
number of steps in this episode: 2501
total steps till now: 3178771
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -815.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.56e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1553      |
|    time_elapsed         | 42703     |
|    total_timesteps      | 3180544   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.036     |
|    learning_rate        | 0.0003    |
|    loss                 

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2508.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.74e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1560      |
|    time_elapsed         | 42893     |
|    total_timesteps      | 3194880   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0326    |
|    learning_rate        | 0.0003    |
|    loss                 | 4.61e+09  |
|    n_updates            | 15590     |
|    policy_gradient_loss | -1.43e-06 |
|    std                  | 1         |
|    value_loss           | 8.1e+09   |
---------------------------------------
Episo

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2366.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.56e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1567      |
|    time_elapsed         | 43082     |
|    total_timesteps      | 3209216   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0433    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.64e+09  |
|    n_updates            | 15660     |
|    policy_gradient_loss | -1.77e-06 |
|    std                  | 1         |
|    value_loss           | 6.25e+09  |
---------------------------------------
-----

Episode 1290  finished with cumulative reward: -10224500.0 and 
with an average reward of: -4088.1647341063576
number of steps in this episode: 2501
total steps till now: 3223789
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -4089.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.64e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1575      |
|    time_elapsed         | 43293     |
|    total_timesteps      | 3225600   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0367    |
|    learning_rate        | 0.0003    |
|    loss              

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -846.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.67e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1582      |
|    time_elapsed         | 43482     |
|    total_timesteps      | 3239936   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.032     |
|    learning_rate        | 0.0003    |
|    loss                 | 2.98e+09  |
|    n_updates            | 15810     |
|    policy_gradient_loss | -1.32e-06 |
|    std                  | 1         |
|    value_loss           | 6.34e+09  |
---------------------------------------
Episod

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2264.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.51e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1589      |
|    time_elapsed         | 43671     |
|    total_timesteps      | 3254272   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0333    |
|    learning_rate        | 0.0003    |
|    loss                 | 5.16e+09  |
|    n_updates            | 15880     |
|    policy_gradient_loss | -1.02e-06 |
|    std                  | 1         |
|    value_loss           | 1.05e+10  |
---------------------------------------
Episo

Episode 1308  finished with cumulative reward: -7292000.0 and 
with an average reward of: -2915.6337465013994
number of steps in this episode: 2501
total steps till now: 3268807
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2916.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.52e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1597      |
|    time_elapsed         | 43883     |
|    total_timesteps      | 3270656   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0326    |
|    learning_rate        | 0.0003    |
|    loss               

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -458.6 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.55e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1604         |
|    time_elapsed         | 44074        |
|    total_timesteps      | 3284992      |
| train/                  |              |
|    approx_kl            | 2.910383e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0471       |
|    learning_rate        | 0.0003       |
|    loss                 | 3.11e+09     |
|    n_updates            | 16030        |
|    policy_gradient_loss | -3.38e-06    |
|    std                  | 1            |
|    value_loss           

Episode 1320  finished with cumulative reward: -9102500.0 and 
with an average reward of: -3639.5441823270694
number of steps in this episode: 2501
total steps till now: 3298819
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3641.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.58e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1611      |
|    time_elapsed         | 44265     |
|    total_timesteps      | 3299328   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0366    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1326  finished with cumulative reward: -1044500.0 and 
with an average reward of: -417.6329468212715
number of steps in this episode: 2501
total steps till now: 3313825
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -417.8 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.59e+06     |
| time/                   |               |
|    fps                  | 74            |
|    iterations           | 1619          |
|    time_elapsed         | 44478         |
|    total_timesteps      | 3315712       |
| train/                  |               |
|    approx_kl            | 1.4551915e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0399        |
|    l

Episode 1332  finished with cumulative reward: -6476000.0 and 
with an average reward of: -2589.3642542982807
number of steps in this episode: 2501
total steps till now: 3328831
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2590.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.6e+06  |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1626      |
|    time_elapsed         | 44669     |
|    total_timesteps      | 3330048   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0357    |
|    learning_rate        | 0.0003    |
|    loss               

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2172.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.64e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1633      |
|    time_elapsed         | 44859     |
|    total_timesteps      | 3344384   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0314    |
|    learning_rate        | 0.0003    |
|    loss                 | 9.68e+09  |
|    n_updates            | 16320     |
|    policy_gradient_loss | -2.13e-06 |
|    std                  | 1         |
|    value_loss           | 1.42e+10  |
---------------------------------------
Episo

Episode 1344  finished with cumulative reward: -3441500.0 and 
with an average reward of: -1376.0495801679328
number of steps in this episode: 2501
total steps till now: 3358843
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1376.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.67e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1641      |
|    time_elapsed         | 45069     |
|    total_timesteps      | 3360768   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0324    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1350  finished with cumulative reward: -6246500.0 and 
with an average reward of: -2497.6009596161534
number of steps in this episode: 2501
total steps till now: 3373849
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2498.6 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.88e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1648         |
|    time_elapsed         | 45259        |
|    total_timesteps      | 3375104      |
| train/                  |              |
|    approx_kl            | 2.910383e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0357       |
|    learning_rate 

Episode 1356  finished with cumulative reward: -4665500.0 and 
with an average reward of: -1865.453818472611
number of steps in this episode: 2501
total steps till now: 3388855
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1866.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.85e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1655      |
|    time_elapsed         | 45450     |
|    total_timesteps      | 3389440   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0272    |
|    learning_rate        | 0.0003    |
|    loss                

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.89e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1662         |
|    time_elapsed         | 45633        |
|    total_timesteps      | 3403776      |
| train/                  |              |
|    approx_kl            | 5.820766e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0389       |
|    learning_rate        | 0.0003       |
|    loss                 | 2.06e+09     |
|    n_updates            | 16610        |
|    policy_gradient_loss | -1.55e-06    |
|    std                  | 1            |
|    value_loss           | 5.85e+09     |
------------------------------------------
Episode 1362  finished with cumulative reward: -377300

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1427.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.95e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1669      |
|    time_elapsed         | 45821     |
|    total_timesteps      | 3418112   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.024     |
|    learning_rate        | 0.0003    |
|    loss                 | 2.22e+09  |
|    n_updates            | 16680     |
|    policy_gradient_loss | -9.17e-07 |
|    std                  | 1         |
|    value_loss           | 8.37e+09  |
---------------------------------------
Episo

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.07e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1676      |
|    time_elapsed         | 46011     |
|    total_timesteps      | 3432448   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0333    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.02e+09  |
|    n_updates            | 16750     |
|    policy_gradient_loss | -2.19e-06 |
|    std                  | 1         |
|    value_loss           | 4.75e+09  |
---------------------------------------
Episode 1374  finished with cumulative reward: -8516000.0 and 
with an average reward of: -3405.0379848060775
number of 

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3600.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.1e+06  |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1683      |
|    time_elapsed         | 46203     |
|    total_timesteps      | 3446784   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.042     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.29e+09  |
|    n_updates            | 16820     |
|    policy_gradient_loss | -3.01e-06 |
|    std                  | 1         |
|    value_loss           | 4.48e+09  |
---------------------------------------
-----

Episode 1385  finished with cumulative reward: -8541500.0 and 
with an average reward of: -3415.233906437425
number of steps in this episode: 2501
total steps till now: 3461384
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3416.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.16e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1691      |
|    time_elapsed         | 46414     |
|    total_timesteps      | 3463168   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0277    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 1391  finished with cumulative reward: -3696500.0 and 
with an average reward of: -1478.0087964814074
number of steps in this episode: 2501
total steps till now: 3476390
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1478.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.08e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1698      |
|    time_elapsed         | 46604     |
|    total_timesteps      | 3477504   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0426    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1397  finished with cumulative reward: -7266500.0 and 
with an average reward of: -2905.437824870052
number of steps in this episode: 2501
total steps till now: 3491396
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2906.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.06e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1705      |
|    time_elapsed         | 46795     |
|    total_timesteps      | 3491840   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0381    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 1403  finished with cumulative reward: -4359500.0 and 
with an average reward of: -1743.1027588964414
number of steps in this episode: 2501
total steps till now: 3506402
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1743.8 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.3e+06     |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1713         |
|    time_elapsed         | 47006        |
|    total_timesteps      | 3508224      |
| train/                  |              |
|    approx_kl            | 2.910383e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0327       |
|    learning_rate 

Episode 1409  finished with cumulative reward: -9000500.0 and 
with an average reward of: -3598.7604958016796
number of steps in this episode: 2501
total steps till now: 3521408
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3600.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.3e+06  |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1720      |
|    time_elapsed         | 47196     |
|    total_timesteps      | 3522560   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0355    |
|    learning_rate        | 0.0003    |
|    loss               

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -825.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.21e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1727      |
|    time_elapsed         | 47388     |
|    total_timesteps      | 3536896   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0421    |
|    learning_rate        | 0.0003    |
|    loss                 | 6.62e+08  |
|    n_updates            | 17260     |
|    policy_gradient_loss | -2.23e-06 |
|    std                  | 1         |
|    value_loss           | 2.45e+09  |
---------------------------------------
Episod

Episode 1421  finished with cumulative reward: -6782000.0 and 
with an average reward of: -2711.71531387445
number of steps in this episode: 2501
total steps till now: 3551420
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2712.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.16e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1735      |
|    time_elapsed         | 47600     |
|    total_timesteps      | 3553280   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0279    |
|    learning_rate        | 0.0003    |
|    loss                 

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2906.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.19e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1742      |
|    time_elapsed         | 47789     |
|    total_timesteps      | 3567616   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0379    |
|    learning_rate        | 0.0003    |
|    loss                 | 3.4e+09   |
|    n_updates            | 17410     |
|    policy_gradient_loss | -1.39e-06 |
|    std                  | 1         |
|    value_loss           | 1.31e+10  |
---------------------------------------
Episo

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2631.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.04e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1749      |
|    time_elapsed         | 47977     |
|    total_timesteps      | 3581952   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0215    |
|    learning_rate        | 0.0003    |
|    loss                 | 4.91e+09  |
|    n_updates            | 17480     |
|    policy_gradient_loss | -1.16e-06 |
|    std                  | 1         |
|    value_loss           | 1.43e+10  |
---------------------------------------
Episo

Episode 1439  finished with cumulative reward: -7521500.0 and 
with an average reward of: -3007.3970411835267
number of steps in this episode: 2501
total steps till now: 3596438
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3008.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.02e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1757      |
|    time_elapsed         | 48187     |
|    total_timesteps      | 3598336   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0299    |
|    learning_rate        | 0.0003    |
|    loss               

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -479.0 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -6.04e+06     |
| time/                   |               |
|    fps                  | 74            |
|    iterations           | 1764          |
|    time_elapsed         | 48376         |
|    total_timesteps      | 3612672       |
| train/                  |               |
|    approx_kl            | 1.0186341e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0495        |
|    learning_rate        | 0.0003        |
|    loss                 | 2.34e+08      |
|    n_updates            | 17630         |
|    policy_gradient_loss | -9.65e-06     |
|    std                  | 1             |
|    v

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2406.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.88e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1771      |
|    time_elapsed         | 48562     |
|    total_timesteps      | 3627008   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0303    |
|    learning_rate        | 0.0003    |
|    loss                 | 5.89e+09  |
|    n_updates            | 17700     |
|    policy_gradient_loss | -1.48e-06 |
|    std                  | 1         |
|    value_loss           | 1.16e+10  |
---------------------------------------
Episo

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.71e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1778      |
|    time_elapsed         | 48744     |
|    total_timesteps      | 3641344   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0385    |
|    learning_rate        | 0.0003    |
|    loss                 | 5.9e+09   |
|    n_updates            | 17770     |
|    policy_gradient_loss | -2.84e-06 |
|    std                  | 1         |
|    value_loss           | 1.24e+10  |
---------------------------------------
Episode 1457  finished with cumulative reward: -8337500.0 and 
with an average reward of: -3333.6665333866454
number of 

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.83e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1785      |
|    time_elapsed         | 48932     |
|    total_timesteps      | 3655680   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0392    |
|    learning_rate        | 0.0003    |
|    loss                 | 6.71e+09  |
|    n_updates            | 17840     |
|    policy_gradient_loss | -2.53e-06 |
|    std                  | 1         |
|    value_loss           | 1.22e+10  |
---------------------------------------
Episode 1463  finished with cumulative reward: -4079000.0 and 
with an average reward of: -1630.9476209516195
number of 

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2896.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.82e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1792      |
|    time_elapsed         | 49123     |
|    total_timesteps      | 3670016   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0355    |
|    learning_rate        | 0.0003    |
|    loss                 | 5.87e+09  |
|    n_updates            | 17910     |
|    policy_gradient_loss | -1.18e-06 |
|    std                  | 1         |
|    value_loss           | 1.48e+10  |
---------------------------------------
Episo

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.75e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1799      |
|    time_elapsed         | 49313     |
|    total_timesteps      | 3684352   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0426    |
|    learning_rate        | 0.0003    |
|    loss                 | 2.52e+09  |
|    n_updates            | 17980     |
|    policy_gradient_loss | -2.65e-06 |
|    std                  | 1         |
|    value_loss           | 8.22e+09  |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |


Episode 1480  finished with cumulative reward: -10836500.0 and 
with an average reward of: -4332.866853258696
number of steps in this episode: 2501
total steps till now: 3698979
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -4334.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.79e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1807      |
|    time_elapsed         | 49524     |
|    total_timesteps      | 3700736   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0312    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1486  finished with cumulative reward: -5379500.0 and 
with an average reward of: -2150.93962415034
number of steps in this episode: 2501
total steps till now: 3713985
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2151.8 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.63e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1814         |
|    time_elapsed         | 49715        |
|    total_timesteps      | 3715072      |
| train/                  |              |
|    approx_kl            | 8.731149e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0365       |
|    learning_rate   

Episode 1492  finished with cumulative reward: -4155500.0 and 
with an average reward of: -1661.5353858456617
number of steps in this episode: 2501
total steps till now: 3728991
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1662.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.64e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1821      |
|    time_elapsed         | 49904     |
|    total_timesteps      | 3729408   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0327    |
|    learning_rate        | 0.0003    |
|    loss               

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.77e+06     |
| time/                   |               |
|    fps                  | 74            |
|    iterations           | 1828          |
|    time_elapsed         | 50087         |
|    total_timesteps      | 3743744       |
| train/                  |               |
|    approx_kl            | 6.6065695e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0552        |
|    learning_rate        | 0.0003        |
|    loss                 | 2.05e+09      |
|    n_updates            | 18270         |
|    policy_gradient_loss | -6.78e-06     |
|    std                  | 1             |
|    value_loss           | 3.74e+09      |
-------------------------------------------
Episode 1498  finished with cumu

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3865.4 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.63e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1835         |
|    time_elapsed         | 50278        |
|    total_timesteps      | 3758080      |
| train/                  |              |
|    approx_kl            | 9.313226e-10 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0388       |
|    learning_rate        | 0.0003       |
|    loss                 | 1.21e+10     |
|    n_updates            | 18340        |
|    policy_gradient_loss | -6.75e-06    |
|    std                  | 1            |
|    value_loss          

Episode 1509  finished with cumulative reward: -4410500.0 and 
with an average reward of: -1763.4946021591363
number of steps in this episode: 2501
total steps till now: 3771508
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1764.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.53e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1842      |
|    time_elapsed         | 50468     |
|    total_timesteps      | 3772416   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0441    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1515  finished with cumulative reward: -8745500.0 and 
with an average reward of: -3496.8012794882047
number of steps in this episode: 2501
total steps till now: 3786514
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3498.2 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.67e+06     |
| time/                   |               |
|    fps                  | 74            |
|    iterations           | 1849          |
|    time_elapsed         | 50658         |
|    total_timesteps      | 3786752       |
| train/                  |               |
|    approx_kl            | 1.1641532e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0522        |
|   

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.63e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1856         |
|    time_elapsed         | 50840        |
|    total_timesteps      | 3801088      |
| train/                  |              |
|    approx_kl            | 4.656613e-10 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0629       |
|    learning_rate        | 0.0003       |
|    loss                 | 9.76e+08     |
|    n_updates            | 18550        |
|    policy_gradient_loss | -9.31e-06    |
|    std                  | 1            |
|    value_loss           | 3.08e+09     |
------------------------------------------
Episode 1521  finished with cumulative reward: -770000

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1652.0 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.67e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1863         |
|    time_elapsed         | 51031        |
|    total_timesteps      | 3815424      |
| train/                  |              |
|    approx_kl            | 2.910383e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0427       |
|    learning_rate        | 0.0003       |
|    loss                 | 3.86e+09     |
|    n_updates            | 18620        |
|    policy_gradient_loss | -5.38e-06    |
|    std                  | 1            |
|    value_loss          

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3222.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.72e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1870      |
|    time_elapsed         | 51223     |
|    total_timesteps      | 3829760   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0289    |
|    learning_rate        | 0.0003    |
|    loss                 | 9.17e+09  |
|    n_updates            | 18690     |
|    policy_gradient_loss | -9.81e-07 |
|    std                  | 1         |
|    value_loss           | 1.77e+10  |
---------------------------------------
Episo

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2253.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.82e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1877      |
|    time_elapsed         | 51412     |
|    total_timesteps      | 3844096   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0325    |
|    learning_rate        | 0.0003    |
|    loss                 | 2.42e+09  |
|    n_updates            | 18760     |
|    policy_gradient_loss | -2.34e-06 |
|    std                  | 1         |
|    value_loss           | 9.21e+09  |
---------------------------------------
-----

Episode 1544  finished with cumulative reward: -3237500.0 and 
with an average reward of: -1294.4822071171532
number of steps in this episode: 2501
total steps till now: 3859043
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1295.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.67e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1885      |
|    time_elapsed         | 51623     |
|    total_timesteps      | 3860480   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0438    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1550  finished with cumulative reward: -8745500.0 and 
with an average reward of: -3496.8012794882047
number of steps in this episode: 2501
total steps till now: 3874049
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3498.2 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.7e+06     |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1892         |
|    time_elapsed         | 51813        |
|    total_timesteps      | 3874816      |
| train/                  |              |
|    approx_kl            | 8.731149e-10 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0351       |
|    learning_rate 

Episode 1556  finished with cumulative reward: -4359500.0 and 
with an average reward of: -1743.1027588964414
number of steps in this episode: 2501
total steps till now: 3889055
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1743.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.73e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1899      |
|    time_elapsed         | 52005     |
|    total_timesteps      | 3889152   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0354    |
|    learning_rate        | 0.0003    |
|    loss               

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.69e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1906      |
|    time_elapsed         | 52188     |
|    total_timesteps      | 3903488   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0329    |
|    learning_rate        | 0.0003    |
|    loss                 | 4.51e+09  |
|    n_updates            | 19050     |
|    policy_gradient_loss | -9.5e-07  |
|    std                  | 1         |
|    value_loss           | 7.13e+09  |
---------------------------------------
Episode 1562  finished with cumulative reward: -6986000.0 and 
with an average reward of: -2793.28268692523
number of st

Episode 1568  finished with cumulative reward: -8975000.0 and 
with an average reward of: -3588.564574170332
number of steps in this episode: 2501
total steps till now: 3919067
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3590.0 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -5.71e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1914         |
|    time_elapsed         | 52406        |
|    total_timesteps      | 3919872      |
| train/                  |              |
|    approx_kl            | 2.910383e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.036        |
|    learning_rate  

Episode 1574  finished with cumulative reward: -6017000.0 and 
with an average reward of: -2405.837664934026
number of steps in this episode: 2501
total steps till now: 3934073
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2406.8 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.8e+06      |
| time/                   |               |
|    fps                  | 74            |
|    iterations           | 1921          |
|    time_elapsed         | 52595         |
|    total_timesteps      | 3934208       |
| train/                  |               |
|    approx_kl            | 1.1641532e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0299        |
|    

Episode 1579  finished with cumulative reward: -6501500.0 and 
with an average reward of: -2599.5601759296283
number of steps in this episode: 2501
total steps till now: 3946578
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2600.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.91e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1928      |
|    time_elapsed         | 52778     |
|    total_timesteps      | 3948544   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.04      |
|    learning_rate        | 0.0003    |
|    loss               

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2080.4 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.86e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1935      |
|    time_elapsed         | 52968     |
|    total_timesteps      | 3962880   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0364    |
|    learning_rate        | 0.0003    |
|    loss                 | 3.9e+09   |
|    n_updates            | 19340     |
|    policy_gradient_loss | -1.07e-06 |
|    std                  | 1         |
|    value_loss           | 8.6e+09   |
---------------------------------------
Episo

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2406.8 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -5.99e+06     |
| time/                   |               |
|    fps                  | 74            |
|    iterations           | 1942          |
|    time_elapsed         | 53158         |
|    total_timesteps      | 3977216       |
| train/                  |               |
|    approx_kl            | 1.4260877e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0343        |
|    learning_rate        | 0.0003        |
|    loss                 | 1.58e+10      |
|    n_updates            | 19410         |
|    policy_gradient_loss | -6.04e-06     |
|    std                  | 1             |
|    

Episode 1597  finished with cumulative reward: -9842000.0 and 
with an average reward of: -3935.2259096361454
number of steps in this episode: 2501
total steps till now: 3991596
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3936.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.99e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1950      |
|    time_elapsed         | 53370     |
|    total_timesteps      | 3993600   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0278    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1603  finished with cumulative reward: -5660000.0 and 
with an average reward of: -2263.094762095162
number of steps in this episode: 2501
total steps till now: 4006602
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2264.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.97e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1957      |
|    time_elapsed         | 53559     |
|    total_timesteps      | 4007936   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0409    |
|    learning_rate        | 0.0003    |
|    loss                

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2804.6 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.15e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1964         |
|    time_elapsed         | 53749        |
|    total_timesteps      | 4022272      |
| train/                  |              |
|    approx_kl            | 5.820766e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.031        |
|    learning_rate        | 0.0003       |
|    loss                 | 7.7e+09      |
|    n_updates            | 19630        |
|    policy_gradient_loss | -2.19e-06    |
|    std                  | 1            |
|    value_loss          

Episode 1615  finished with cumulative reward: -3722000.0 and 
with an average reward of: -1488.2047181127548
number of steps in this episode: 2501
total steps till now: 4036614
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1488.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.19e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1972      |
|    time_elapsed         | 53960     |
|    total_timesteps      | 4038656   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0471    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1621  finished with cumulative reward: -8592500.0 and 
with an average reward of: -3435.6257497001197
number of steps in this episode: 2501
total steps till now: 4051620
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3437.0 
 smooth: 0.0 
 symmetry: 0.0
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.5e+03       |
|    ep_rew_mean          | -6.29e+06     |
| time/                   |               |
|    fps                  | 74            |
|    iterations           | 1979          |
|    time_elapsed         | 54152         |
|    total_timesteps      | 4052992       |
| train/                  |               |
|    approx_kl            | 1.1641532e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -17           |
|    explained_variance   | 0.0314        |
|   

Episode 1627  finished with cumulative reward: -8516000.0 and 
with an average reward of: -3405.0379848060775
number of steps in this episode: 2501
total steps till now: 4066626
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3406.4 
 smooth: 0.0 
 symmetry: 0.0
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.5e+03      |
|    ep_rew_mean          | -6.23e+06    |
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 1986         |
|    time_elapsed         | 54342        |
|    total_timesteps      | 4067328      |
| train/                  |              |
|    approx_kl            | 5.820766e-11 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -17          |
|    explained_variance   | 0.0384       |
|    learning_rate 

Episode 1633  finished with cumulative reward: -2804000.0 and 
with an average reward of: -1121.1515393842462
number of steps in this episode: 2501
total steps till now: 4081632
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1121.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.14e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 1993      |
|    time_elapsed         | 54532     |
|    total_timesteps      | 4081664   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0353    |
|    learning_rate        | 0.0003    |
|    loss               

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.89e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 2000      |
|    time_elapsed         | 54713     |
|    total_timesteps      | 4096000   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.049     |
|    learning_rate        | 0.0003    |
|    loss                 | 2.34e+09  |
|    n_updates            | 19990     |
|    policy_gradient_loss | -3.16e-06 |
|    std                  | 1         |
|    value_loss           | 4.54e+09  |
---------------------------------------
Episode 1639  finished with cumulative reward: -3365000.0 and 
with an average reward of: -1345.4618152738904
number of 

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3498.2 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.94e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 2007      |
|    time_elapsed         | 54904     |
|    total_timesteps      | 4110336   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0326    |
|    learning_rate        | 0.0003    |
|    loss                 | 7.68e+09  |
|    n_updates            | 20060     |
|    policy_gradient_loss | -2.72e-06 |
|    std                  | 1         |
|    value_loss           | 2.28e+10  |
---------------------------------------
Episo

forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3324.8 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.08e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 2014      |
|    time_elapsed         | 55093     |
|    total_timesteps      | 4124672   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0337    |
|    learning_rate        | 0.0003    |
|    loss                 | 1.1e+10   |
|    n_updates            | 20130     |
|    policy_gradient_loss | -1.64e-06 |
|    std                  | 1         |
|    value_loss           | 1.96e+10  |
---------------------------------------
Episo

Episode 1656  finished with cumulative reward: -5405000.0 and 
with an average reward of: -2161.135545781687
number of steps in this episode: 2501
total steps till now: 4139155
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -2162.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.11e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 2022      |
|    time_elapsed         | 55304     |
|    total_timesteps      | 4141056   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0395    |
|    learning_rate        | 0.0003    |
|    loss                

Episode 1662  finished with cumulative reward: -9179000.0 and 
with an average reward of: -3670.1319472211117
number of steps in this episode: 2501
total steps till now: 4154161
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -3671.6 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -6.09e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 2029      |
|    time_elapsed         | 55495     |
|    total_timesteps      | 4155392   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.0358    |
|    learning_rate        | 0.0003    |
|    loss               

Episode 1668  finished with cumulative reward: -2600000.0 and 
with an average reward of: -1039.5841663334666
number of steps in this episode: 2501
total steps till now: 4169167
..........................................
forward : 0.0 
 velocity penalty:0.0 
 stability : 0.0
energy : 0.0 
  fall: -1040.0 
 smooth: 0.0 
 symmetry: 0.0
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.5e+03   |
|    ep_rew_mean          | -5.97e+06 |
| time/                   |           |
|    fps                  | 74        |
|    iterations           | 2036      |
|    time_elapsed         | 55684     |
|    total_timesteps      | 4169728   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.049     |
|    learning_rate        | 0.0003    |
|    loss               

error: Not connected to physics server.

In [None]:
# Save the model with a specific name
model.save("ppo_spot_trial_basic2")

print("Model saved successfully as trial_one.zip.")
