In [1]:
import gymnasium as gym
import math
import random
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count
import numpy as np 

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision

from tqdm import tqdm, trange
import wandb

# set up matplotlib
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
	from IPython import display

plt.ion()

# if GPU is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
def make_env(env_id, seed, idx, run_name): 
	def thunk(): 
		env = gym.make(env_id, render_mode='rgb_array')
		env = gym.wrappers.RecordEpisodeStatistics(env) 
		env = gym.wrappers.RecordVideo(env, f'videos/ppo/cartpole.mp4')
		# env.seed(seed) 
		env.action_space.seed(seed) 
		env.observation_space.seed(seed) 
		return env 
	return thunk() 

In [3]:
def layer_init(layer, std=np.sqrt(2), bias_const=0.0): 
	nn.init.orthogonal_(layer.weight, std) 
	nn.init.constant_(layer.bias, bias_const)
	return layer 

class Agent(nn.Module): 
	def __init__(self, envs): 
		super().__init__() 
		self.critic = nn.Sequential(
			layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)), 
			nn.Tanh(), 
			layer_init(nn.Linear(64, 64)), 
			nn.Tanh(), 
			layer_init(nn.Linear(64, 1), std=1.0)
		)

		self.actor = nn.Sequential(
			layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)), 
			nn.Tanh(), 
			layer_init(nn.Linear(64, 64)), 
			nn.Tanh(), 
			layer_init(nn.Linear(64, envs.single_action_space.n), std=0.01)
		)

	def get_value(self, x): 
		return self.critic(x) 
	
	def get_action_and_value(self, x, action=None): 
		logits = self.actor(x) 
		probs = torch.distributions.categorical.Categorical(logits=logits) 
		if action is None: 
			action = probs.sample() 
		return action, probs.log_prob(action), probs.entropy(), self.critic(x) 

In [4]:
wandb.init(
    project='CartPole PPO', 
    entity='uuzall', 
    sync_tensorboard=True, 
    name='the first', 
    monitor_gym=True, 
    save_code=True,
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33muuzall[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
writer = torch.utils.tensorboard.SummaryWriter(f'runs/the first')

AttributeError: module 'torch.utils' has no attribute 'tensorboard'

In [5]:
seed = 1337 
num_envs = 4
num_steps = 128
# temp = [make_env('CartPole-v1', seed + i, i, 'the first') for i in range(4)]

envs = gym.vector.SyncVectorEnv([
    lambda: make_env('CartPole-v1', seed+0, 0, 'the first'), 
    lambda: make_env('CartPole-v1', seed+1, 1, 'the first'), 
    lambda: make_env('CartPole-v1', seed+2, 2, 'the first'), 
    lambda: make_env('CartPole-v1', seed+3, 3, 'the first')
])

agent = Agent(envs).to(device) 
optimizer = optim.Adam(agent.parameters(), lr=2.5e-4, eps=1e-5)

  logger.warn(


In [6]:
obs = torch.zeros((num_steps, num_envs) + envs.single_observation_space.shape).to(device)
actions = torch.zeros((num_steps, num_envs) + envs.single_action_space.shape).to(device) 
logprobs = torch.zeros((num_steps, num_envs)).to(device) 
rewards = torch.zeros((num_steps, num_envs)).to(device) 
dones = torch.zeros((num_steps, num_envs)).to(device) 
values = torch.zeros((num_steps, num_envs)).to(device) 

In [11]:
num_steps, num_envs, envs.single_action_space.shape

(128, 4, ())

In [8]:
total_timesteps = 64#*4096
bs = 64
gamma = 0.99
update_epochs = 4 
mini_bs = 4 
clip_coef = 0.2
ent_coef = 0.01 
vf_coef = 0.5

global_step = 0 
next_obs = torch.tensor(envs.reset()[0]).to(device) 
next_done = torch.zeros(num_envs).to(device) 
num_updates = total_timesteps // bs

for update in trange(1, num_updates + 1): 
	for step in range(0, num_steps): 
		global_step += 1 * num_envs 
		obs[step] = next_obs 
		dones[step] = next_done 
		
		with torch.no_grad(): 
			action, logprob, _, value = agent.get_action_and_value(next_obs) 
			values[step] = value.flatten() 
		actions[step] = action 
		logprobs[step] = logprob

		next_obs, reward, terminated, truncated, info = envs.step(action.cpu().numpy())
		done = np.bitwise_or(terminated, truncated).astype(np.float32) 
		rewards[step] = torch.tensor(reward).to(device).view(-1) 
		next_obs, next_done = torch.tensor(next_obs).to(device), torch.tensor(done).to(device) 

	with torch.no_grad(): 
		next_value = agent.get_value(next_obs).reshape(1, -1) 
		# no GAE 	
		returns = torch.zeros_like(rewards).to(device) 
		for t in reversed(range(num_steps)): 
			if t == num_steps - 1: 
				next_non_terminal = 1.0 - next_done 
				next_return = next_value 
			else: 
				next_non_terminal = 1.0 - dones[t + 1]
				next_return = returns[t + 1]
			returns[t] = rewards[t] + gamma * next_non_terminal * next_return 
		advantages = returns - values 

	b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) 
	b_logprobs = logprobs.reshape(-1) 
	b_actions = actions.reshape((-1,) + envs.single_action_space.shape)
	b_advantages = advantages.reshape(-1) 
	b_returns = returns.reshape(-1) 
	b_values = values.reshape(-1) 

	b_inds = np.arange(bs) 
	clipfracs = list() 
	
	for epoch in range(update_epochs): 
		np.random.shuffle(b_inds)
		for start in range(0, bs, mini_bs): 
			end = start + mini_bs 
			mb_inds = b_inds[start:end]

			_, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds])
			logratio = newlogprob - b_logprobs[mb_inds]
			ratio = logratio.exp() 

			with torch.no_grad(): 
				old_approx_kl = (-logprob).mean() 
				approx_kl = ((ratio - 1) - logratio).mean() 
				clipfracs += [((ratio - 1.0).abs() > clip_coef).float().mean().item()]
			
			mb_advantages = b_advantages[mb_inds]
			# normalizing advantages can be turned off 
			mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8)

			pg_loss1 = -mb_advantages * ratio 
			pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef) 
			pg_loss = torch.max(pg_loss1, pg_loss2).mean() 

			v_loss_unclipped = (newvalue - b_returns[mb_inds])**2 
			v_clipped = b_values[mb_inds] + torch.clamp(newvalue - b_values[mb_inds], -clip_coef, clip_coef)
			v_loss_clipped = (v_clipped - b_returns[mb_inds])**2 
			v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) 
			v_loss = 0.5 * v_loss_max.mean() 

			entropy_loss = entropy.mean() 
			loss = pg_loss - ent_coef * entropy_loss + v_loss * vf_coef 

			optimizer.zero_grad() 
			loss.backward() 
			nn.utils.clip_grad_norm_(agent.parameters(), 0.5) 
			optimizer.step() 

		y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() 
		var_y = np.var(y_true) 
		explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y 

		writer.add_scalar('charts/learning_rate', optimizer.param_groups[0]['lr'], global_step)
		writer.add_scalar('losses/value_loss', v_loss.item(), global_step)
		writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step)
		writer.add_scalar("losses/entropy", entropy_loss.item(), global_step)
		writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step)
		writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step)
		writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step)
		writer.add_scalar("losses/explained_variance", explained_var, global_step)

  0%|          | 0/4096 [00:02<?, ?it/s]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-0.mp4



  0%|          | 0/4096 [00:02<?, ?it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-0.mp4
Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-0.mp4



  0%|          | 0/4096 [00:02<?, ?it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-0.mp4
Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-0.mp4



  0%|          | 0/4096 [00:03<?, ?it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-0.mp4


  0%|          | 0/4096 [00:03<?, ?it/s]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1.mp4



  0%|          | 0/4096 [00:03<?, ?it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1.mp4
Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-0.mp4.


  0%|          | 0/4096 [00:03<?, ?it/s]

Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-0.mp4



  0%|          | 0/4096 [00:03<?, ?it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-0.mp4


  0%|          | 0/4096 [00:03<?, ?it/s]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1.mp4



  0%|          | 0/4096 [00:03<?, ?it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1.mp4
Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1.mp4



  0%|          | 0/4096 [00:03<?, ?it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1.mp4


  0%|          | 0/4096 [00:03<?, ?it/s]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1.mp4



  0%|          | 0/4096 [00:03<?, ?it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1.mp4


  0%|          | 1/4096 [00:04<5:28:06,  4.81s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-8.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-8.mp4



  0%|          | 1/4096 [00:05<5:28:06,  4.81s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-8.mp4
Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-8.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-8.mp4



  0%|          | 1/4096 [00:05<5:28:06,  4.81s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-8.mp4


  0%|          | 1/4096 [00:05<5:28:06,  4.81s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-8.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-8.mp4



  0%|          | 1/4096 [00:05<5:28:06,  4.81s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-8.mp4


  0%|          | 2/4096 [00:06<3:15:46,  2.87s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-8.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-8.mp4



  0%|          | 2/4096 [00:06<3:15:46,  2.87s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-8.mp4


  0%|          | 5/4096 [00:09<1:33:40,  1.37s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-27.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-27.mp4



  0%|          | 5/4096 [00:09<1:33:40,  1.37s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-27.mp4


  0%|          | 6/4096 [00:10<1:31:12,  1.34s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-27.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-27.mp4



  0%|          | 6/4096 [00:10<1:31:12,  1.34s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-27.mp4


  0%|          | 6/4096 [00:11<1:31:12,  1.34s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-27.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-27.mp4



  0%|          | 6/4096 [00:11<1:31:12,  1.34s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-27.mp4
Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-27.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-27.mp4



  0%|          | 6/4096 [00:11<1:31:12,  1.34s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-27.mp4


  0%|          | 17/4096 [00:22<1:04:59,  1.05it/s]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-64.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-64.mp4



  0%|          | 17/4096 [00:22<1:04:59,  1.05it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-64.mp4


  0%|          | 18/4096 [00:23<1:16:28,  1.13s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-64.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-64.mp4



  0%|          | 18/4096 [00:23<1:16:28,  1.13s/it]

Moviepy - Done !


  0%|          | 18/4096 [00:23<1:16:28,  1.13s/it]

Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-64.mp4


  0%|          | 19/4096 [00:24<1:17:00,  1.13s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-64.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-64.mp4



  0%|          | 19/4096 [00:25<1:17:00,  1.13s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-64.mp4


  0%|          | 20/4096 [00:26<1:23:16,  1.23s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-64.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-64.mp4



  0%|          | 20/4096 [00:26<1:23:16,  1.23s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-64.mp4


  1%|          | 42/4096 [00:48<1:18:20,  1.16s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-125.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-125.mp4



  1%|          | 42/4096 [00:48<1:18:20,  1.16s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-125.mp4


  1%|          | 43/4096 [00:50<1:22:14,  1.22s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-125.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-125.mp4



  1%|          | 43/4096 [00:50<1:22:14,  1.22s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-125.mp4


  1%|          | 45/4096 [00:53<1:24:21,  1.25s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-125.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-125.mp4



  1%|          | 45/4096 [00:53<1:24:21,  1.25s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-125.mp4


  1%|          | 46/4096 [00:54<1:40:27,  1.49s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-125.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-125.mp4



  1%|          | 46/4096 [00:55<1:40:27,  1.49s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-125.mp4


  2%|▏         | 86/4096 [01:34<1:09:41,  1.04s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-216.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-216.mp4



  2%|▏         | 86/4096 [01:34<1:09:41,  1.04s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-216.mp4


  2%|▏         | 88/4096 [01:36<1:13:23,  1.10s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-216.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-216.mp4



  2%|▏         | 88/4096 [01:36<1:13:23,  1.10s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-216.mp4


  2%|▏         | 89/4096 [01:38<1:14:38,  1.12s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-216.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-216.mp4



  2%|▏         | 89/4096 [01:38<1:14:38,  1.12s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-216.mp4


  2%|▏         | 94/4096 [01:43<1:08:58,  1.03s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-216.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-216.mp4



  2%|▏         | 94/4096 [01:43<1:08:58,  1.03s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-216.mp4


  5%|▍         | 194/4096 [03:17<58:11,  1.12it/s]  

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-343.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-343.mp4



  5%|▍         | 194/4096 [03:18<58:11,  1.12it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-343.mp4


  5%|▍         | 195/4096 [03:19<1:11:47,  1.10s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-343.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-343.mp4



  5%|▍         | 195/4096 [03:19<1:11:47,  1.10s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-343.mp4


  5%|▍         | 196/4096 [03:20<1:22:04,  1.26s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-343.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-343.mp4



  5%|▍         | 196/4096 [03:21<1:22:04,  1.26s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-343.mp4


  5%|▍         | 204/4096 [03:29<1:10:47,  1.09s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-343.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-343.mp4



  5%|▍         | 204/4096 [03:30<1:10:47,  1.09s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-343.mp4


  8%|▊         | 327/4096 [05:19<54:14,  1.16it/s]  

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-512.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-512.mp4



  8%|▊         | 327/4096 [05:20<54:14,  1.16it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-512.mp4


  8%|▊         | 328/4096 [05:21<1:05:44,  1.05s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-512.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-512.mp4



  8%|▊         | 328/4096 [05:21<1:05:44,  1.05s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-512.mp4


  8%|▊         | 329/4096 [05:22<1:15:10,  1.20s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-512.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-512.mp4



  8%|▊         | 329/4096 [05:22<1:15:10,  1.20s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-512.mp4


  8%|▊         | 340/4096 [05:32<1:01:42,  1.01it/s]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-512.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-512.mp4



  8%|▊         | 340/4096 [05:33<1:01:42,  1.01it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-512.mp4


 16%|█▋        | 670/4096 [10:11<56:36,  1.01it/s]  

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-729.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-729.mp4



 16%|█▋        | 670/4096 [10:12<56:36,  1.01it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-729.mp4


 17%|█▋        | 676/4096 [10:17<55:45,  1.02it/s]  

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-729.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-729.mp4



 17%|█▋        | 676/4096 [10:18<55:45,  1.02it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-729.mp4


 17%|█▋        | 689/4096 [10:30<1:02:31,  1.10s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-729.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-729.mp4



 17%|█▋        | 689/4096 [10:30<1:02:31,  1.10s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-729.mp4


 18%|█▊        | 726/4096 [11:02<1:03:36,  1.13s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-729.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-729.mp4



 18%|█▊        | 726/4096 [11:03<1:03:36,  1.13s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-729.mp4


 32%|███▏      | 1316/4096 [18:43<37:51,  1.22it/s] 

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1000.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1000.mp4



 32%|███▏      | 1316/4096 [18:43<37:51,  1.22it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1000.mp4


 32%|███▏      | 1320/4096 [18:48<55:42,  1.20s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1000.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1000.mp4



 32%|███▏      | 1320/4096 [18:49<55:42,  1.20s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1000.mp4


 32%|███▏      | 1327/4096 [18:55<44:38,  1.03it/s]  

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1000.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1000.mp4



 32%|███▏      | 1327/4096 [18:55<44:38,  1.03it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1000.mp4


 32%|███▏      | 1329/4096 [18:58<49:02,  1.06s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1000.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1000.mp4



 32%|███▏      | 1329/4096 [18:58<49:02,  1.06s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-1000.mp4


 72%|███████▏  | 2962/4096 [40:24<19:07,  1.01s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-2000.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-2000.mp4



 72%|███████▏  | 2962/4096 [40:25<19:07,  1.01s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-2000.mp4


 73%|███████▎  | 2988/4096 [40:48<18:03,  1.02it/s]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-2000.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-2000.mp4



 73%|███████▎  | 2988/4096 [40:49<18:03,  1.02it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-2000.mp4


 73%|███████▎  | 2996/4096 [40:56<16:04,  1.14it/s]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-2000.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-2000.mp4



 73%|███████▎  | 2996/4096 [40:56<16:04,  1.14it/s]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-2000.mp4


 76%|███████▌  | 3106/4096 [42:24<16:37,  1.01s/it]

Moviepy - Building video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-2000.mp4.
Moviepy - Writing video c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-2000.mp4



 76%|███████▌  | 3106/4096 [42:24<16:37,  1.01s/it]

Moviepy - Done !
Moviepy - video ready c:\Users\Asus\OneDrive\Programming\basic_rl\videos\ppo\cartpole.mp4\rl-video-episode-2000.mp4


100%|██████████| 4096/4096 [55:21<00:00,  1.23it/s]


In [10]:
torch.save(agent.state_dict(), 'models/cartpole_ppo/4096_train.pth')