In [1]:
from typing import Any, Tuple, Union

import numpy as np
import torch
import gymnasium as gym
from pandas.core.api import DataFrame as DataFrame
from torch import nn
from torch.distributions.categorical import Categorical

from user_data.rl.models.base_model import BaseActorCriticModel
from user_data.rl.trainer.ppo import PpoTrainer, EnvProvider
from user_data.rl.trainer.args import Args


def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    """Initialise ``layer`` in place and return it.

    The weight is filled with an orthogonal matrix scaled by ``std``; the
    bias, when the layer has one, is set to the constant ``bias_const``.

    Args:
        layer: Module exposing a ``weight`` tensor and a ``bias`` attribute,
            e.g. ``nn.Linear``.
        std: Gain forwarded to ``torch.nn.init.orthogonal_``.
        bias_const: Value every bias element is set to.

    Returns:
        The same ``layer`` object, so the call can be nested directly inside
        ``nn.Sequential(...)``.
    """
    torch.nn.init.orthogonal_(layer.weight, std)
    # Layers created with ``bias=False`` carry ``bias = None``; skip the bias
    # step for them instead of failing inside ``constant_``.
    if getattr(layer, "bias", None) is not None:
        torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class ActorCriticModel(BaseActorCriticModel):
    """Actor-critic model with two independent tanh MLPs.

    ``critic`` maps an observation to a single value output and ``actor`` maps
    it to one logit per discrete action. Both networks use two hidden layers
    of 64 units and are initialised through ``layer_init``.
    """

    def __init__(self, observation_space_shape: Tuple, number_of_action: int) -> None:
        """Build the critic and actor networks.

        Args:
            observation_space_shape: Shape of one observation; the product of
                its entries is used as the input width of both networks.
            number_of_action: Number of discrete actions (width of the actor's
                output layer).
        """
        super().__init__()

        def build_network(out_features, out_std):
            # Same three-layer layout for both heads; only the width and the
            # init gain of the final layer differ.
            return nn.Sequential(
                layer_init(nn.Linear(np.array(observation_space_shape).prod(), 64)),
                nn.Tanh(),
                layer_init(nn.Linear(64, 64)),
                nn.Tanh(),
                layer_init(nn.Linear(64, out_features), std=out_std),
            )

        # The critic is built before the actor so that parameter
        # initialisation draws from the RNG in a fixed order.
        self.critic = build_network(1, 1.0)
        self.actor = build_network(number_of_action, 0.01)

    def get_value(self, x):
        """Return the critic network's output for ``x``."""
        return self.critic(x)

    def get_action_and_value(self, x, action=None):
        """Evaluate the policy and the critic on ``x``.

        Args:
            x: Input fed to both networks (presumably a batch of flat
                observations; confirm against the trainer).
            action: Optional action(s) to score. When ``None`` an action is
                sampled from the categorical policy.

        Returns:
            Tuple ``(action, log_prob, entropy, value)`` where ``log_prob``
            and ``entropy`` come from the categorical distribution over the
            actor's logits and ``value`` is the critic's output.
        """
        policy = Categorical(logits=self.actor(x))
        chosen = policy.sample() if action is None else action
        return chosen, policy.log_prob(chosen), policy.entropy(), self.critic(x)

In [2]:
# Run configuration: only the fields that differ from the Args defaults are
# passed here.
args = Args(
    tensorboard_log_dir='./playground/runs',
    video_path='./playground/videos',
    capture_video=True,
    track=True,
    wandb_project_name='cleanRL',
)


def env_provider(render_mode):
    """Create a LunarLander-v2 environment with the requested render mode."""
    return gym.make("LunarLander-v2", render_mode=render_mode)


# Build one throwaway environment just to read the observation/action space
# sizes, and close it afterwards so it does not linger for the rest of the
# kernel session (previously two environments were created and never closed).
probe_env = env_provider(None)
try:
    model = ActorCriticModel(
        probe_env.observation_space.shape,
        probe_env.action_space.n,
    )
finally:
    probe_env.close()

trainer = PpoTrainer(env_provider, model, args)

In [3]:
# Echo the Args instance so the cell output lists every field, not only the
# ones passed explicitly when it was constructed.
args

Args(exp_name='args', seed=1, torch_deterministic=True, cuda=True, track=True, wandb_project_name='cleanRL', wandb_entity=None, tensorboard_log_dir='./playground/runs', capture_video=True, video_path='./playground/videos', total_timesteps=500000, learning_rate=0.00025, num_envs=4, num_steps=128, anneal_lr=True, gamma=0.99, gae_lambda=0.95, num_minibatches=4, update_epochs=4, norm_adv=True, clip_coef=0.2, clip_vloss=True, ent_coef=0.01, vf_coef=0.5, max_grad_norm=0.5, target_kl=None, batch_size=512, minibatch_size=128, num_iterations=976)

In [4]:
# Start the training run. The captured output below contains the
# `global_step=..., episodic_return=[...]` and `SPS: ...` log lines printed
# during the run, interleaved with Moviepy messages for the recorded videos.
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: lxchen. Use `wandb login --relogin` to force relogin


global_step=424, episodic_return=[-170.81808]
Moviepy - Building video /workspaces/rl-trading-freqtrade/playground/videos/args__1__1708265247/rl-video-episode-0.mp4.
Moviepy - Writing video /workspaces/rl-trading-freqtrade/playground/videos/args__1__1708265247/rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready /workspaces/rl-trading-freqtrade/playground/videos/args__1__1708265247/rl-video-episode-0.mp4
global_step=472, episodic_return=[-320.47113]
global_step=476, episodic_return=[-81.00199]
global_step=484, episodic_return=[-20.284828]
SPS: 260
Moviepy - Building video /workspaces/rl-trading-freqtrade/playground/videos/args__1__1708265247/rl-video-episode-1.mp4.
Moviepy - Writing video /workspaces/rl-trading-freqtrade/playground/videos/args__1__1708265247/rl-video-episode-1.mp4



                                                   

Moviepy - Done !
Moviepy - video ready /workspaces/rl-trading-freqtrade/playground/videos/args__1__1708265247/rl-video-episode-1.mp4
global_step=700, episodic_return=[-95.84424]
global_step=784, episodic_return=[-81.61914]
global_step=844, episodic_return=[-123.97433]




global_step=948, episodic_return=[-176.86417]
global_step=1000, episodic_return=[-130.58908]
SPS: 1989
global_step=1136, episodic_return=[-112.11347]
global_step=1188, episodic_return=[-513.1245]
global_step=1236, episodic_return=[-108.53128]
global_step=1352, episodic_return=[-254.07227]
global_step=1404, episodic_return=[-155.40775]
SPS: 5084
global_step=1624, episodic_return=[-161.66263]
global_step=1712, episodic_return=[-250.63983]
global_step=1752, episodic_return=[-279.94476]
global_step=1816, episodic_return=[-151.1982]
global_step=1888, episodic_return=[-66.41669]
global_step=2004, episodic_return=[-85.487854]
SPS: 7616
global_step=2080, episodic_return=[-77.71797]
global_step=2236, episodic_return=[-90.8588]
global_step=2288, episodic_return=[-73.672516]
global_step=2416, episodic_return=[-181.13174]
global_step=2528, episodic_return=[-108.43175]
global_step=2540, episodic_return=[-256.4453]
SPS: 9928
global_step=2672, episodic_return=[-117.0497]
global_step=2760, episodic_re

                                                   

Moviepy - Done !
Moviepy - video ready /workspaces/rl-trading-freqtrade/playground/videos/args__1__1708265247/rl-video-episode-8.mp4
global_step=3184, episodic_return=[-99.38037]
global_step=3256, episodic_return=[-90.29702]




global_step=3428, episodic_return=[-367.78543]
global_step=3492, episodic_return=[-333.34326]
global_step=3520, episodic_return=[-45.960888]
SPS: 7970
global_step=3588, episodic_return=[-391.92007]
global_step=3704, episodic_return=[-103.13723]
global_step=3876, episodic_return=[-190.54059]
global_step=3968, episodic_return=[-179.28835]
global_step=4008, episodic_return=[-152.85043]
SPS: 15547
global_step=4236, episodic_return=[-287.54254]
global_step=4244, episodic_return=[-114.11825]
global_step=4292, episodic_return=[-70.982895]
global_step=4368, episodic_return=[-237.88898]
SPS: 18158
global_step=4628, episodic_return=[-366.49695]
global_step=4692, episodic_return=[-177.48434]
global_step=4720, episodic_return=[-455.00082]
global_step=4788, episodic_return=[-259.9263]
global_step=4948, episodic_return=[-62.867176]
global_step=5112, episodic_return=[-81.69168]
SPS: 19900
global_step=5132, episodic_return=[-155.88579]
global_step=5188, episodic_return=[-100.93275]
global_step=5344, e

                                                              

Moviepy - Done !
Moviepy - video ready /workspaces/rl-trading-freqtrade/playground/videos/args__1__1708265247/rl-video-episode-27.mp4
global_step=10656, episodic_return=[-137.44273]
SPS: 14380
global_step=10776, episodic_return=[-91.650856]
global_step=10904, episodic_return=[-14.87278]
global_step=11052, episodic_return=[-148.26839]
global_step=11176, episodic_return=[-174.02744]
global_step=11188, episodic_return=[-70.5014]
global_step=11260, episodic_return=[-280.15802]
SPS: 31729
global_step=11604, episodic_return=[-143.55933]
global_step=11608, episodic_return=[10.531609]
global_step=11636, episodic_return=[-231.88702]
global_step=11672, episodic_return=[-440.92285]
SPS: 30054
global_step=12036, episodic_return=[-72.774]
global_step=12060, episodic_return=[-49.249435]
global_step=12112, episodic_return=[-112.65435]
global_step=12236, episodic_return=[-292.04587]
SPS: 37598
global_step=12352, episodic_return=[-79.00737]
global_step=12508, episodic_return=[-146.39279]
global_step=12

                                                              

Moviepy - Done !
Moviepy - video ready /workspaces/rl-trading-freqtrade/playground/videos/args__1__1708265247/rl-video-episode-64.mp4
global_step=26244, episodic_return=[-314.31244]
global_step=26244, episodic_return=[-83.48358]
global_step=26328, episodic_return=[-445.2794]
global_step=26556, episodic_return=[-312.47147]
global_step=26580, episodic_return=[-52.332623]
global_step=26592, episodic_return=[-66.183975]
SPS: 42516
global_step=26808, episodic_return=[-235.93843]
global_step=26872, episodic_return=[-103.29326]
global_step=26920, episodic_return=[-10.478546]
global_step=26968, episodic_return=[-142.51959]
SPS: 86110
global_step=27148, episodic_return=[-108.344635]
global_step=27188, episodic_return=[-89.88839]
global_step=27392, episodic_return=[-171.81262]
global_step=27424, episodic_return=[-396.98495]
global_step=27444, episodic_return=[-107.6791]
global_step=27528, episodic_return=[-236.4453]
SPS: 82992
global_step=27844, episodic_return=[-230.98766]
global_step=27844, ep

                                                             

Moviepy - Done !
Moviepy - video ready /workspaces/rl-trading-freqtrade/playground/videos/args__1__1708265247/rl-video-episode-125.mp4
global_step=55140, episodic_return=[-55.75441]
SPS: 87080
global_step=55396, episodic_return=[-69.81433]
global_step=55412, episodic_return=[-22.504501]
global_step=55436, episodic_return=[-97.28244]
global_step=55740, episodic_return=[-93.85631]
SPS: 167381
global_step=55844, episodic_return=[-141.57959]
global_step=55980, episodic_return=[-215.60672]
global_step=56208, episodic_return=[-169.30373]
SPS: 161591
global_step=56340, episodic_return=[-101.40668]
global_step=56552, episodic_return=[-100.697945]
global_step=56652, episodic_return=[-218.77142]
global_step=56672, episodic_return=[-53.37423]
SPS: 160157
global_step=56924, episodic_return=[-45.785507]
global_step=57064, episodic_return=[-157.29156]
global_step=57072, episodic_return=[-253.75255]
global_step=57328, episodic_return=[-178.09392]
SPS: 160886
global_step=57548, episodic_return=[-214.1

                                                               

Moviepy - Done !
Moviepy - video ready /workspaces/rl-trading-freqtrade/playground/videos/args__1__1708265247/rl-video-episode-216.mp4
global_step=103864, episodic_return=[-136.02975]
SPS: 129431
global_step=103988, episodic_return=[-267.7459]
global_step=104016, episodic_return=[-200.66699]
global_step=104192, episodic_return=[23.48452]
global_step=104388, episodic_return=[-9.519295]
SPS: 296158
global_step=104516, episodic_return=[18.122208]
global_step=104880, episodic_return=[-261.4129]
SPS: 300725
global_step=105104, episodic_return=[-196.17665]
global_step=105108, episodic_return=[-74.99983]
global_step=105436, episodic_return=[-152.34828]
SPS: 277214
global_step=105660, episodic_return=[-49.04162]
global_step=105728, episodic_return=[-138.86581]
SPS: 332364
global_step=106204, episodic_return=[-43.13994]
global_step=106312, episodic_return=[-56.533226]
global_step=106336, episodic_return=[-393.65054]
global_step=106468, episodic_return=[-306.96527]
SPS: 297851
global_step=106644

                                                                

Moviepy - Done !
Moviepy - video ready /workspaces/rl-trading-freqtrade/playground/videos/args__1__1708265247/rl-video-episode-343.mp4
global_step=406344, episodic_return=[16.33562]
SPS: 213332
global_step=406844, episodic_return=[1.1297215]
SPS: 1044971
global_step=407528, episodic_return=[25.737741]
SPS: 1176696
global_step=407896, episodic_return=[47.860847]
SPS: 1023246
SPS: 1062781
SPS: 943758
SPS: 1161626
SPS: 1169847
global_step=410344, episodic_return=[-47.2439]
SPS: 1259042
global_step=410844, episodic_return=[-10.585995]
SPS: 1119621
global_step=411528, episodic_return=[29.495455]
SPS: 1066362
global_step=411896, episodic_return=[26.78958]
SPS: 949239
SPS: 1065364
SPS: 1126796
SPS: 1124005
SPS: 1176248
global_step=414344, episodic_return=[-38.90537]
SPS: 1397165
global_step=414728, episodic_return=[-111.43124]
global_step=414844, episodic_return=[-29.478765]
SPS: 1118979
SPS: 1166848
global_step=415896, episodic_return=[54.70369]
SPS: 1043965
SPS: 1022095
SPS: 875936
global_s