Skip to content

Commit

Permalink
minor update to SAC: support DM_control training
Browse files Browse the repository at this point in the history
  • Loading branch information
zuoxingdong committed May 13, 2019
1 parent bb5525d commit 8b2f96b
Show file tree
Hide file tree
Showing 173 changed files with 143 additions and 84 deletions.
96 changes: 96 additions & 0 deletions baselines/sac/experiment_dm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import os
from pathlib import Path

import gym
from gym.wrappers import FlattenDictWrapper

from dm_control import suite
from dm2gym import DMControlEnv

from lagom.utils import pickle_dump
from lagom.utils import set_global_seeds
from lagom.experiment import Config
from lagom.experiment import Grid
from lagom.experiment import Sample
from lagom.experiment import Condition
from lagom.experiment import run_experiment
from lagom.envs import make_vec_env
from lagom.envs.wrappers import TimeLimit
from lagom.envs.wrappers import NormalizeAction
from lagom.envs.wrappers import VecMonitor
from lagom.envs.wrappers import VecStepInfo

from baselines.sac.agent import Agent
from baselines.sac.engine import Engine
from baselines.sac.replay_buffer import ReplayBuffer


# Experiment configuration for SAC on DeepMind Control Suite tasks.
# Values wrapped in Grid are swept over; Condition values are derived from
# the rest of the configuration.
config = Config(
    {'log.freq': 1000,  # every n timesteps
     'checkpoint.num': 3,

     # each entry is a (domain_name, task_name) pair, unpacked in make_env
     'env.id': Grid([('cheetah', 'run'), ('hopper', 'hop'), ('walker', 'run'), ('fish', 'upright')]),

     'agent.gamma': 0.99,
     'agent.polyak': 0.995,  # polyak averaging coefficient for targets update
     'agent.actor.lr': 3e-4,
     'agent.actor.use_lr_scheduler': False,
     'agent.critic.lr': 3e-4,
     'agent.critic.use_lr_scheduler': False,
     'agent.initial_temperature': 1.0,
     'agent.max_grad_norm': 999999,  # grad clipping by norm

     'replay.capacity': 1000000,
     # number of time steps to take uniform actions initially
     # NOTE: 'env.id' is a (domain, task) tuple here, so the comparison is made
     # on the domain name. The previous check against the Gym ids
     # 'Hopper-v3'/'Walker2d-v3' could never match and always yielded 10000.
     # NOTE(review): mapping those ids to the 'hopper'/'walker' domains is an
     # assumption about the intended behaviour - confirm.
     'replay.init_size': Condition(lambda x: 1000 if x['env.id'][0] in ('hopper', 'walker') else 10000),
     'replay.batch_size': 256,

     'train.timestep': int(1e6),  # total number of training (environmental) timesteps
     'eval.freq': 5000,
     'eval.num_episode': 10

     })


def make_env(config, seed):
    """Create the vectorized (single-copy) environment for one configuration.

    Args:
        config: experiment configuration; ``config['env.id']`` is unpacked
            into a ``(domain_name, task_name)`` pair.
        seed: seed forwarded to ``make_vec_env``.

    Returns:
        Whatever ``make_vec_env`` returns for one environment factory.
    """
    def build_single_env():
        # Load the dm_control task with flattened observations, then adapt
        # it to the Gym interface and apply the wrapper chain.
        domain, task = config['env.id']
        dm_task = suite.load(domain, task, environment_kwargs={'flat_observation': True})
        wrapped = FlattenDictWrapper(DMControlEnv(dm_task), ['observations'])
        # The episode horizon is read from the spec exposed by the wrapped env.
        wrapped = TimeLimit(wrapped, wrapped.spec.max_episode_steps)
        return NormalizeAction(wrapped)

    return make_vec_env(build_single_env, 1, seed)  # single environment


def run(config, seed, device, logdir):
    """Run one training job and pickle its training and evaluation logs.

    Args:
        config: experiment configuration for this job.
        seed: seed used for global seeding and for both environments.
        device: device handed to the agent and the replay buffer.
        logdir: directory (joined with ``/``) where the log files are written.

    Returns:
        None.
    """
    set_global_seeds(seed)

    # Training environment: monitored and wrapped with step info.
    train_env = VecStepInfo(VecMonitor(make_env(config, seed)))
    # Evaluation environment: monitored only.
    test_env = VecMonitor(make_env(config, seed))

    learner = Agent(config, train_env, device)
    buffer = ReplayBuffer(train_env, config['replay.capacity'], device)
    trainer = Engine(
        config,
        agent=learner,
        env=train_env,
        eval_env=test_env,
        replay=buffer,
        logdir=logdir,
    )

    train_logs, eval_logs = trainer.train()
    for stem, logs in (('train_logs', train_logs), ('eval_logs', eval_logs)):
        pickle_dump(obj=logs, f=logdir/stem, ext='.pkl')


if __name__ == '__main__':
    # Script entry point: launch one job per (configuration, seed) pair.
    seeds = [4153361530, 3503522377, 2876994566, 172236777, 3949341511, 849059707]
    run_experiment(
        run=run,
        config=config,
        seeds=seeds,
        log_dir='logs/default_dm',
        max_workers=os.cpu_count(),
        chunksize=1,
        use_gpu=True,  # GPU much faster, note that performance differs between CPU/GPU
        gpu_ids=None,
    )
Binary file modified baselines/sac/logs/default/0/172236777/agent_1.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/172236777/agent_1000.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/172236777/agent_500.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/172236777/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/172236777/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/2876994566/agent_1.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/2876994566/agent_1000.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/2876994566/agent_500.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/2876994566/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/2876994566/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/3503522377/agent_1.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/3503522377/agent_1000.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/3503522377/agent_500.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/3503522377/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/3503522377/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/3949341511/agent_1.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/3949341511/agent_1000.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/3949341511/agent_500.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/3949341511/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/3949341511/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/4153361530/agent_1.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/4153361530/agent_1000.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/4153361530/agent_500.pth
Binary file not shown.

This file was deleted.

This file was deleted.

This file was deleted.

Binary file not shown.

This file was deleted.

Binary file not shown.
Binary file modified baselines/sac/logs/default/0/4153361530/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/4153361530/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/849059707/agent_1.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/849059707/agent_1000.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/849059707/agent_500.pth
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/849059707/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/0/849059707/train_logs.pkl
Binary file not shown.
3 changes: 0 additions & 3 deletions baselines/sac/logs/default/0/config.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
ID: 0
cuda: true
log.dir: logs/default
log.freq: 1000
checkpoint.num: 3
agent.gamma: 0.99
Expand All @@ -9,7 +7,6 @@ agent.actor.lr: 0.0003
agent.actor.use_lr_scheduler: false
agent.critic.lr: 0.0003
agent.critic.use_lr_scheduler: false
agent.policy_delay: 1
agent.initial_temperature: 1.0
agent.max_grad_norm: 999999
replay.capacity: 1000000
Expand Down
Binary file modified baselines/sac/logs/default/1/172236777/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/172236777/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/172236777/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/2876994566/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/2876994566/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/2876994566/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/3503522377/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/3503522377/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/3503522377/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/3949341511/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/3949341511/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/3949341511/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/4153361530/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

This file was deleted.

This file was deleted.

This file was deleted.

Binary file not shown.

This file was deleted.

Binary file not shown.
Binary file modified baselines/sac/logs/default/1/4153361530/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/4153361530/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/849059707/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/849059707/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/1/849059707/train_logs.pkl
Binary file not shown.
3 changes: 0 additions & 3 deletions baselines/sac/logs/default/1/config.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
ID: 1
cuda: true
log.dir: logs/default
log.freq: 1000
checkpoint.num: 3
agent.gamma: 0.99
Expand All @@ -9,7 +7,6 @@ agent.actor.lr: 0.0003
agent.actor.use_lr_scheduler: false
agent.critic.lr: 0.0003
agent.critic.use_lr_scheduler: false
agent.policy_delay: 1
agent.initial_temperature: 1.0
agent.max_grad_norm: 999999
replay.capacity: 1000000
Expand Down
Binary file modified baselines/sac/logs/default/2/172236777/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/172236777/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/172236777/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/2876994566/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/2876994566/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/2876994566/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/3503522377/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/3503522377/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/3503522377/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/3949341511/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/3949341511/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/3949341511/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/4153361530/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

This file was deleted.

This file was deleted.

This file was deleted.

Binary file not shown.

This file was deleted.

Binary file not shown.
Binary file modified baselines/sac/logs/default/2/4153361530/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/4153361530/train_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/849059707/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/849059707/eval_logs.pkl
Binary file not shown.
Binary file modified baselines/sac/logs/default/2/849059707/train_logs.pkl
Binary file not shown.
3 changes: 0 additions & 3 deletions baselines/sac/logs/default/2/config.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
ID: 2
cuda: true
log.dir: logs/default
log.freq: 1000
checkpoint.num: 3
agent.gamma: 0.99
Expand All @@ -9,7 +7,6 @@ agent.actor.lr: 0.0003
agent.actor.use_lr_scheduler: false
agent.critic.lr: 0.0003
agent.critic.use_lr_scheduler: false
agent.policy_delay: 1
agent.initial_temperature: 1.0
agent.max_grad_norm: 999999
replay.capacity: 1000000
Expand Down

0 comments on commit 8b2f96b

Please sign in to comment.