Skip to content

Commit

Permalink
Merge pull request #185 from zuoxingdong/step_info_trajectory
Browse files Browse the repository at this point in the history
minor update to DDPG
  • Loading branch information
zuoxingdong committed May 12, 2019
2 parents 93d5d6b + 5a82af3 commit feebc54
Show file tree
Hide file tree
Showing 140 changed files with 172 additions and 32 deletions.
17 changes: 7 additions & 10 deletions baselines/ddpg/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@ def __init__(self, config, env, device, **kwargs):
self.critic_target.eval()
self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=config['agent.critic.lr'])

self.action_noise = config['agent.action_noise']

def polyak_update_target(self):
p = self.config['agent.polyak']
for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()):
Expand All @@ -88,7 +86,7 @@ def choose_action(self, obs, **kwargs):
with torch.no_grad():
action = numpify(self.actor(obs), 'float')
if kwargs['mode'] == 'train':
eps = np.random.normal(0.0, self.action_noise, size=action.shape)
eps = np.random.normal(0.0, self.config['agent.action_noise'], size=action.shape)
action = np.clip(action + eps, self.env.action_space.low, self.env.action_space.high)
out = {}
out['action'] = action
Expand All @@ -104,12 +102,11 @@ def learn(self, D, **kwargs):
for i in range(episode_length):
observations, actions, rewards, next_observations, masks = replay.sample(self.config['replay.batch_size'])

Qs = self.critic(observations, actions).squeeze()
Qs = self.critic(observations, actions)
with torch.no_grad():
next_Qs = self.critic_target(next_observations, self.actor_target(next_observations)).squeeze()
targets = rewards + self.config['agent.gamma']*masks*next_Qs.detach()

critic_loss = F.mse_loss(Qs, targets)
next_Qs = self.critic_target(next_observations, self.actor_target(next_observations))
targets = rewards + self.config['agent.gamma']*masks*next_Qs
critic_loss = F.mse_loss(Qs, targets.detach())
self.actor_optimizer.zero_grad()
self.critic_optimizer.zero_grad()
critic_loss.backward()
Expand All @@ -128,9 +125,9 @@ def learn(self, D, **kwargs):
out['actor_loss'].append(actor_loss)
out['critic_loss'].append(critic_loss)
Q_vals.append(Qs)
out['actor_loss'] = torch.stack(out['actor_loss']).mean().item()
out['actor_loss'] = torch.tensor(out['actor_loss']).mean().item()
out['actor_grad_norm'] = actor_grad_norm
out['critic_loss'] = torch.stack(out['critic_loss']).mean().item()
out['critic_loss'] = torch.tensor(out['critic_loss']).mean().item()
out['critic_grad_norm'] = critic_grad_norm
describe_it = lambda x: describe(numpify(torch.cat(x), 'float').squeeze(), axis=-1, repr_indent=1, repr_prefix='\n')
out['Q'] = describe_it(Q_vals)
Expand Down
4 changes: 2 additions & 2 deletions baselines/ddpg/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

'replay.capacity': 1000000,
# number of time steps to take uniform actions initially
'replay.init_size': Condition(lambda x: 1000 if x['env.id'] in ['Hopper-v3', 'Walker2d-v3'] else 10000),
'replay.init_size': Condition(lambda x: 1000 if x['env.id'] in ['Hopper-v3', 'Walker2d-v3'] else 10000),
'replay.batch_size': 100,

'train.timestep': int(1e6), # total number of training (environmental) timesteps
Expand Down Expand Up @@ -86,5 +86,5 @@ def run(config, seed, device, logdir):
log_dir='logs/default',
max_workers=os.cpu_count(),
chunksize=1,
use_gpu=True,
use_gpu=True, # GPU much faster, note that performance differs between CPU/GPU
gpu_ids=None)
Binary file modified baselines/ddpg/logs/default/0/172236777/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file modified baselines/ddpg/logs/default/0/172236777/agent_500.pth
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"initial_reset_timestamp": 1557676127.9693623, "timestamps": [1557676148.3624852], "episode_lengths": [1000], "episode_rewards": [1913.8975264544883], "episode_types": ["t", "t"]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"stats": "openaigym.episode_batch.0.3918876.stats.json", "videos": [["openaigym.video.0.3918876.video000000.mp4", "openaigym.video.0.3918876.video000000.meta.json"], ["openaigym.video.0.3918876.video000001.mp4", "openaigym.video.0.3918876.video000001.meta.json"]], "env_info": {"gym_version": "0.12.1", "env_id": "HalfCheetah-v3"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"episode_id": 0, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 3.4.4-0ubuntu0.18.04.1 Copyright (c) 2000-2018 the FFmpeg developers\\nbuilt with gcc 7 (Ubuntu 7.3.0-16ubuntu3)\\nconfiguration: --prefix=/usr --extra-version=0ubuntu0.18.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --enable-gpl --disable-stripping --enable-avresample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librubberband --enable-librsvg --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-omx --enable-openal --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libopencv --enable-libx264 --enable-shared\\nlibavutil 55. 78.100 / 55. 78.100\\nlibavcodec 57.107.100 / 57.107.100\\nlibavformat 57. 83.100 / 57. 83.100\\nlibavdevice 57. 10.100 / 57. 10.100\\nlibavfilter 6.107.100 / 6.107.100\\nlibavresample 3. 7. 0 / 3. 7. 0\\nlibswscale 4. 8.100 / 4. 8.100\\nlibswresample 2. 9.100 / 2. 9.100\\nlibpostproc 54. 7.100 / 54. 7.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-r", "20", "-f", "rawvideo", "-s:v", "500x500", "-pix_fmt", "rgb24", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "/home/zuo/Code/lagom/baselines/ddpg/logs/default/0/172236777/anim/openaigym.video.0.3918876.video000000.mp4"]}}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"episode_id": 1, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 3.4.4-0ubuntu0.18.04.1 Copyright (c) 2000-2018 the FFmpeg developers\\nbuilt with gcc 7 (Ubuntu 7.3.0-16ubuntu3)\\nconfiguration: --prefix=/usr --extra-version=0ubuntu0.18.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --enable-gpl --disable-stripping --enable-avresample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librubberband --enable-librsvg --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-omx --enable-openal --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libopencv --enable-libx264 --enable-shared\\nlibavutil 55. 78.100 / 55. 78.100\\nlibavcodec 57.107.100 / 57.107.100\\nlibavformat 57. 83.100 / 57. 83.100\\nlibavdevice 57. 10.100 / 57. 10.100\\nlibavfilter 6.107.100 / 6.107.100\\nlibavresample 3. 7. 0 / 3. 7. 0\\nlibswscale 4. 8.100 / 4. 8.100\\nlibswresample 2. 9.100 / 2. 9.100\\nlibpostproc 54. 7.100 / 54. 7.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-r", "20", "-f", "rawvideo", "-s:v", "500x500", "-pix_fmt", "rgb24", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "/home/zuo/Code/lagom/baselines/ddpg/logs/default/0/172236777/anim/openaigym.video.0.3918876.video000001.mp4"]}}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified baselines/ddpg/logs/default/0/2876994566/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file modified baselines/ddpg/logs/default/0/2876994566/agent_500.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified baselines/ddpg/logs/default/0/3503522377/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file modified baselines/ddpg/logs/default/0/3503522377/agent_500.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified baselines/ddpg/logs/default/0/3949341511/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file modified baselines/ddpg/logs/default/0/3949341511/agent_500.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified baselines/ddpg/logs/default/0/4153361530/agent_1.pth
Binary file not shown.
Binary file not shown.
Binary file modified baselines/ddpg/logs/default/0/4153361530/agent_500.pth
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"initial_reset_timestamp": 1557676203.9763553, "timestamps": [1557676222.6762834], "episode_lengths": [1000], "episode_rewards": [6107.835505620261], "episode_types": ["t", "t"]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"stats": "openaigym.episode_batch.1.3918876.stats.json", "videos": [["openaigym.video.1.3918876.video000000.mp4", "openaigym.video.1.3918876.video000000.meta.json"], ["openaigym.video.1.3918876.video000001.mp4", "openaigym.video.1.3918876.video000001.meta.json"]], "env_info": {"gym_version": "0.12.1", "env_id": "HalfCheetah-v3"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"episode_id": 0, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 3.4.4-0ubuntu0.18.04.1 Copyright (c) 2000-2018 the FFmpeg developers\\nbuilt with gcc 7 (Ubuntu 7.3.0-16ubuntu3)\\nconfiguration: --prefix=/usr --extra-version=0ubuntu0.18.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --enable-gpl --disable-stripping --enable-avresample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librubberband --enable-librsvg --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-omx --enable-openal --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libopencv --enable-libx264 --enable-shared\\nlibavutil 55. 78.100 / 55. 78.100\\nlibavcodec 57.107.100 / 57.107.100\\nlibavformat 57. 83.100 / 57. 83.100\\nlibavdevice 57. 10.100 / 57. 10.100\\nlibavfilter 6.107.100 / 6.107.100\\nlibavresample 3. 7. 0 / 3. 7. 0\\nlibswscale 4. 8.100 / 4. 8.100\\nlibswresample 2. 9.100 / 2. 9.100\\nlibpostproc 54. 7.100 / 54. 7.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-r", "20", "-f", "rawvideo", "-s:v", "500x500", "-pix_fmt", "rgb24", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "/home/zuo/Code/lagom/baselines/ddpg/logs/default/0/4153361530/anim/openaigym.video.1.3918876.video000000.mp4"]}}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"episode_id": 1, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 3.4.4-0ubuntu0.18.04.1 Copyright (c) 2000-2018 the FFmpeg developers\\nbuilt with gcc 7 (Ubuntu 7.3.0-16ubuntu3)\\nconfiguration: --prefix=/usr --extra-version=0ubuntu0.18.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --enable-gpl --disable-stripping --enable-avresample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librubberband --enable-librsvg --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-omx --enable-openal --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libopencv --enable-libx264 --enable-shared\\nlibavutil 55. 78.100 / 55. 78.100\\nlibavcodec 57.107.100 / 57.107.100\\nlibavformat 57. 83.100 / 57. 83.100\\nlibavdevice 57. 10.100 / 57. 10.100\\nlibavfilter 6.107.100 / 6.107.100\\nlibavresample 3. 7. 0 / 3. 7. 0\\nlibswscale 4. 8.100 / 4. 8.100\\nlibswresample 2. 9.100 / 2. 9.100\\nlibpostproc 54. 7.100 / 54. 7.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-r", "20", "-f", "rawvideo", "-s:v", "500x500", "-pix_fmt", "rgb24", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "/home/zuo/Code/lagom/baselines/ddpg/logs/default/0/4153361530/anim/openaigym.video.1.3918876.video000001.mp4"]}}

0 comments on commit feebc54

Please sign in to comment.