Skip to content

Commit

Permalink
Merge pull request #181 from zuoxingdong/step_info_trajectory
Browse files (browse the repository at this point in the history)
sync SAC to latest refactoring [ongoing]
  • Loading branch information
zuoxingdong committed May 9, 2019
2 parents 1db06d1 + dac2dc8 commit 9bdbedd
Show file tree
Hide file tree
Showing 287 changed files with 1,209 additions and 75 deletions.
59 changes: 30 additions & 29 deletions baselines/sac/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,48 +177,49 @@ def learn(self, D, **kwargs):
for i in range(episode_length):
observations, actions, rewards, next_observations, masks = replay.sample(self.config['replay.batch_size'])

# Update Critic
Qs1, Qs2 = self.critic(observations, actions)
Qs1, Qs2 = map(lambda x: x.squeeze(-1), [Qs1, Qs2])
#Qs1, Qs2 = map(lambda x: x.squeeze(-1), [Qs1, Qs2])
with torch.no_grad():
out_actor = self.choose_action(next_observations, mode='train')
next_actions = out_actor['action']
next_actions_logprob = out_actor['action_logprob']
next_actions_logprob = out_actor['action_logprob'].unsqueeze(-1)
next_Qs1, next_Qs2 = self.critic_target(next_observations, next_actions)
next_Qs = torch.min(next_Qs1, next_Qs2).squeeze(-1) - self.alpha.detach()*next_actions_logprob
Q_targets = rewards + self.config['agent.gamma']*masks*next_Qs
next_Qs = torch.min(next_Qs1, next_Qs2) - self.alpha.detach()*next_actions_logprob
Q_targets = rewards.unsqueeze(-1) + self.config['agent.gamma']*masks.unsqueeze(-1)*next_Qs

critic_loss = F.mse_loss(Qs1, Q_targets.detach()) + F.mse_loss(Qs2, Q_targets.detach())
print(critic_loss.item())############
self.optimizer_zero_grad()
critic_loss.backward()
critic_grad_norm = nn.utils.clip_grad_norm_(self.critic.parameters(), self.config['agent.max_grad_norm'])
self.critic_optimizer.step()

if i % self.config['agent.policy_delay'] == 0:
out_actor = self.choose_action(observations, mode='train')
policy_actions = out_actor['action']
policy_actions_logprob = out_actor['action_logprob']

actor_Qs1, actor_Qs2 = self.critic(observations, policy_actions)
actor_Qs = torch.min(actor_Qs1, actor_Qs2).squeeze(-1)
actor_loss = torch.mean(self.alpha.detach()*policy_actions_logprob - actor_Qs)

self.optimizer_zero_grad()
actor_loss.backward()
actor_grad_norm = nn.utils.clip_grad_norm_(self.actor.parameters(), self.config['agent.max_grad_norm'])
self.actor_optimizer.step()

alpha_loss = torch.mean(self.log_alpha*(-policy_actions_logprob - self.target_entropy).detach())

self.optimizer_zero_grad()
alpha_loss.backward()
self.log_alpha_optimizer.step()

self.polyak_update_target()

out['actor_loss'].append(actor_loss)
out['alpha_loss'].append(alpha_loss)
# Update Actor
out_actor = self.choose_action(observations, mode='train')
policy_actions = out_actor['action']
policy_actions_logprob = out_actor['action_logprob']

actor_Qs1, actor_Qs2 = self.critic(observations, policy_actions)
actor_Qs = torch.min(actor_Qs1, actor_Qs2).squeeze(-1)
actor_loss = torch.mean(self.alpha.detach()*policy_actions_logprob - actor_Qs)

self.optimizer_zero_grad()
actor_loss.backward()
actor_grad_norm = nn.utils.clip_grad_norm_(self.actor.parameters(), self.config['agent.max_grad_norm'])
self.actor_optimizer.step()

# Update alpha
alpha_loss = torch.mean(self.log_alpha*(-policy_actions_logprob - self.target_entropy).detach())

self.optimizer_zero_grad()
alpha_loss.backward()
self.log_alpha_optimizer.step()

self.polyak_update_target()

out['critic_loss'].append(critic_loss)
out['actor_loss'].append(actor_loss)
out['alpha_loss'].append(alpha_loss)
Q1_vals.append(Qs1)
Q2_vals.append(Qs2)
logprob_vals.append(policy_actions_logprob)
Expand Down
19 changes: 8 additions & 11 deletions baselines/sac/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,23 @@ def train(self, n=None, **kwargs):
dump_togo = 0
num_episode = 0
checkpoint_count = 0
observation = self.env.reset()
observation, _ = self.env.reset()
for i in count():
if i >= self.config['train.timestep']:
break
if i < self.config['replay.init_size']:
action = [self.env.action_space.sample()]
else:
action = self.agent.choose_action(observation, mode='stochastic')['action']
next_observation, reward, done, info = self.env.step(action)
next_observation, reward, step_info = self.env.step(action)
eval_togo += 1
dump_togo += 1
if done[0]: # [0] due to single environment
if step_info[0].last: # [0] due to single environment
start_time = perf_counter()
# NOTE: must use latest TimeLimit
reach_time_limit = info[0].get('TimeLimit.truncated', False)
reach_terminal = not reach_time_limit
self.replay.add(observation[0], action[0], reward[0], info[0]['last_observation'], reach_terminal)
self.replay.add(observation[0], action[0], reward[0], step_info[0]['last_observation'], step_info[0].terminal)

# updates in the end of episode, for each time step
out_agent = self.agent.learn(D=None, replay=self.replay, episode_length=info[0]['episode']['horizon'])
out_agent = self.agent.learn(D=None, replay=self.replay, episode_length=step_info[0]['episode']['horizon'])
num_episode += 1
if (i+1) >= int(self.config['train.timestep']*(checkpoint_count/(self.config['checkpoint.num'] - 1))):
self.agent.checkpoint(self.logdir, num_episode)
Expand All @@ -48,8 +45,8 @@ def train(self, n=None, **kwargs):
logger('accumulated_trained_timesteps', i + 1)
logger('accumulated_trained_episodes', num_episode)
[logger(key, value) for key, value in out_agent.items()]
logger('episode_return', info[0]['episode']['return'])
logger('episode_horizon', info[0]['episode']['horizon'])
logger('episode_return', step_info[0]['episode']['return'])
logger('episode_horizon', step_info[0]['episode']['horizon'])
train_logs.append(logger.logs)
if dump_togo >= self.config['log.freq']:
dump_togo %= self.config['log.freq']
Expand All @@ -59,7 +56,7 @@ def train(self, n=None, **kwargs):
eval_logs.append(self.eval(accumulated_trained_timesteps=(i+1),
accumulated_trained_episodes=num_episode))
else:
self.replay.add(observation[0], action[0], reward[0], next_observation[0], done[0])
self.replay.add(observation[0], action[0], reward[0], next_observation[0], step_info[0].terminal)
observation = next_observation
if checkpoint_count < self.config['checkpoint.num']:
self.agent.checkpoint(self.logdir, num_episode)
Expand Down
23 changes: 11 additions & 12 deletions baselines/sac/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,30 +14,25 @@
from lagom.envs.wrappers import TimeLimit
from lagom.envs.wrappers import NormalizeAction
from lagom.envs.wrappers import VecMonitor
from lagom.envs.wrappers import VecStepInfo

from baselines.sac.agent import Agent
from baselines.sac.engine import Engine
from baselines.sac.replay_buffer import ReplayBuffer


config = Config(
{'cuda': True,
##########
'cuda_ids': [6],
###########
'log.dir': 'logs/default',
'log.freq': 1000, # every n timesteps
{'log.freq': 1000, # every n timesteps
'checkpoint.num': 3,

'env.id': Grid(['HalfCheetah-v3']),######, 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),

'agent.gamma': 0.99,
'agent.polyak': 0.995, # polyak averaging coefficient for targets update
'agent.actor.lr': 3e-4,
'agent.actor.use_lr_scheduler': False,
'agent.critic.lr': 3e-4,
'agent.critic.use_lr_scheduler': False,
'agent.policy_delay': 1, ########2,
'agent.initial_temperature': 1.0,
'agent.max_grad_norm': 999999, # grad clipping by norm

Expand All @@ -64,12 +59,12 @@ def _make_env():
return env


def run(config, seed, device):
def run(config, seed, device, logdir):
set_global_seeds(seed)
logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)

env = make_env(config, seed)
env = VecMonitor(env)
env = VecStepInfo(env)

eval_env = make_env(config, seed)
eval_env = VecMonitor(eval_env)
Expand All @@ -87,5 +82,9 @@ def run(config, seed, device):
if __name__ == '__main__':
run_experiment(run=run,
config=config,
seeds=[4153361530], #####3503522377, 2876994566, 172236777, 3949341511, 849059707],
num_worker=os.cpu_count())
seeds=[4153361530, 3503522377, 2876994566, 172236777, 3949341511, 849059707],
log_dir='logs/default',
max_workers=os.cpu_count(),
chunksize=1,
use_gpu=True,
gpu_ids=None)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"initial_reset_timestamp": 1557312462.6688251, "timestamps": [1557312476.7781692], "episode_lengths": [1000], "episode_rewards": [10573.852496180081], "episode_types": ["t", "t"]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"stats": "openaigym.episode_batch.8.2837515.stats.json", "videos": [["openaigym.video.8.2837515.video000000.mp4", "openaigym.video.8.2837515.video000000.meta.json"], ["openaigym.video.8.2837515.video000001.mp4", "openaigym.video.8.2837515.video000001.meta.json"]], "env_info": {"gym_version": "0.12.1", "env_id": "HalfCheetah-v3"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"episode_id": 0, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 3.4.4-0ubuntu0.18.04.1 Copyright (c) 2000-2018 the FFmpeg developers\\nbuilt with gcc 7 (Ubuntu 7.3.0-16ubuntu3)\\nconfiguration: --prefix=/usr --extra-version=0ubuntu0.18.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --enable-gpl --disable-stripping --enable-avresample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librubberband --enable-librsvg --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-omx --enable-openal --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libopencv --enable-libx264 --enable-shared\\nlibavutil 55. 78.100 / 55. 78.100\\nlibavcodec 57.107.100 / 57.107.100\\nlibavformat 57. 83.100 / 57. 83.100\\nlibavdevice 57. 10.100 / 57. 10.100\\nlibavfilter 6.107.100 / 6.107.100\\nlibavresample 3. 7. 0 / 3. 7. 0\\nlibswscale 4. 8.100 / 4. 8.100\\nlibswresample 2. 9.100 / 2. 9.100\\nlibpostproc 54. 7.100 / 54. 7.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-r", "20", "-f", "rawvideo", "-s:v", "500x500", "-pix_fmt", "rgb24", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "/home/zuo/Code/lagom/baselines/sac/logs/default/0/4153361530/anim/openaigym.video.8.2837515.video000000.mp4"]}}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"episode_id": 1, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 3.4.4-0ubuntu0.18.04.1 Copyright (c) 2000-2018 the FFmpeg developers\\nbuilt with gcc 7 (Ubuntu 7.3.0-16ubuntu3)\\nconfiguration: --prefix=/usr --extra-version=0ubuntu0.18.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --enable-gpl --disable-stripping --enable-avresample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librubberband --enable-librsvg --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-omx --enable-openal --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libopencv --enable-libx264 --enable-shared\\nlibavutil 55. 78.100 / 55. 78.100\\nlibavcodec 57.107.100 / 57.107.100\\nlibavformat 57. 83.100 / 57. 83.100\\nlibavdevice 57. 10.100 / 57. 10.100\\nlibavfilter 6.107.100 / 6.107.100\\nlibavresample 3. 7. 0 / 3. 7. 0\\nlibswscale 4. 8.100 / 4. 8.100\\nlibswresample 2. 9.100 / 2. 9.100\\nlibpostproc 54. 7.100 / 54. 7.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-r", "20", "-f", "rawvideo", "-s:v", "500x500", "-pix_fmt", "rgb24", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "/home/zuo/Code/lagom/baselines/sac/logs/default/0/4153361530/anim/openaigym.video.8.2837515.video000001.mp4"]}}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
21 changes: 21 additions & 0 deletions baselines/sac/logs/___default/0/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
ID: 0
cuda: true
log.dir: logs/default
log.freq: 1000
checkpoint.num: 3
agent.gamma: 0.99
agent.polyak: 0.995
agent.actor.lr: 0.0003
agent.actor.use_lr_scheduler: false
agent.critic.lr: 0.0003
agent.critic.use_lr_scheduler: false
agent.policy_delay: 1
agent.initial_temperature: 1.0
agent.max_grad_norm: 999999
replay.capacity: 1000000
replay.init_size: 10000
replay.batch_size: 256
train.timestep: 1000000
eval.freq: 5000
eval.num_episode: 10
env.id: HalfCheetah-v3
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"initial_reset_timestamp": 1557312374.8916614, "timestamps": [1557312389.6250508], "episode_lengths": [1000], "episode_rewards": [3499.633093221702], "episode_types": ["t", "t"]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"stats": "openaigym.episode_batch.6.2837515.stats.json", "videos": [["openaigym.video.6.2837515.video000000.mp4", "openaigym.video.6.2837515.video000000.meta.json"], ["openaigym.video.6.2837515.video000001.mp4", "openaigym.video.6.2837515.video000001.meta.json"]], "env_info": {"gym_version": "0.12.1", "env_id": "Hopper-v3"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"episode_id": 0, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 3.4.4-0ubuntu0.18.04.1 Copyright (c) 2000-2018 the FFmpeg developers\\nbuilt with gcc 7 (Ubuntu 7.3.0-16ubuntu3)\\nconfiguration: --prefix=/usr --extra-version=0ubuntu0.18.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --enable-gpl --disable-stripping --enable-avresample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librubberband --enable-librsvg --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-omx --enable-openal --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libopencv --enable-libx264 --enable-shared\\nlibavutil 55. 78.100 / 55. 78.100\\nlibavcodec 57.107.100 / 57.107.100\\nlibavformat 57. 83.100 / 57. 83.100\\nlibavdevice 57. 10.100 / 57. 10.100\\nlibavfilter 6.107.100 / 6.107.100\\nlibavresample 3. 7. 0 / 3. 7. 0\\nlibswscale 4. 8.100 / 4. 8.100\\nlibswresample 2. 9.100 / 2. 9.100\\nlibpostproc 54. 7.100 / 54. 7.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-r", "125", "-f", "rawvideo", "-s:v", "500x500", "-pix_fmt", "rgb24", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "/home/zuo/Code/lagom/baselines/sac/logs/default/1/4153361530/anim/openaigym.video.6.2837515.video000000.mp4"]}}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"episode_id": 1, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 3.4.4-0ubuntu0.18.04.1 Copyright (c) 2000-2018 the FFmpeg developers\\nbuilt with gcc 7 (Ubuntu 7.3.0-16ubuntu3)\\nconfiguration: --prefix=/usr --extra-version=0ubuntu0.18.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --enable-gpl --disable-stripping --enable-avresample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librubberband --enable-librsvg --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-omx --enable-openal --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libopencv --enable-libx264 --enable-shared\\nlibavutil 55. 78.100 / 55. 78.100\\nlibavcodec 57.107.100 / 57.107.100\\nlibavformat 57. 83.100 / 57. 83.100\\nlibavdevice 57. 10.100 / 57. 10.100\\nlibavfilter 6.107.100 / 6.107.100\\nlibavresample 3. 7. 0 / 3. 7. 0\\nlibswscale 4. 8.100 / 4. 8.100\\nlibswresample 2. 9.100 / 2. 9.100\\nlibpostproc 54. 7.100 / 54. 7.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-r", "125", "-f", "rawvideo", "-s:v", "500x500", "-pix_fmt", "rgb24", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "/home/zuo/Code/lagom/baselines/sac/logs/default/1/4153361530/anim/openaigym.video.6.2837515.video000001.mp4"]}}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
21 changes: 21 additions & 0 deletions baselines/sac/logs/___default/1/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
ID: 1
cuda: true
log.dir: logs/default
log.freq: 1000
checkpoint.num: 3
agent.gamma: 0.99
agent.polyak: 0.995
agent.actor.lr: 0.0003
agent.actor.use_lr_scheduler: false
agent.critic.lr: 0.0003
agent.critic.use_lr_scheduler: false
agent.policy_delay: 1
agent.initial_temperature: 1.0
agent.max_grad_norm: 999999
replay.capacity: 1000000
replay.init_size: 1000
replay.batch_size: 256
train.timestep: 1000000
eval.freq: 5000
eval.num_episode: 10
env.id: Hopper-v3
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"initial_reset_timestamp": 1557312413.6163213, "timestamps": [1557312427.6062093], "episode_lengths": [1000], "episode_rewards": [4632.640245432929], "episode_types": ["t", "t"]}

0 comments on commit 9bdbedd

Please sign in to comment.