In [1]:
# Optional dependencies for a fresh machine / Colab runtime; uncomment as needed.
# !sudo apt-get install -y xvfb ffmpeg
# %pip install -q atari_py
# %pip install -q gym[atari]
# %pip install -q imageio
# %pip install -q PILLOW
# %pip install -q pyglet
# %pip install -q typing-extensions==3.7.4.3
# %pip install -q pyvirtualdisplay
# Install OSAR from the local checkout. `%pip` installs into the environment of
# the running kernel, whereas `!pip` uses whichever `pip` is first on the shell PATH.
%pip install ../../OSAR-keras/.
# %pip install -q git+https://github.com/ustyuzhaninky/OSAR-keras

Processing /mnt/d/jorgen/Projects/python/PhDSub/TowerProject/OSAR-keras


Building wheels for collected packages: OSAR-nightly
  Building wheel for OSAR-nightly (setup.py) ... [?25ldone
[?25h  Created wheel for OSAR-nightly: filename=OSAR_nightly-0.1.16.dev20210402-cp38-cp38-linux_x86_64.whl size=75274 sha256=61206a393d0a7ef1b61bf3ec35890420d7c5898cb7e4a02fd2aa93cdd7fd2cef
  Stored in directory: /home/jorgen/.cache/pip/wheels/ad/5b/5c/4f851e7d9aac2a5cd80b951a12e2e6b7a0c65ee91437a17249
Successfully built OSAR-nightly
Installing collected packages: OSAR-nightly
  Attempting uninstall: OSAR-nightly
    Found existing installation: OSAR-nightly 0.1.16.dev20210402
    Uninstalling OSAR-nightly-0.1.16.dev20210402:
      Successfully uninstalled OSAR-nightly-0.1.16.dev20210402
Successfully installed OSAR-nightly-0.1.16.dev20210402


In [2]:
from __future__ import absolute_import, division, print_function

import base64
import imageio
import IPython
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import PIL.Image
import pyvirtualdisplay

import tensorflow as tf

from tf_agents import agents
from tf_agents.agents.dqn import dqn_agent
from tf_agents.drivers import dynamic_step_driver
from tf_agents.environments import suite_gym
from tf_agents.environments import tf_py_environment
from tf_agents.eval import metric_utils
from tf_agents.metrics import tf_metrics
from tf_agents.networks import q_network
from tf_agents.policies import random_tf_policy
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.trajectories import trajectory
from tf_agents.utils import common

from OSAR import PuDiscriminator, PuStateSotringActor, Runner, TrialAgent

In [3]:
# Gym Atari `-ram-` environment IDs to benchmark; one experiment config is built
# per entry. Every ID is listed exactly once ('Asteroids-ram-v0' used to appear
# twice, which configured the same experiment two times).
env_names = [
    'Alien-ram-v0',
    'MontezumaRevenge-ram-v0',
    'AirRaid-ram-v0',
    'Solaris-ram-v0',
    'Berzerk-ram-v0',
    'Asteroids-ram-v0',
    'Venture-ram-v0',
    'BattleZone-ram-v0',
    'Enduro-ram-v0',
    'Tutankham-ram-v0',
    'WizardOfWor-ram-v0',
]

In [4]:
# Experiment hyperparameters. The trailing `# @param` annotations are Colab form
# fields, so each assignment must stay on its own line.

# Training-loop sizing; forwarded to the Runner via `experiment_common_specs`.
num_iterations = 1000 # @param {type:"integer"}

initial_collect_steps = 100  # @param {type:"integer"}
collect_steps_per_iteration = 1  # @param {type:"integer"}
replay_buffer_max_length = 1000  # @param {type:"integer"}

# Agent / network settings; forwarded to `network_generator` via `network_specs`.
batch_size = 1  # @param {type:"integer"}
learning_rate = 1e-3  # @param {type:"number"}
# NOTE(review): `log_interval` is not referenced by any other cell shown in this notebook.
log_interval = 20  # @param {type:"integer"}
memory_len = 10 # @param {type: "integer"}
n_turns = 3 # @param {type: "integer"}
# NOTE(review): `num_atoms` and `q_value` are accepted by `network_generator` but
# not used inside its body.
num_atoms = 51  # @param {type:"integer"}
q_value = 10  # @param {type:"integer"}
n_step_update = 2  # @param {type:"integer"}

# Evaluation schedule. `eval_interval` equals `num_iterations`, so presumably
# evaluation runs once per experiment, at the end -- confirm against OSAR.Runner.
num_eval_episodes = 20  # @param {type:"integer"}
eval_interval = 1000  # @param {type:"integer"}

# Exploration settings handed to TrialAgent. Presumably only one of the two should
# be set (the other left as None), as with TF-Agents' DqnAgent -- TODO confirm.
boltzmann_temperature = 0.1 # @param {type:"number"}
epsilon_greedy = None # @param {type:"raw"}
conv_type = '1d' # @param {type:"string"}

In [5]:
# Passed as `fc_layer_params` to both OSAR networks (presumably hidden-layer widths).
fc_layer_params = (64, 64)

# Agent settings bundled for the Runner. The keys mirror the parameter names of
# `network_generator`; insertion order matches the order of the pairs below.
network_specs = dict([
    ('batch_size', batch_size),
    ('memory_len', memory_len),
    ('n_turns', n_turns),
    ('fc_layer_params', fc_layer_params),
    ('num_atoms', num_atoms),
    ('conv_type', conv_type),
    ('learning_rate', learning_rate),
    ('q_value', q_value),
    ('n_step_update', n_step_update),
    ('boltzmann_temperature', boltzmann_temperature),
    ('epsilon_greedy', epsilon_greedy),
    ('debug_summaries', True),
    ('summarize_grads_and_vars', True),
])

def network_generator(
    observation_spec,
    action_spec,
    batch_size,
    memory_len,
    n_turns,
    fc_layer_params,
    num_atoms,
    conv_type,
    learning_rate,
    time_step_spec,
    n_step_update,
    train_step_counter,
    q_value=q_value,
    boltzmann_temperature=None,
    epsilon_greedy=0.1,
    debug_summaries=True,
    summarize_grads_and_vars=True,
    **kwargs,
    ):
    """Build and initialize a `TrialAgent` with its discriminator and expert networks.

    Args:
        observation_spec: Observation spec passed to both networks.
        action_spec: Action spec passed to both networks and to the agent.
        batch_size: Passed positionally to both networks.
        memory_len: Passed positionally to both networks.
        n_turns: Passed positionally to both networks.
        fc_layer_params: Passed as `fc_layer_params` to both networks.
        num_atoms: Accepted for interface compatibility; not used here.
        conv_type: Passed as `conv_type` to both networks.
        learning_rate: Learning rate of the Adam optimizer given to the agent.
        time_step_spec: Time-step spec passed to the agent.
        n_step_update: Accepted for interface compatibility; not used here.
        train_step_counter: Step counter handed to the agent. When `None`, a
            fresh `tf.int64` variable starting at 0 is created instead.
        q_value: Accepted for interface compatibility; not used here.
        boltzmann_temperature: Forwarded to `TrialAgent`.
        epsilon_greedy: Forwarded to `TrialAgent`.
        debug_summaries: Forwarded to `TrialAgent`.
        summarize_grads_and_vars: Forwarded to `TrialAgent`.
        **kwargs: Extra settings are accepted and ignored.

    Returns:
        The `TrialAgent` instance, after `initialize()` has been called on it.
    """
    # Both networks are constructed from exactly the same arguments.
    net_args = (observation_spec, action_spec, batch_size, memory_len, n_turns)
    net_kwargs = dict(fc_layer_params=fc_layer_params, conv_type=conv_type)
    disc_net = PuDiscriminator(*net_args, **net_kwargs)
    exp_net = PuStateSotringActor(*net_args, **net_kwargs)

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    # Use the caller's counter so that the caller observes training progress.
    # Previously this argument was ignored and a private counter was always
    # created, leaving the caller's counter untouched.
    if train_step_counter is None:
        train_step_counter = tf.Variable(0, dtype=tf.int64)

    agent = TrialAgent(
        time_step_spec,
        action_spec,
        # NOTE(review): keyword spelling kept exactly as in the original call;
        # presumably it matches TrialAgent's signature -- confirm in OSAR.
        disctiminator=disc_net,
        expert=exp_net,
        optimizer=optimizer,
        # Alternative loss that was tried: common.element_wise_squared_loss
        pu_errors_loss_fn=tf.keras.losses.huber,
        boltzmann_temperature=boltzmann_temperature,
        epsilon_greedy=epsilon_greedy,
        debug_summaries=debug_summaries,
        train_step_counter=train_step_counter,
        summarize_grads_and_vars=summarize_grads_and_vars,
    )

    agent.initialize()

    return agent

    

In [6]:
# Settings shared by every experiment; the per-environment configs are copies
# of this mapping with an 'env_name' entry added.
# First the agent factory and its arguments ...
experiment_common_specs = {
    'agent_specs': network_specs,
    'agent_generator': network_generator,
}
# ... then the collection / training / evaluation schedule.
experiment_common_specs.update({
    'num_iterations': num_iterations,
    'initial_collect_steps': initial_collect_steps,
    'collect_steps_per_iteration': collect_steps_per_iteration,
    'replay_buffer_max_length': replay_buffer_max_length,
    'num_eval_episodes': num_eval_episodes,
    'eval_interval': eval_interval,
    'n_step_update': n_step_update,
})

In [7]:
# One config per environment: a shallow copy of the shared settings plus the
# environment ID. The nested 'agent_specs' dict is shared between all configs.
configs = [
    {**experiment_common_specs, 'env_name': env_id}
    for env_id in env_names
]

In [8]:
%%time
# Hand every per-environment config to the OSAR Runner and time the whole cell.
# NOTE(review): an empty `logpath` presumably makes the Runner log relative to the
# current working directory -- confirm against OSAR.Runner.
logpath = ''
model_name = 'test_pu'
runner = Runner(model_name=model_name, logpath=logpath, list_configs=configs)
# `progress` / `experiment_progress` presumably select which progress bars are
# drawn; the saved output shows one bar reporting avg_return and train_loss.
runner.run(progress=False, experiment_progress=True)

Instructions for updating:
Do not call `graph_parents`.
Instructions for updating:
rename to distribute_datasets_from_function


  retval_ = ag__.and_((lambda : (ag__.ld(state) is not None)), (lambda : ag__.and_((lambda : (ag__.ld(state) is not ())), (lambda : (ag__.ld(state) is not [])))))


Instructions for updating:
Do not pass `graph_parents`.  They will  no longer be used.


Instructions for updating:
Do not pass `graph_parents`.  They will  no longer be used.
  ag__.if_stmt(ag__.not_(ag__.or_((lambda : (ag__.ld(policy_state) is None)), (lambda : ag__.or_((lambda : (ag__.ld(policy_state) is ())), (lambda : (ag__.ld(policy_state) is [])))))), if_body_2, else_body_2, get_state_2, set_state_2, (), 0)
  ag__.if_stmt((ag__.ld(policy_state) is ()), if_body_2, else_body_2, get_state_2, set_state_2, ('retval_', 'do_return'), 2)








Episode 1000: 100%|██████████| 1000/1000 [03:08<00:00,  5.30it/s, avg_return=0, train_loss=1.11e-7] 
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/jorgen/anaconda3/envs/osar/lib/python3.8/site-packages/IPython/core/magics/execution.py", line 1326, in time
    out = eval(code_2, glob, local_ns)
  File "<timed exec>", line 4, in <module>
  File "/home/jorgen/anaconda3/envs/osar/lib/python3.8/site-packages/OSAR/runner.py", line 508, in run
    returns = experiment(
  File "/home/jorgen/anaconda3/envs/osar/lib/python3.8/site-packages/OSAR/runner.py", line 303, in __call__
    returns = self.call(progress)
  File "/home/jorgen/anaconda3/envs/osar/lib/python3.8/site-packages/OSAR/runner.py", line 312, in call
    avg_return = self.get_eval_metrics()["AverageReturn"]
  File "/home/jorgen/anaconda3/envs/osar/lib/python3.8/site-packages/OSAR/runner.py", line 295, in get_eval_metrics
    self._eval_actor.run()
  File "/home/jorgen/anaconda3/envs/osar/lib/python3.8/site-packages/tf_agents/train/actor.py", line 139, in run
    self._time_step, self._policy_state = self._driver.run(
  File "/ho


KeyboardInterrupt



In [None]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

In [None]:
# %tensorboard --logdir logs

In [None]:
# !kill 1369