# RL for Channel Selection
## Top-level simulation script

## Define functions to allow importing from Jupyter notebooks
Ref: https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Importing%20Notebooks.html

NB: This code should be moved to a utility library.

In [None]:
import io, os, sys, types
from IPython import get_ipython
from nbformat import read
from IPython.core.interactiveshell import InteractiveShell
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def find_notebook(fullname, path=None):
    """Locate the ``.ipynb`` file that backs a module name.

    Only the last dotted component of *fullname* is used as the file name,
    so ``"foo.bar"`` is looked up as ``bar.ipynb`` inside each directory of
    *path*.  When that file does not exist, underscores in the module name
    are replaced by spaces, so ``import Foo_Bar`` can find ``Foo Bar.ipynb``.

    Args:
        fullname: Fully qualified module name.
        path: Directories to search.  When empty or ``None`` the current
            working directory (``''``) is searched.

    Returns:
        The path of the first matching notebook, or ``None`` if no
        notebook was found.
    """
    name = fullname.rsplit('.', 1)[-1]
    # The underscore-to-space fallback applies to the file name only.  The
    # directory part must stay untouched, otherwise notebooks stored under
    # a directory such as "my_project/" could never be matched.
    candidates = (name + ".ipynb", name.replace("_", " ") + ".ipynb")
    for d in path or ['']:
        for filename in candidates:
            nb_path = os.path.join(d, filename)
            if os.path.isfile(nb_path):
                return nb_path
    return None
        
class NotebookLoader(object):
    """Module loader that imports a Jupyter notebook by running its code cells.

    Attributes:
        shell: The shared ``InteractiveShell`` instance, used to translate
            IPython syntax in cells into plain Python.
        path: Directories handed to ``find_notebook`` when resolving a
            module name (``None`` means the current directory).
    """

    def __init__(self, path=None):
        self.shell = InteractiveShell.instance()
        self.path = path

    def load_module(self, fullname):
        """Import a notebook as a module.

        Args:
            fullname: Fully qualified name of the module to create.

        Returns:
            The new module, whose namespace holds everything defined by the
            notebook's code cells.

        Raises:
            ImportError: If no notebook can be found for *fullname*.
            Exception: Whatever a notebook cell raises while executing; the
                partially initialised module is removed from
                ``sys.modules`` before the exception propagates.
        """
        path = find_notebook(fullname, self.path)
        if path is None:
            # Fail with a proper import error instead of letting
            # io.open(None) raise an unrelated TypeError.
            raise ImportError(
                "no Jupyter notebook found for module %r" % fullname,
                name=fullname)

        print("importing Jupyter notebook from %s" % path)

        # load the notebook object (converted to notebook format version 4)
        with io.open(path, 'r', encoding='utf-8') as f:
            nb = read(f, 4)

        # create the module and add it to sys.modules before executing any
        # cell, so code in the notebook can already see the module entry
        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        mod.__dict__['get_ipython'] = get_ipython
        sys.modules[fullname] = mod

        # extra work to ensure that magics that would affect the user_ns
        # actually affect the notebook module's ns
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = mod.__dict__

        try:
            for cell in nb.cells:
                if cell.cell_type == 'code':
                    # transform the input to executable Python
                    code = self.shell.input_transformer_manager.transform_cell(cell.source)
                    # run the code in the module
                    exec(code, mod.__dict__)
        except BaseException:
            # Do not leave a half-executed module behind: a later
            # "import" would silently return it instead of retrying.
            sys.modules.pop(fullname, None)
            raise
        finally:
            self.shell.user_ns = save_user_ns
        return mod
    
class NotebookFinder(object):
    """Meta-path finder that locates Jupyter notebooks.

    Implements both the legacy ``find_module`` hook and the ``find_spec``
    hook.  Python 3.12 and later only consult ``find_spec`` on
    ``sys.meta_path`` entries, so without it the finder would be ignored.
    """

    def __init__(self):
        # One loader per search path, keyed by the joined path string
        # (or by None when no path was given).
        self.loaders = {}

    def find_module(self, fullname, path=None):
        """Return a loader for *fullname*, or ``None`` if no notebook matches.

        Args:
            fullname: Fully qualified module name being imported.
            path: Directories to search, as passed by the import system.
        """
        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return None

        key = path
        if path:
            # lists aren't hashable
            key = os.path.sep.join(path)

        if key not in self.loaders:
            self.loaders[key] = NotebookLoader(path)
        return self.loaders[key]

    def find_spec(self, fullname, path=None, target=None):
        """Return a module spec for *fullname*, or ``None`` if not a notebook.

        Args:
            fullname: Fully qualified module name being imported.
            path: Directories to search, as passed by the import system.
            target: Unused; accepted for compatibility with the finder
                protocol.
        """
        # Imported locally so this class does not depend on a file-level
        # import that the rest of the file does not otherwise need.
        import importlib.util

        loader = self.find_module(fullname, path)
        if loader is None:
            return None
        return importlib.util.spec_from_loader(fullname, loader)
    
# Register the notebook finder at the end of sys.meta_path, so it is only
# consulted after the finders that are already registered.
sys.meta_path.append(NotebookFinder())

## Begin main simulation run script.

In [None]:
from env import Env
from RL_brain import DeepQNetwork

## Simulation configuration parameters

In [None]:
# Simulation configuration: run-loop settings under 'run', agent settings
# under 'dqn'.
sim = {
    'cfg': {
        # Parameters read by run().
        'run': {
            # Number of episodes to run.
            'numEpisodes': 1,
            # Maximum number of simulation steps per episode.
            'maxSteps': 50000,
            # Periodic training starts after this step number.
            'startLearnStep': 200,
            # Number of steps between (re)training calls.
            'learnPeriod': 5,
        },
        # Parameters handed to the DeepQNetwork constructor.
        'dqn': {
            # Forwarded as `learning_rate`.
            'learningRate': 0.01,
            # Intended for the `reward_decay` argument.
            'rewardDecay': 0.9,
            # Forwarded as `e_greedy`.
            'eGreedy': 1,
            # Forwarded as `e_greedy_increment` (changes the e_greedy value).
            'eGreedyincrement': 0.005,
            # Forwarded as `replace_target_iter`.
            'replaceTargetIter': 200,
            # Forwarded as `memory_size`.
            'memorySize': 2000,
        },
    },
}

## The run() function executes the main simulation loop.

In [None]:
def run():
    """Run the main simulation loop for the configured number of episodes.

    Relies on the module-level ``sim`` (configuration), ``env`` (the ``Env``
    instance) and ``RL`` (the ``DeepQNetwork`` agent), which must exist
    before this function is called.

    Each step stacks the per-channel state into one vector, lets the agent
    choose a channel, applies that choice to the environment, stores the
    resulting transition and, once past ``startLearnStep``, trains the
    agent every ``learnPeriod`` steps.
    """
    # For ease of access to run parameters.
    run_cfg = sim['cfg']['run']
    numEpisodes = run_cfg['numEpisodes']
    maxSteps = run_cfg['maxSteps']
    startLearnStep = run_cfg['startLearnStep']
    learnPeriod = run_cfg['learnPeriod']

    # Channel order used whenever the per-channel state is flattened.
    channels = ("Channel_1", "Channel_6", "Channel_11")

    def stack_state(state_by_channel):
        # np.hstack builds a new array, so the stacked vector does not
        # alias the environment's own per-channel data.
        return np.hstack([state_by_channel[name] for name in channels])

    for _ in range(numEpisodes):
        # initial observation
        env_state_1 = env.reset()

        for step in range(maxSteps):
            # Output current observation:
            print('{}: current env = {}\n'.format(env.time,env.time_env_state))

            env_state = stack_state(env_state_1)

            # RL choose action based on env_state
            action = RL.choose_action(env_state)
            if action == 0:
                action_ = "Channel_1"
            elif action == 1:
                action_ = "Channel_6"
            else:
                action_ = "Channel_11"
            observation = env_state_1[action_]
            print('{}: action_ = {}, observation = {}\n'.format(env.time, action_, observation))

            # RL take action and get next state and reward; the second
            # returned value (next observation) is not used here.
            next_state_by_channel, _, reward = env.step(action_)
            env_state_ = stack_state(next_state_by_channel)

            RL.store_transition(env_state, action, reward, env_state_)

            if (step > startLearnStep) and (step % learnPeriod == 0):
                RL.learn()

            # Carry the per-channel state forward as returned by the
            # environment.  Re-slicing the stacked vector with fixed
            # 4-wide windows would mis-assign features whenever a channel
            # does not have exactly 4 values.
            env_state_1 = next_state_by_channel



## Main simulation routine

In [None]:
if __name__ == "__main__":
    # Environment the agent interacts with; run() uses it as a global.
    env = Env()

    # For ease of access to DQN config. parameters.
    # NOTE: rewardDecay must come from the 'rewardDecay' key; it was
    # previously overwritten with the learning rate before being used.
    learningRate = sim['cfg']['dqn']['learningRate']
    rewardDecay = sim['cfg']['dqn']['rewardDecay']
    eGreedy = sim['cfg']['dqn']['eGreedy']
    e_greedy_increment = sim['cfg']['dqn']['eGreedyincrement']
    replaceTargetIter = sim['cfg']['dqn']['replaceTargetIter']
    memorySize = sim['cfg']['dqn']['memorySize']

    # DQN agent; run() uses it as a global.
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=learningRate,
                      reward_decay=rewardDecay,
                      e_greedy=eGreedy,
                      replace_target_iter=replaceTargetIter,
                      e_greedy_increment=e_greedy_increment,
                      memory_size=memorySize,
                      )
    # env.data()
    run()
    RL.plot_cost()
    # env.plot_count()

    # Running fraction of steps whose env.count_history entry is >= 1.
    # NOTE(review): per the y-axis label these entries presumably mark steps
    # where the selected action was the max-value action - confirm in Env.
    hits = [1 if i >= 1 else 0 for i in env.count_history]
    running_fraction = np.divide(np.cumsum(hits), np.arange(len(hits)) + 1)

    plt.figure()
    plt.plot(running_fraction, '.-')
    plt.xlabel('Training Step')
    plt.ylabel('Cumulative % selection action is max. value action')
    plt.grid(True)
    plt.show()



   
