# Channel environment where the agent is located

This part defines the environment in which the agent is located and the reward the agent receives for the action it selects. The class `Env` defines the following variables and functions:
Variables:
* _actions_: the actions available to the agent, stored as a list
* _n_actions_: the number of available actions, used as the size of the neural network output in the subsequent DQN
* _n_features_: the number of attributes in each observation (4 in this project)
* _time_env_state_: a dictionary recording the state of the environment at a specific moment
Functions:
* \_\__init_\_\_(_self_): the initializer; defines the variables above
* _update_\__State_(_self_): get the current environment information from the environment data and return it
* _reset_(_self_): reset the environment (this can be removed)
* _step_(_self_, _action_): given the action selected by the agent, return the agent's state at the next moment and the reward

In [1]:
import time
import numpy as np
import random

In [2]:
# Seed both generators: Env.data() draws its values from the stdlib `random`
# module, so seeding NumPy alone does not make the simulated channel data
# repeatable between runs.
np.random.seed(2)
random.seed(2)

In [None]:
class Env:
    """Simulated channel-selection environment for a reinforcement-learning agent.

    Every step draws a fresh random observation for each channel in
    ``actions``. The agent is rewarded when the channel it picked is the one
    whose observation scores highest under :meth:`value`; consecutive correct
    picks earn an increasing reward.
    """

    def __init__(self):
        super().__init__()
        # Actions the agent may choose from; each one names a channel.
        self.actions = [
            "Channel_1",
            "Channel_6",
            "Channel_11",
        ]

        # Candidate transmit-power values sampled by data().
        self.tx_power_list = [20, 24, 27, 30]
        self.n_actions = len(self.actions)
        # Length of one observation: [RSSI_1, RSSI_2, tx_power, spectral_density].
        self.n_features = 4
        # Name of the channel selected most recently ("" until reset()/step()).
        self.state = ""
        # Step counter; incremented at the start of every step().
        self.time = 1
        # Streak counter: grows on each correct pick, returns to 1 on a miss.
        self.count = 1
        # Value of ``count`` recorded after every step().
        self.count_history = []
        # Holds the latest per-channel observations under the key "current".
        self.time_env_state = {}

    def data(self):
        """Draw one random observation for a single channel.

        Returns:
            A 1-D array ``[RSSI_1, RSSI_2, tx_power, spectral_density]``
            where both RSSI values are integers in [-100, 0], ``tx_power`` is
            picked from ``tx_power_list`` and ``spectral_density`` is uniform
            in [0, 100].
        """
        rssi_1 = random.randint(-100, 0)
        rssi_2 = random.randint(-100, 0)
        tx_power = random.choice(self.tx_power_list)
        spectral_density = random.uniform(0, 100)
        return np.array([rssi_1, rssi_2, tx_power, spectral_density])

    def update_State(self):
        """Regenerate the observation of every channel and return the mapping.

        Returns:
            The dict stored at ``time_env_state["current"]``, mapping each
            channel name in ``actions`` to a fresh observation from data().
        """
        # Built from ``actions`` so the channel names are defined in one place.
        self.time_env_state["current"] = {
            channel: self.data() for channel in self.actions
        }
        return self.time_env_state["current"]

    def reset(self):
        """Refresh all observations and select "Channel_1" as the state.

        ``time``, ``count`` and ``count_history`` are left untouched.

        Returns:
            The fresh observation of "Channel_1".
        """
        self.update_State()
        self.state = "Channel_1"
        return self.time_env_state["current"][self.state]

    def value(self, state):
        """Score one channel observation; a higher score is better.

        Args:
            state: Indexable observation laid out as
                ``[RSSI_1, RSSI_2, tx_power, spectral_density]``.

        Returns:
            ``|power_sum(RSSI_1, RSSI_2)| + tx_power + (100 - spectral_density)``
            where ``power_sum`` adds the two RSSI values in the linear domain
            and converts the total back to a logarithmic (10*log10) scale.
        """
        rssi_1, rssi_2 = state[0], state[1]
        tx_power = state[2]
        spectral_density = state[3]

        # Convert each RSSI to linear scale, add them, and convert back.
        rssi_power_sum = 10 * np.log10(
            np.sum(10 ** (np.array([rssi_1, rssi_2]) / 10))
        )
        # NOTE(review): the original comment described this term as
        # "-power_sum", but the code takes the absolute value; the two differ
        # when the power sum is positive (both RSSIs close to 0). Behaviour is
        # kept as implemented — confirm which one is intended.
        return np.abs(rssi_power_sum) + tx_power + (100 - spectral_density)

    def step(self, action):
        """Advance the environment one step and score the agent's choice.

        All channel observations are regenerated first; the channel with the
        highest value() among the new observations is the rewarded one.

        Args:
            action: Name of the chosen channel; must be a key of the current
                observation mapping (one of ``actions``).

        Returns:
            A tuple ``(observation, reward)``: the new observation of the
            chosen channel, and the reward, which is the updated streak
            counter on a correct pick and 0 otherwise.

        Raises:
            KeyError: If ``action`` is not a known channel name.
        """
        self.time += 1
        self.update_State()
        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
        max_value = -np.inf
        action_key = ""

        # Evaluate every channel in the current state.
        for key in self.time_env_state["current"]:
            channel_value = self.value(self.time_env_state["current"][key])

            # Compare and keep the running maximum; the strict ">" means the
            # first channel wins a tie.
            if channel_value > max_value:
                max_value = channel_value
                action_key = key

        print('{}: action {} has max. value {}\n'.format(self.time, action_key, max_value))

        next_state = action

        if next_state == action_key:
            # Correct pick: extend the streak and pay it out as the reward.
            self.count += 1
            reward = self.count
            print('{}: selected action = action with max. value, reward = {}\n'.format(self.time, action, reward))
        else:
            # Wrong pick: no reward and the streak starts over.
            reward = 0
            self.count = 1

        self.count_history.append(self.count)

        self.state = next_state
        return self.time_env_state["current"][next_state], reward
