In [None]:
from casadi import vertcat
from gym import spaces, Env
from env.ecm_parameter import dynamic_state
import numpy as np

# Simulation-only environment.

# Action-space mode copied onto each ETM instance: True selects a discrete set
# of current levels, False selects a continuous one-dimensional Box action.
DISCRETE = False


class ETM(Env):
    """Simulated battery-charging environment driven by ``dynamic_state``.

    The internal state vector ``x`` is ordered ``[SOC, ir, h, Tc, Ts]`` and the
    observation handed to the agent is ``[SOC, Vt, Tc]``.  The action is the
    applied current ``I``: either an index into ``self.currents`` (discrete
    mode) or a one-element array clipped to the continuous action bounds.

    ``step`` returns the 5-tuple ``(observation, reward, cost, done, info)``;
    the extra ``cost`` element counts violated constraints.

    The discrete/continuous mode is read from the module-level ``DISCRETE``
    flag once, at construction time.  The spaces are built once as well, so
    that seeding ``env.action_space`` or ``env.observation_space`` persists
    across accesses and resets.
    """

    def __init__(self, sett, cont_sett):
        """Store the configuration, start the first episode, build the spaces.

        Args:
            sett: Mapping providing ``'sample_time'``.
            cont_sett: Mapping providing ``'constraints'`` (``'init_soc'``,
                ``'temperature'`` with ``'init'`` and ``'TCmax'``,
                ``'voltage'`` with ``'max'``) and ``'references'``
                (``'soc'``).
        """
        self.setting = sett
        self.control_set = cont_sett
        self.discrete = DISCRETE

        if self.discrete:
            # Selectable current levels; an action is an index into this array.
            self.currents = np.linspace(-100, 0, 20)

        self._start_episode()

        # Built once and cached: a space created afresh on every property
        # access would silently drop any seed applied to it.
        self._observation_space = spaces.Box(
            low=0, high=49521, shape=(self.M,), dtype=np.float32)
        if self.discrete:
            self._action_space = spaces.Discrete(len(self.currents))
        else:  # continuous case.
            self._action_space = spaces.Box(
                dtype=np.float32, low=-15, high=0, shape=(1,))

    @property
    def observation_space(self):
        """Box describing the ``[SOC, Vt, Tc]`` observation."""
        return self._observation_space

    @property
    def action_space(self):
        """Discrete index space or continuous current Box, depending on mode."""
        return self._action_space

    def _start_episode(self):
        """(Re)initialise every per-episode attribute from the stored settings.

        The configuration mappings are re-read on each call, so edits made to
        them between episodes (e.g. a different ``init_soc``) take effect on
        the next reset.
        """
        self.dt = self.setting['sample_time']
        constraints = self.control_set['constraints']

        self.u = np.zeros(1)
        self.init_SOC = constraints['init_soc']
        self.init_ir = 0
        self.init_h = 0
        self.init_t = constraints['temperature']['init']

        # X_vector:soc,ir,h,Tc,Ts (both temperatures start at init_t)
        self.x_init = np.array(
            [self.init_SOC, self.init_ir, self.init_h, self.init_t, self.init_t])

        # One model evaluation with zero input yields the starting state and
        # the matching terminal voltage.
        self.x, self.V_batt = dynamic_state(self.x_init, self.u, self.init_t)

        # y_vector:Vt,Ts
        self.y = vertcat(self.V_batt, self.x[4])
        self.y = self.y.full().reshape(self.y.shape[0], )

        # state:SOC,Vt,Tc
        self.state = vertcat(self.x[0], self.V_batt, self.x[3])
        self.state = self.state.full().reshape(self.state.shape[0], )

        self.M = len(self.state)

        self._max_episode_steps = 3600
        self.episode_step = 0

        # The initial info reports x_init, not the state after the first
        # model evaluation.
        self.info = dict()
        self._record_info(self.x_init)

        # Target SOC
        self.SOC_desired = self.control_set['references']['soc']

    def _record_info(self, x_vec):
        """Write the entries of ``x_vec`` plus current ``Vt`` and ``I`` into ``self.info``."""
        self.info['SOC'] = x_vec[0]
        self.info['ir'] = x_vec[1]
        self.info['h'] = x_vec[2]
        self.info['Tc'] = x_vec[3]
        self.info['Ts'] = x_vec[4]
        self.info['Vt'] = self.V_batt
        self.info['I'] = self.u

    def _observation(self):
        """Return the flat ``[SOC, Vt, Tc]`` observation array."""
        return np.concatenate((self.x[0], self.V_batt, self.x[3]), axis=None)

    def step(self, action):
        """Apply one current command and advance the model by one step.

        Args:
            action: Index into ``self.currents`` in discrete mode; otherwise a
                value that is clipped to the action-space bounds and whose
                first element is used as the current.

        Returns:
            Tuple ``(observation, reward, cost, done, info)`` where
            ``reward`` is ``-2 * |SOC_desired - SOC|``, ``cost`` is the number
            of violated constraints (terminal voltage above its maximum, core
            temperature above ``TCmax``), ``done`` is True once SOC reaches
            the target or the step budget is used up, and ``info`` is a
            snapshot copy of ``self.info``.
        """
        if self.discrete:
            current = self.currents[action]
        else:
            bounds = self.action_space
            current = np.clip(action, a_min=bounds.low, a_max=bounds.high)[0]

        self.u = current

        # NOTE(review): the third argument stays at the initial temperature on
        # every step - presumably an ambient temperature; confirm against
        # dynamic_state.
        self.x, self.V_batt = dynamic_state(self.x, self.u, self.init_t)

        # Reward function
        reward = -2 * abs(self.SOC_desired - self.x[0])

        # cost function
        limits = self.control_set['constraints']
        c_volt = 1 if (self.V_batt > limits['voltage']['max']) else 0
        c_tc = 1 if (self.x[3] > limits['temperature']['TCmax']) else 0
        cost = c_volt + c_tc

        self.episode_step += 1

        # bool() keeps the flag a plain Python bool whatever type the model
        # returns for the state entries.
        is_done = bool(self.x[0] >= self.SOC_desired
                       or self.episode_step >= self._max_episode_steps)

        # update info_X
        self._record_info(self.x)

        # Return a copy so infos collected by the caller are not overwritten
        # by later steps.
        return self._observation(), reward, cost, is_done, dict(self.info)

    def reset(self):
        """Start a new episode and return its first ``[SOC, Vt, Tc]`` observation."""
        self._start_episode()

        return self._observation()
