In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 
import scipy.io as sio

from vehicle_model_variant import Environment 
from cell_model import CellModel 
from driver_MDP import Driver_MDP 

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Paths to the MATLAB (.mat) data files handed to the Environment constructor.
# NOTE(review): `drving_cycle` (sic) is only referenced by the commented-out
# line below; the training cell defines its own `driving_cycle_path`.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# Shared cell model instance passed to every Environment that is built.
cell_model = CellModel()
# env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)
# Cycle generator used by test_agent via driver.get_cycle().
# NOTE(review): the meaning of the 0.02 argument is not visible here -- confirm in driver_MDP.
driver = Driver_MDP(0.02)

# Length of the state vector; sizes the replay-buffer rows and the network inputs.
num_states = 4

In [3]:
class OUActionNoise:
    """Temporally correlated exploration noise (discretised Ornstein-Uhlenbeck process).

    Each call advances the process by one step of size ``dt``: the previous
    sample is pulled towards ``mean`` at rate ``theta`` and perturbed by a
    Gaussian draw scaled by ``std_deviation * sqrt(dt)``.

    Args:
        mean: Value the process reverts to.
        std_deviation: Scale of the random perturbation.
        theta: Mean-reversion rate.
        dt: Step size of the discretisation.
        x_initial: Optional starting value; the process starts at 0 when omitted.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.theta = theta
        self.mean = mean
        self.std_dev = std_deviation
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Rewind the process to ``x_initial`` (or 0 if none was given)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Draw the next noise sample and remember it as the new process state."""
        drift = self.theta * (self.mean - self.x_prev) * self.dt
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        self.x_prev = self.x_prev + drift + diffusion
        return self.x_prev

In [4]:
class Buffer:
    """Circular replay buffer that also runs one DDPG gradient step per ``learn()``.

    Transitions are stored as ``(state, action, reward, next_state)`` rows in
    pre-allocated NumPy arrays. Once ``buffer_capacity`` transitions have been
    recorded, new ones overwrite the oldest. No terminal flag is stored, so the
    TD target in ``learn`` always bootstraps from the stored next state.

    ``learn`` reads the notebook-level globals ``actor_model``, ``critic_model``,
    ``target_actor``, ``target_critic``, ``actor_optimizer``,
    ``critic_optimizer`` and ``gamma``; ``__init__`` reads ``num_states``.

    Args:
        buffer_capacity: Maximum number of transitions kept.
        batch_size: Number of transitions sampled per ``learn()`` call.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of record() calls so far (not capped at the capacity).
        self.buffer_counter = 0

        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))

    @staticmethod
    def _as_float32(array):
        """Convert a NumPy batch to a float32 tensor."""
        return tf.cast(tf.convert_to_tensor(array), dtype=tf.float32)

    def record(self, obs_tuple):
        """Store one ``(state, action, reward, next_state)`` transition.

        The write position wraps around, so the oldest entry is replaced once
        the buffer is full.
        """
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    def learn(self):
        """Sample a mini-batch (with replacement) and update critic, then actor.

        Does nothing when no transition has been recorded yet, because there is
        nothing to sample from.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            return
        batch_indices = np.random.choice(record_range, self.batch_size)

        # The buffers are float64 NumPy arrays. Cast every batch (not only the
        # rewards) to float32 so the networks are not fed float64 tensors that
        # Keras would have to autocast.
        state_batch = self._as_float32(self.state_buffer[batch_indices])
        action_batch = self._as_float32(self.action_buffer[batch_indices])
        reward_batch = self._as_float32(self.reward_buffer[batch_indices])
        next_state_batch = self._as_float32(self.next_state_buffer[batch_indices])

        # Critic step: regress Q(s, a) onto the one-step TD target built from
        # the target networks.
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor step: maximise the critic's value of the actor's own actions
        # (implemented as minimising the negated mean).
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Soft-update both target networks towards their online counterparts.

    Every weight of ``target_critic`` / ``target_actor`` becomes
    ``(1 - tau) * target + tau * online``, using the global ``critic_model``
    and ``actor_model`` as the online networks. The critic pair is updated
    first, then the actor pair.

    Args:
        tau: Interpolation factor; small values make the targets move slowly.
    """

    def _blend_into(target_net, online_net):
        # Snapshot the target variables once, then mix them index-by-index
        # with the online network's variables.
        current = target_net.weights
        mixed = [
            current[idx] * (1 - tau) + tau * online_var
            for idx, online_var in enumerate(online_net.weights)
        ]
        target_net.set_weights(mixed)

    _blend_into(target_critic, critic_model)
    _blend_into(target_actor, actor_model)
    

In [6]:
def get_actor():
    """Build the actor (policy) network.

    Architecture: input of ``num_states`` features -> BatchNormalization ->
    Dense(512, relu) -> Dense(512, relu) -> Dense(1, sigmoid). The sigmoid keeps
    the output in (0, 1); callers scale it by the upper action bound.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping a state batch to one action
        value per sample.
    """
    # Small uniform kernel for the last layer, so its initial pre-activations
    # are close to zero and the sigmoid does not start out saturated.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # The shape must be a tuple: "(num_states)" without the trailing comma is
    # a plain int, which not every Keras version accepts.
    inputs = layers.Input(shape=(num_states,))
    normalized_inputs = layers.BatchNormalization()(inputs)

    hidden = layers.Dense(512, activation="relu")(normalized_inputs)
    hidden = layers.Dense(512, activation="relu")(hidden)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(hidden)
    return tf.keras.Model(inputs, outputs)

In [7]:
def get_critic():
    """Build the critic (Q-value) network.

    The state (``num_states`` features) goes through BatchNormalization ->
    Dense(16, relu) -> Dense(32, relu); the scalar action goes through
    Dense(32, relu). Both branches are concatenated and passed through
    Dense(512, relu) -> Dense(512, relu) -> Dense(1) with a linear output.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[state_batch, action_batch]``
        and returning one Q-value per sample.
    """
    # Shapes must be tuples: "(num_states)" / "(1)" without the trailing comma
    # are plain ints, which not every Keras version accepts.
    state_input = layers.Input(shape=(num_states,))
    state_normalized = layers.BatchNormalization()(state_input)

    state_out = layers.Dense(16, activation="relu")(state_normalized)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    return tf.keras.Model([state_input, action_input], outputs)
    

In [8]:
def policy(state, noise_object):
    """Pick an action from the global actor, perturbed by additive noise.

    The actor output plus one noise sample is scaled by the upper action bound
    and then clipped to the bounds carried in the state.

    Args:
        state: Batched state tensor with a single row; columns 2 and 3 are read
            as the lower and upper action bound.
        noise_object: Zero-argument callable returning one noise sample.

    Returns:
        The clipped action.
    """
    lower_bound = state[0][2].numpy()
    upper_bound = state[0][3].numpy()

    actor_output = tf.squeeze(actor_model(state))
    perturbation = noise_object()
    noisy_output = actor_output.numpy() + perturbation

    return np.clip(noisy_output * upper_bound, lower_bound, upper_bound)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Choose an action epsilon-greedily within the bounds carried in the state.

    With probability ``eps`` one of 10 evenly spaced actions between the lower
    and upper bound is returned. Otherwise the global actor's output is scaled
    by the upper bound and clipped to the bounds.

    Args:
        state: Batched state with a single row whose last two entries are the
            lower and upper action bound.
        eps: Exploration probability. Values <= 0 never explore (``random.random()``
            is never negative); values > 1 always explore.

    Returns:
        The selected action.
    """
    lower_bound = state[0][-2].numpy()
    upper_bound = state[0][-1].numpy()

    explore = random.random() < eps
    if not explore:
        # Greedy branch: actor output, scaled and clipped to the bounds.
        greedy = tf.squeeze(actor_model(state)).numpy()
        return np.clip(greedy * upper_bound, lower_bound, upper_bound)

    # Exploration branch: uniform pick from a 10-point grid over the bounds.
    pick = random.randint(0, 9)
    return np.linspace(lower_bound, upper_bound, 10)[pick]

In [10]:
# Noise for the `policy` helper. The training loop explores with
# `policy_epsilon_greedy` instead, which does not use this noise object.
std_dev = 0.2
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# Optimisers; the critic uses twice the actor's learning rate.
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 200  # episodes trained per reward factor
gamma = 0.95          # discount factor in the critic's TD target
tau = 0.001           # soft-update rate of the target networks

# Epsilon-greedy schedule: once learning has started, epsilon decays
# exponentially from MAX_EPSILON towards MIN_EPSILON at DECAY_RATE per step.
MAX_EPSILON = 1.0
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
BATCH_SIZE = 32        # mini-batch size handed to the replay buffer
DELAY_TRAINING = 5000  # environment steps collected before learning starts

In [11]:
def initialization():
    """Create fresh online/target networks and an empty replay buffer.

    The target networks start as exact weight copies of the online networks.

    Returns:
        Tuple ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    online_actor, online_critic = get_actor(), get_critic()
    frozen_actor, frozen_critic = get_actor(), get_critic()

    # Synchronise each target network with its online counterpart.
    for target_net, source_net in ((frozen_actor, online_actor),
                                   (frozen_critic, online_critic)):
        target_net.set_weights(source_net.get_weights())

    # To resume from a previous run instead, load checkpoints here, e.g.
    #     online_actor.load_weights("./DDPG1_trial1/actor_model_checkpoint")
    #     online_critic.load_weights("./DDPG1_trial1/critic_model_checkpoint")
    #     frozen_actor.load_weights("./DDPG1_trial1/target_actor_checkpoint")
    #     frozen_critic.load_weights("./DDPG1_trial1/target_critic_checkpoint")

    replay = Buffer(500000, BATCH_SIZE)
    return online_actor, online_critic, frozen_actor, frozen_critic, replay

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Write the weights of all four networks into the directory ``root``.

    The directory (including missing parents) is created if needed. Files are
    named ``actor_model.h5``, ``critic_model.h5``, ``target_actor.h5`` and
    ``target_critic.h5``.

    Args:
        actor_model, critic_model, target_actor, target_critic: Objects
            exposing ``save_weights(path)``.
        root: Output directory, relative or absolute.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(root, exist_ok=True)

    # Build file paths from `root` itself so they always land in the directory
    # created above; prefixing "./" would break for an absolute `root`.
    networks = (
        ("actor_model", actor_model),
        ("critic_model", critic_model),
        ("target_actor", target_actor),
        ("target_critic", target_critic),
    )
    for stem, network in networks:
        network.save_weights(os.path.join(root, "{}.h5".format(stem)))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor):
    """Construct a new ``Environment`` for one episode.

    Uses the notebook-level ``cell_model``, ``battery_path`` and ``motor_path``.

    Args:
        driving_path: Driving-cycle argument forwarded unchanged to ``Environment``.
        reward_factor: Reward weighting forwarded unchanged to ``Environment``.

    Returns:
        The newly created ``Environment``.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [14]:
def test_agent(actor_model, reward_factor):
    """Run one greedy evaluation episode on a cycle drawn from ``driver``.

    The episode summary is printed and the cycle is plotted next to the actions
    taken.

    Args:
        actor_model: Not read inside this function; ``policy_epsilon_greedy``
            uses the notebook-level ``actor_model`` instead.
        reward_factor: Reward weighting passed to the environment.

    Returns:
        ``env.history`` of the evaluation episode.
    """
    test_cycle = driver.get_cycle()
    env = initialization_env(test_cycle, reward_factor)

    total_reward = 0
    state = env.reset()
    done = False
    while not done:
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
        # eps = -1 can never exceed random.random(), so the action is always greedy.
        action = policy_epsilon_greedy(tf_state, -1)
        state, reward, done = env.step(action)
        total_reward += reward

    # Sum of absolute SOC deviations from 0.6 over the whole episode.
    soc_trace = np.array(env.history["SOC"])
    SOC_deviation_history = np.sum(np.abs(soc_trace - 0.6))

    print("******************* Test is start *****************")
    summary = [
        'Total reward: {}'.format(total_reward),
        "SOC: {:.4f}".format(env.SOC),
        "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
        "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
    ]
    print(*summary)
    print("******************* Test is done *****************")
    print("")

    # Left panel: the driving cycle; right panel: the actions taken.
    for panel, series in enumerate((test_cycle, env.history["Action"]), start=1):
        plt.subplot(1, 2, panel)
        plt.plot(series)
    plt.show()
    return env.history
    

In [15]:
def update_reward_factor(reward_factor_temp, result_dict, thresh,
                         target_soc=0.6, gain=5, window=10):
    """Adjust the reward factor from the final SOC reached at the end of training.

    The mean final SOC of the last ``window`` episodes is compared with
    ``target_soc``. If it lies within ``thresh`` the search stops; otherwise the
    factor is shifted proportionally to the remaining error.

    Args:
        reward_factor_temp: Reward factor used for the run just finished.
        result_dict: Results of that run; only ``result_dict["SOCs"]`` (one
            final SOC per episode) is read.
        thresh: Absolute tolerance on the mean final SOC.
        target_soc: Desired final SOC.
        gain: Proportional gain applied to ``target_soc - mean_soc``.
        window: Number of trailing episodes to average; must be positive.

    Returns:
        ``(reward_factor, terminal)``: the factor for the next run (unchanged
        when converged) and whether the search has converged.

    Raises:
        ValueError: If ``result_dict["SOCs"]`` is empty. The mean would be NaN
            and would silently produce a NaN reward factor.
    """
    socs = result_dict["SOCs"]
    if len(socs) == 0:
        raise ValueError("result_dict['SOCs'] is empty; cannot update the reward factor")

    soc_at_equilibrium = np.mean(socs[-window:])
    soc_error = target_soc - soc_at_equilibrium

    if abs(soc_error) < thresh:
        return reward_factor_temp, True
    return reward_factor_temp + gain * soc_error, False

In [16]:
# Outer search over the reward factor: train a fresh agent for `total_episodes`
# episodes, then let update_reward_factor decide whether the final SOC is close
# enough to its target or the factor must be adjusted and training repeated.
# Per-run results are collected in `results_dict`, keyed by the reward factor.
results_dict = {} 
reward_factor_temp = 1.5

# Load the driving cycle once; column 1 of "sch_cycle" is handed to the environment.
# NOTE(review): presumably column 0 is time and column 1 the speed trace -- confirm.
driving_cycle_path = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
driving_cycle = sio.loadmat(driving_cycle_path)
driving_cycle = driving_cycle["sch_cycle"][:, 1]
# NOTE(review): there is no cap on the number of outer iterations; the loop only
# ends when update_reward_factor reports `terminal`.
while True: 
    print("")
    print("reward factor = {}".format(reward_factor_temp))
    print("")
    
    # Networks and replay buffer are re-created for every reward factor; these
    # names are the globals read by Buffer.learn, update_target and the policies.
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()
    
    # Exploration rate and global step counter restart for every reward factor.
    eps = MAX_EPSILON 
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    episode_test_history = [] 
    episode_num_test = [] 
    for ep in range(total_episodes): 
        # A new environment is built for each episode.
        env = initialization_env(driving_cycle, reward_factor_temp)
        
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 
        while True: 
            # Add a batch dimension so the state can be fed to the actor.
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            # On the last step the next state is replaced by an all-zero vector.
            # NOTE(review): no terminal flag is stored, so Buffer.learn still
            # bootstraps from this zero state.
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            # Learning (and epsilon decay) only start once more than
            # DELAY_TRAINING steps have been collected; until then eps stays
            # at MAX_EPSILON.
            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                # Exponential decay from MAX_EPSILON towards MIN_EPSILON,
                # measured in steps since learning began.
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps
                                                                        -DELAY_TRAINING))

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption)
        
        # Sum of absolute SOC deviations from 0.6 over the whole episode.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
        )
        print("")
        
        # Every 10th episode keep the training episode's history. The separate
        # greedy evaluation run (test_agent) is disabled.
        if (ep + 1) % 10 == 0: 
#             history = test_agent(actor_model, reward_factor_temp)
            history = env.history 
            episode_test_history.append(history) 
            episode_num_test.append(ep + 1)
            
    # Persist the trained networks in a directory named after the reward factor.
    root = "cycleOne_reward_factor{}".format(reward_factor_temp)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)
    
    results_dict[reward_factor_temp] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs, 
        "test_history": episode_test_history, 
        "test_episode_num": episode_num_test,
    }
    
    # Decide from the final SOCs of this run whether to stop or to retrain with
    # an adjusted reward factor (tolerance 0.015).
    reward_factor_temp, terminal = update_reward_factor(reward_factor_temp, 
                                                        results_dict[reward_factor_temp], 
                                                        0.015) 
    if terminal: 
        break 
    
    


reward factor = 1.5

maximum steps, simulation is done ... 
elapsed_time: 16.408
Episode: 1 Exploration P: 1.0000 Total reward: -925.5790850376394 SOC: 0.9994 Cumulative_SOC_deviation: 488.2358 Fuel Consumption: 193.2254

maximum steps, simulation is done ... 
elapsed_time: 16.431
Episode: 2 Exploration P: 1.0000 Total reward: -921.1534891301686 SOC: 1.0000 Cumulative_SOC_deviation: 487.0446 Fuel Consumption: 190.5865

maximum steps, simulation is done ... 
elapsed_time: 16.776
Episode: 3 Exploration P: 1.0000 Total reward: -926.2353738718288 SOC: 1.0000 Cumulative_SOC_deviation: 490.2058 Fuel Consumption: 190.9267



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.ba

maximum steps, simulation is done ... 
elapsed_time: 88.253
Episode: 28 Exploration P: 0.5172 Total reward: -326.855561305813 SOC: 0.8473 Cumulative_SOC_deviation: 121.1925 Fuel Consumption: 145.0668

maximum steps, simulation is done ... 
elapsed_time: 86.974
Episode: 29 Exploration P: 0.5034 Total reward: -237.15087385459438 SOC: 0.7472 Cumulative_SOC_deviation: 67.1550 Fuel Consumption: 136.4184

maximum steps, simulation is done ... 
elapsed_time: 87.529
Episode: 30 Exploration P: 0.4901 Total reward: -249.91533238805408 SOC: 0.6740 Cumulative_SOC_deviation: 78.7089 Fuel Consumption: 131.8520

maximum steps, simulation is done ... 
elapsed_time: 86.114
Episode: 31 Exploration P: 0.4771 Total reward: -311.80549228784014 SOC: 0.6211 Cumulative_SOC_deviation: 122.0856 Fuel Consumption: 128.6772

maximum steps, simulation is done ... 
elapsed_time: 83.483
Episode: 32 Exploration P: 0.4644 Total reward: -241.0238952415473 SOC: 0.7261 Cumulative_SOC_deviation: 70.0875 Fuel Consumption: 1

  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (


maximum steps, simulation is done ... 
elapsed_time: 79.770
Episode: 47 Exploration P: 0.3110 Total reward: -832.6119354231329 SOC: 0.0492 Cumulative_SOC_deviation: 495.7743 Fuel Consumption: 88.9504

battery power is 4556.04963138524(+) but condition is not avail
elapsed_time: 72.247
Episode: 48 Exploration P: 0.3036 Total reward: -1742.3436841159255 SOC: -0.0001 Cumulative_SOC_deviation: 444.8426 Fuel Consumption: 75.9808

battery power is 9851.64969299313(+) but condition is not avail
elapsed_time: 74.774
Episode: 49 Exploration P: 0.2961 Total reward: -1798.9029718501974 SOC: -0.0014 Cumulative_SOC_deviation: 480.1742 Fuel Consumption: 79.5446



  2 * r_dis)) * (v_dis - (v_dis ** 2 - 4 * r_dis * p_bat) ** (0.5)) * (p_bat >= 0)


battery power is 5654.301400547441(+) but condition is not avail
elapsed_time: 56.742
Episode: 50 Exploration P: 0.2905 Total reward: -1581.0990693713838 SOC: -0.0006 Cumulative_SOC_deviation: 350.3111 Fuel Consumption: 56.5342

battery power is 5694.218662498944(+) but condition is not avail
elapsed_time: 52.979
Episode: 51 Exploration P: 0.2855 Total reward: -1501.8963041552472 SOC: -0.0009 Cumulative_SOC_deviation: 301.8423 Fuel Consumption: 50.0349

battery power is 8257.857718697076(+) but condition is not avail
elapsed_time: 53.639
Episode: 52 Exploration P: 0.2804 Total reward: -1483.73599010138 SOC: -0.0010 Cumulative_SOC_deviation: 289.4735 Fuel Consumption: 50.4280

battery power is 6932.917864542715(+) but condition is not avail
elapsed_time: 74.615
Episode: 53 Exploration P: 0.2735 Total reward: -1798.1731287841153 SOC: -0.0002 Cumulative_SOC_deviation: 480.0958 Fuel Consumption: 78.9306

battery power is 3661.0084202781763(+) but condition is not avail
elapsed_time: 48.252

battery power is 6434.207245289934(+) but condition is not avail
elapsed_time: 93.934
Episode: 89 Exploration P: 0.1145 Total reward: -1977.5476678509795 SOC: -0.0006 Cumulative_SOC_deviation: 509.0243 Fuel Consumption: 214.9130

maximum steps, simulation is done ... 
elapsed_time: 107.101
Episode: 90 Exploration P: 0.1117 Total reward: -953.3477615642516 SOC: 0.0659 Cumulative_SOC_deviation: 584.0079 Fuel Consumption: 77.3360

maximum steps, simulation is done ... 
elapsed_time: 106.841
Episode: 91 Exploration P: 0.1089 Total reward: -1024.5205786727945 SOC: 0.1601 Cumulative_SOC_deviation: 623.2221 Fuel Consumption: 89.6875

battery power is 4556.04963138524(+) but condition is not avail
elapsed_time: 79.878
Episode: 92 Exploration P: 0.1069 Total reward: -1688.9285362041926 SOC: -0.0001 Cumulative_SOC_deviation: 414.1606 Fuel Consumption: 68.5887

maximum steps, simulation is done ... 
elapsed_time: 107.698
Episode: 93 Exploration P: 0.1043 Total reward: -863.1350635681117 SOC: 0.15

maximum steps, simulation is done ... 
elapsed_time: 106.825
Episode: 128 Exploration P: 0.0518 Total reward: -1046.7317694538183 SOC: 0.8815 Cumulative_SOC_deviation: 513.0331 Fuel Consumption: 277.1821

maximum steps, simulation is done ... 
elapsed_time: 107.054
Episode: 129 Exploration P: 0.0506 Total reward: -869.2796071421229 SOC: 0.3112 Cumulative_SOC_deviation: 515.5657 Fuel Consumption: 95.9311

maximum steps, simulation is done ... 
elapsed_time: 106.750
Episode: 130 Exploration P: 0.0495 Total reward: -578.4133294393876 SOC: 0.2709 Cumulative_SOC_deviation: 324.0633 Fuel Consumption: 92.3183

maximum steps, simulation is done ... 
elapsed_time: 106.936
Episode: 131 Exploration P: 0.0485 Total reward: -824.3142963735457 SOC: 0.2295 Cumulative_SOC_deviation: 489.3872 Fuel Consumption: 90.2334

maximum steps, simulation is done ... 
elapsed_time: 107.364
Episode: 132 Exploration P: 0.0474 Total reward: -759.2505406712573 SOC: 0.3225 Cumulative_SOC_deviation: 436.8899 Fuel Consu

maximum steps, simulation is done ... 
elapsed_time: 114.411
Episode: 169 Exploration P: 0.0235 Total reward: -441.1653582505954 SOC: 0.2261 Cumulative_SOC_deviation: 226.5656 Fuel Consumption: 101.3170

maximum steps, simulation is done ... 
elapsed_time: 108.958
Episode: 170 Exploration P: 0.0232 Total reward: -442.662367161812 SOC: 0.9106 Cumulative_SOC_deviation: 197.9790 Fuel Consumption: 145.6939

maximum steps, simulation is done ... 
elapsed_time: 110.485
Episode: 171 Exploration P: 0.0228 Total reward: -643.3758600730906 SOC: 0.8923 Cumulative_SOC_deviation: 333.9602 Fuel Consumption: 142.4356

maximum steps, simulation is done ... 
elapsed_time: 119.330
Episode: 172 Exploration P: 0.0225 Total reward: -755.9123686894783 SOC: 0.8247 Cumulative_SOC_deviation: 413.0297 Fuel Consumption: 136.3678

maximum steps, simulation is done ... 
elapsed_time: 117.730
Episode: 173 Exploration P: 0.0221 Total reward: -562.4418780473516 SOC: 0.9258 Cumulative_SOC_deviation: 276.7860 Fuel Cons



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

maximum steps, simulation is done ... 
elapsed_time: 50.294
Episode: 4 Exploration P: 0.9903 Total reward: -1868.0528964531343 SOC: 0.9999 Cumulative_SOC_deviation: 472.0996 Fuel Consumption: 189.0610

maximum steps, simulation is done ... 
elapsed_time: 105.879
Episode: 5 Exploration P: 0.9638 Total reward: -1921.1628892942574 SOC: 0.9994 Cumulative_SOC_deviation: 487.9113 Fuel Consumption: 185

maximum steps, simulation is done ... 
elapsed_time: 93.715
Episode: 38 Exploration P: 0.3954 Total reward: -1017.8478056767303 SOC: 0.3213 Cumulative_SOC_deviation: 256.5619 Fuel Consumption: 105.4019

maximum steps, simulation is done ... 
elapsed_time: 84.497
Episode: 39 Exploration P: 0.3849 Total reward: -1129.8613989173605 SOC: 0.2572 Cumulative_SOC_deviation: 289.3622 Fuel Consumption: 100.7633

maximum steps, simulation is done ... 
elapsed_time: 83.746
Episode: 40 Exploration P: 0.3748 Total reward: -1008.4024206665543 SOC: 0.3362 Cumulative_SOC_deviation: 253.6858 Fuel Consumption: 106.1853

maximum steps, simulation is done ... 
elapsed_time: 84.509
Episode: 41 Exploration P: 0.3649 Total reward: -1398.726706155613 SOC: 0.1536 Cumulative_SOC_deviation: 366.7762 Fuel Consumption: 94.3105

maximum steps, simulation is done ... 
elapsed_time: 83.267
Episode: 42 Exploration P: 0.3553 Total reward: -1494.3873786657955 SOC: 0.1380 Cumulative_SOC_deviation: 393.8249 Fuel Consumptio

battery power is 12122.78881593525(+) but condition is not avail
elapsed_time: 43.857
Episode: 78 Exploration P: 0.1466 Total reward: -1790.8995900056216 SOC: -0.0007 Cumulative_SOC_deviation: 216.0500 Fuel Consumption: 24.6701

battery power is 8446.065336248754(+) but condition is not avail
elapsed_time: 34.584
Episode: 79 Exploration P: 0.1450 Total reward: -1701.8780992628876 SOC: -0.0002 Cumulative_SOC_deviation: 192.0966 Fuel Consumption: 20.8345

maximum steps, simulation is done ... 
elapsed_time: 79.846
Episode: 80 Exploration P: 0.1414 Total reward: -853.4763064001396 SOC: 0.5300 Cumulative_SOC_deviation: 206.9454 Fuel Consumption: 117.4884

maximum steps, simulation is done ... 
elapsed_time: 83.214
Episode: 81 Exploration P: 0.1378 Total reward: -798.5002106998916 SOC: 0.4711 Cumulative_SOC_deviation: 193.0171 Fuel Consumption: 112.0474

maximum steps, simulation is done ... 
elapsed_time: 81.457
Episode: 82 Exploration P: 0.1343 Total reward: -873.1144844456272 SOC: 0.4257

battery power is 8529.93162924432(+) but condition is not avail
elapsed_time: 48.002
Episode: 119 Exploration P: 0.0555 Total reward: -2339.586567780293 SOC: -0.0011 Cumulative_SOC_deviation: 332.3781 Fuel Consumption: 159.7194

battery power is 5529.075879811097(+) but condition is not avail
elapsed_time: 42.299
Episode: 120 Exploration P: 0.0549 Total reward: -2198.6019788174726 SOC: -0.0003 Cumulative_SOC_deviation: 292.0388 Fuel Consumption: 162.1203

maximum steps, simulation is done ... 
elapsed_time: 82.535
Episode: 121 Exploration P: 0.0537 Total reward: -2211.4136889586566 SOC: 0.0689 Cumulative_SOC_deviation: 599.6123 Fuel Consumption: 78.9308

maximum steps, simulation is done ... 
elapsed_time: 83.130
Episode: 122 Exploration P: 0.0525 Total reward: -845.1435350332277 SOC: 0.5245 Cumulative_SOC_deviation: 203.0625 Fuel Consumption: 122.9648

maximum steps, simulation is done ... 
elapsed_time: 83.364
Episode: 123 Exploration P: 0.0513 Total reward: -1158.9724157538267 SOC: 

maximum steps, simulation is done ... 
elapsed_time: 82.779
Episode: 160 Exploration P: 0.0250 Total reward: -1545.3436369816704 SOC: 0.2794 Cumulative_SOC_deviation: 408.6501 Fuel Consumption: 92.0059

maximum steps, simulation is done ... 
elapsed_time: 83.005
Episode: 161 Exploration P: 0.0246 Total reward: -1826.6508618686469 SOC: 0.2662 Cumulative_SOC_deviation: 486.6724 Fuel Consumption: 95.8319

maximum steps, simulation is done ... 
elapsed_time: 82.333
Episode: 162 Exploration P: 0.0242 Total reward: -465.09235613494087 SOC: 0.5019 Cumulative_SOC_deviation: 100.4589 Fuel Consumption: 107.8168

maximum steps, simulation is done ... 
elapsed_time: 82.241
Episode: 163 Exploration P: 0.0238 Total reward: -906.4897408480857 SOC: 0.3945 Cumulative_SOC_deviation: 226.7907 Fuel Consumption: 99.9233

maximum steps, simulation is done ... 
elapsed_time: 82.309
Episode: 164 Exploration P: 0.0234 Total reward: -1095.339327150381 SOC: 0.3933 Cumulative_SOC_deviation: 279.2358 Fuel Consumpt

maximum steps, simulation is done ... 
elapsed_time: 13.367
Episode: 1 Exploration P: 1.0000 Total reward: -2743.3598044426453 SOC: 1.0000 Cumulative_SOC_deviation: 491.0020 Fuel Consumption: 196.3281

maximum steps, simulation is done ... 
elapsed_time: 12.830
Episode: 2 Exploration P: 1.0000 Total reward: -2669.5156209411703 SOC: 1.0000 Cumulative_SOC_deviation: 477.8994 Fuel Consumption: 190.4525

maximum steps, simulation is done ... 
elapsed_time: 12.835
Episode: 3 Exploration P: 1.0000 Total reward: -2685.4384005535344 SOC: 0.9999 Cumulative_SOC_deviation: 481.5670 Fuel Consumption: 187.3499



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('f

maximum steps, simulation is done ... 
elapsed_time: 81.834
Episode: 28 Exploration P: 0.5172 Total reward: -531.911301535736 SOC: 0.7825 Cumulative_SOC_deviation: 75.6487 Fuel Consumption: 139.4902

maximum steps, simulation is done ... 
elapsed_time: 81.828
Episode: 29 Exploration P: 0.5034 Total reward: -704.6114484310483 SOC: 0.8116 Cumulative_SOC_deviation: 108.3725 Fuel Consumption: 142.4382

maximum steps, simulation is done ... 
elapsed_time: 81.710
Episode: 30 Exploration P: 0.4901 Total reward: -909.1523975553845 SOC: 0.9070 Cumulative_SOC_deviation: 146.2706 Fuel Consumption: 150.3856

maximum steps, simulation is done ... 
elapsed_time: 81.801
Episode: 31 Exploration P: 0.4771 Total reward: -442.71425886228246 SOC: 0.6726 Cumulative_SOC_deviation: 59.8229 Fuel Consumption: 132.3880

maximum steps, simulation is done ... 
elapsed_time: 81.608
Episode: 32 Exploration P: 0.4644 Total reward: -679.585365456946 SOC: 0.6040 Cumulative_SOC_deviation: 106.5327 Fuel Consumption: 126

maximum steps, simulation is done ... 
elapsed_time: 82.402
Episode: 69 Exploration P: 0.1745 Total reward: -222.2167739687198 SOC: 0.5837 Cumulative_SOC_deviation: 21.2040 Fuel Consumption: 112.2229

maximum steps, simulation is done ... 
elapsed_time: 81.604
Episode: 70 Exploration P: 0.1701 Total reward: -225.8161587431492 SOC: 0.5850 Cumulative_SOC_deviation: 21.9379 Fuel Consumption: 112.0154

maximum steps, simulation is done ... 
elapsed_time: 81.456
Episode: 71 Exploration P: 0.1657 Total reward: -211.34566216552923 SOC: 0.5853 Cumulative_SOC_deviation: 19.3510 Fuel Consumption: 110.9638

maximum steps, simulation is done ... 
elapsed_time: 82.656
Episode: 72 Exploration P: 0.1615 Total reward: -235.57395876141777 SOC: 0.5816 Cumulative_SOC_deviation: 23.8623 Fuel Consumption: 111.7905

maximum steps, simulation is done ... 
elapsed_time: 82.590
Episode: 73 Exploration P: 0.1574 Total reward: -265.94395763189664 SOC: 0.5786 Cumulative_SOC_deviation: 29.8637 Fuel Consumption: 11

maximum steps, simulation is done ... 
elapsed_time: 82.557
Episode: 110 Exploration P: 0.0634 Total reward: -308.1091008085588 SOC: 0.5899 Cumulative_SOC_deviation: 38.4382 Fuel Consumption: 108.7144

maximum steps, simulation is done ... 
elapsed_time: 82.568
Episode: 111 Exploration P: 0.0619 Total reward: -291.1878866653279 SOC: 0.5847 Cumulative_SOC_deviation: 35.3905 Fuel Consumption: 107.6028

maximum steps, simulation is done ... 
elapsed_time: 82.511
Episode: 112 Exploration P: 0.0605 Total reward: -232.91367177934225 SOC: 0.5923 Cumulative_SOC_deviation: 24.0331 Fuel Consumption: 108.2438

maximum steps, simulation is done ... 
elapsed_time: 82.561
Episode: 113 Exploration P: 0.0591 Total reward: -331.1925864919727 SOC: 0.5586 Cumulative_SOC_deviation: 43.2542 Fuel Consumption: 106.8148

maximum steps, simulation is done ... 
elapsed_time: 82.853
Episode: 114 Exploration P: 0.0578 Total reward: -256.66425572795646 SOC: 0.5920 Cumulative_SOC_deviation: 28.5885 Fuel Consumption

maximum steps, simulation is done ... 
elapsed_time: 73.684
Episode: 151 Exploration P: 0.0273 Total reward: -334.43311929369435 SOC: 0.5705 Cumulative_SOC_deviation: 43.6831 Fuel Consumption: 107.8307

maximum steps, simulation is done ... 
elapsed_time: 73.663
Episode: 152 Exploration P: 0.0268 Total reward: -358.0232527805112 SOC: 0.5635 Cumulative_SOC_deviation: 48.6652 Fuel Consumption: 105.5766

maximum steps, simulation is done ... 
elapsed_time: 73.745
Episode: 153 Exploration P: 0.0264 Total reward: -299.8155656388894 SOC: 0.5915 Cumulative_SOC_deviation: 37.0571 Fuel Consumption: 107.5847

maximum steps, simulation is done ... 
elapsed_time: 73.835
Episode: 154 Exploration P: 0.0259 Total reward: -316.5882812542667 SOC: 0.5618 Cumulative_SOC_deviation: 40.7870 Fuel Consumption: 105.0091

maximum steps, simulation is done ... 
elapsed_time: 73.581
Episode: 155 Exploration P: 0.0255 Total reward: -291.6818216763988 SOC: 0.5851 Cumulative_SOC_deviation: 35.6411 Fuel Consumption:

maximum steps, simulation is done ... 
elapsed_time: 77.067
Episode: 192 Exploration P: 0.0156 Total reward: -509.0550071102955 SOC: 0.5508 Cumulative_SOC_deviation: 77.9443 Fuel Consumption: 104.7252

maximum steps, simulation is done ... 
elapsed_time: 77.470
Episode: 193 Exploration P: 0.0155 Total reward: -592.8205072563501 SOC: 0.5419 Cumulative_SOC_deviation: 94.2232 Fuel Consumption: 104.0454

maximum steps, simulation is done ... 
elapsed_time: 77.043
Episode: 194 Exploration P: 0.0153 Total reward: -464.19983270533015 SOC: 0.5597 Cumulative_SOC_deviation: 69.2530 Fuel Consumption: 104.9554

maximum steps, simulation is done ... 
elapsed_time: 76.563
Episode: 195 Exploration P: 0.0152 Total reward: -479.9496626789605 SOC: 0.5309 Cumulative_SOC_deviation: 72.6575 Fuel Consumption: 103.0447

maximum steps, simulation is done ... 
elapsed_time: 77.111
Episode: 196 Exploration P: 0.0150 Total reward: -548.9563213708333 SOC: 0.5257 Cumulative_SOC_deviation: 85.6818 Fuel Consumption:

maximum steps, simulation is done ... 
elapsed_time: 68.285
Episode: 18 Exploration P: 0.6774 Total reward: -2232.4729827445044 SOC: 0.9990 Cumulative_SOC_deviation: 382.6109 Fuel Consumption: 155.9107

maximum steps, simulation is done ... 
elapsed_time: 68.094
Episode: 19 Exploration P: 0.6594 Total reward: -2018.4135234153005 SOC: 1.0000 Cumulative_SOC_deviation: 343.1700 Fuel Consumption: 155.9107

maximum steps, simulation is done ... 
elapsed_time: 68.094
Episode: 20 Exploration P: 0.6418 Total reward: -2144.1110537573945 SOC: 0.9927 Cumulative_SOC_deviation: 366.9624 Fuel Consumption: 152.4781

maximum steps, simulation is done ... 
elapsed_time: 68.202
Episode: 21 Exploration P: 0.6247 Total reward: -1487.9537135677886 SOC: 0.9775 Cumulative_SOC_deviation: 246.1477 Fuel Consumption: 152.0246

maximum steps, simulation is done ... 
elapsed_time: 68.127
Episode: 22 Exploration P: 0.6080 Total reward: -1760.2963869998198 SOC: 0.9977 Cumulative_SOC_deviation: 296.0824 Fuel Consumpt

maximum steps, simulation is done ... 
elapsed_time: 69.185
Episode: 59 Exploration P: 0.2265 Total reward: -221.30902210521543 SOC: 0.5840 Cumulative_SOC_deviation: 19.4899 Fuel Consumption: 115.5305

maximum steps, simulation is done ... 
elapsed_time: 69.185
Episode: 60 Exploration P: 0.2206 Total reward: -204.5083947834429 SOC: 0.5941 Cumulative_SOC_deviation: 16.5732 Fuel Consumption: 114.5598

maximum steps, simulation is done ... 
elapsed_time: 69.016
Episode: 61 Exploration P: 0.2149 Total reward: -250.7562656234992 SOC: 0.5816 Cumulative_SOC_deviation: 25.3589 Fuel Consumption: 113.1247

maximum steps, simulation is done ... 
elapsed_time: 69.447
Episode: 62 Exploration P: 0.2094 Total reward: -213.53966401999222 SOC: 0.5864 Cumulative_SOC_deviation: 18.2303 Fuel Consumption: 114.5976

maximum steps, simulation is done ... 
elapsed_time: 69.613
Episode: 63 Exploration P: 0.2040 Total reward: -217.58014015397617 SOC: 0.5904 Cumulative_SOC_deviation: 19.2678 Fuel Consumption: 11

maximum steps, simulation is done ... 
elapsed_time: 70.049
Episode: 100 Exploration P: 0.0802 Total reward: -270.2642817490949 SOC: 0.5740 Cumulative_SOC_deviation: 30.0128 Fuel Consumption: 107.3742

maximum steps, simulation is done ... 
elapsed_time: 69.436
Episode: 101 Exploration P: 0.0783 Total reward: -276.5882031941912 SOC: 0.5945 Cumulative_SOC_deviation: 30.7492 Fuel Consumption: 109.7014

maximum steps, simulation is done ... 
elapsed_time: 69.574
Episode: 102 Exploration P: 0.0765 Total reward: -274.3656738751572 SOC: 0.5625 Cumulative_SOC_deviation: 30.7545 Fuel Consumption: 107.4504

maximum steps, simulation is done ... 
elapsed_time: 69.237
Episode: 103 Exploration P: 0.0747 Total reward: -331.5883542009868 SOC: 0.5644 Cumulative_SOC_deviation: 41.4045 Fuel Consumption: 106.8715

maximum steps, simulation is done ... 
elapsed_time: 69.631
Episode: 104 Exploration P: 0.0729 Total reward: -292.00741655190785 SOC: 0.5780 Cumulative_SOC_deviation: 33.9545 Fuel Consumption:

maximum steps, simulation is done ... 
elapsed_time: 69.592
Episode: 141 Exploration P: 0.0328 Total reward: -357.66002840664225 SOC: 0.5854 Cumulative_SOC_deviation: 46.0534 Fuel Consumption: 107.7124

maximum steps, simulation is done ... 
elapsed_time: 69.509
Episode: 142 Exploration P: 0.0322 Total reward: -290.5575764909751 SOC: 0.5831 Cumulative_SOC_deviation: 33.6905 Fuel Consumption: 107.7073

maximum steps, simulation is done ... 
elapsed_time: 69.902
Episode: 143 Exploration P: 0.0316 Total reward: -358.67203132682187 SOC: 0.5746 Cumulative_SOC_deviation: 46.4313 Fuel Consumption: 106.6734

maximum steps, simulation is done ... 
elapsed_time: 70.072
Episode: 144 Exploration P: 0.0310 Total reward: -270.20852940556625 SOC: 0.5805 Cumulative_SOC_deviation: 29.9990 Fuel Consumption: 107.3935

maximum steps, simulation is done ... 
elapsed_time: 69.938
Episode: 145 Exploration P: 0.0304 Total reward: -285.6671947711722 SOC: 0.5895 Cumulative_SOC_deviation: 32.7351 Fuel Consumptio

maximum steps, simulation is done ... 
elapsed_time: 70.139
Episode: 182 Exploration P: 0.0174 Total reward: -268.0436732519352 SOC: 0.5931 Cumulative_SOC_deviation: 29.3341 Fuel Consumption: 108.8375

maximum steps, simulation is done ... 
elapsed_time: 70.232
Episode: 183 Exploration P: 0.0172 Total reward: -246.57438061045823 SOC: 0.5905 Cumulative_SOC_deviation: 25.4574 Fuel Consumption: 108.4080

maximum steps, simulation is done ... 
elapsed_time: 69.773
Episode: 184 Exploration P: 0.0170 Total reward: -284.6473924408073 SOC: 0.5893 Cumulative_SOC_deviation: 32.4300 Fuel Consumption: 108.6384

maximum steps, simulation is done ... 
elapsed_time: 69.519
Episode: 185 Exploration P: 0.0168 Total reward: -382.76353280079593 SOC: 0.5910 Cumulative_SOC_deviation: 50.5894 Fuel Consumption: 108.1973

maximum steps, simulation is done ... 
elapsed_time: 69.390
Episode: 186 Exploration P: 0.0166 Total reward: -372.21157924938126 SOC: 0.5873 Cumulative_SOC_deviation: 48.5124 Fuel Consumptio

maximum steps, simulation is done ... 
elapsed_time: 68.378
Episode: 8 Exploration P: 0.8883 Total reward: -2733.950426227674 SOC: 0.9999 Cumulative_SOC_deviation: 463.6159 Fuel Consumption: 179.7735

maximum steps, simulation is done ... 
elapsed_time: 68.324
Episode: 9 Exploration P: 0.8646 Total reward: -2682.9402344639475 SOC: 0.9997 Cumulative_SOC_deviation: 455.7190 Fuel Consumption: 172.2694

maximum steps, simulation is done ... 
elapsed_time: 68.279
Episode: 10 Exploration P: 0.8414 Total reward: -2694.123021853392 SOC: 1.0000 Cumulative_SOC_deviation: 457.8685 Fuel Consumption: 171.6097

maximum steps, simulation is done ... 
elapsed_time: 68.393
Episode: 11 Exploration P: 0.8189 Total reward: -2544.0178884201605 SOC: 1.0000 Cumulative_SOC_deviation: 431.7732 Fuel Consumption: 165.2703

maximum steps, simulation is done ... 
elapsed_time: 68.450
Episode: 12 Exploration P: 0.7970 Total reward: -2524.9651240116855 SOC: 1.0000 Cumulative_SOC_deviation: 428.5787 Fuel Consumption:

maximum steps, simulation is done ... 
elapsed_time: 60.516
Episode: 49 Exploration P: 0.2949 Total reward: -167.60038074361825 SOC: 0.5924 Cumulative_SOC_deviation: 9.1650 Fuel Consumption: 117.1079

maximum steps, simulation is done ... 
elapsed_time: 60.225
Episode: 50 Exploration P: 0.2872 Total reward: -160.21412720536236 SOC: 0.5928 Cumulative_SOC_deviation: 7.9806 Fuel Consumption: 116.2468

maximum steps, simulation is done ... 
elapsed_time: 60.168
Episode: 51 Exploration P: 0.2797 Total reward: -166.5392238276592 SOC: 0.6052 Cumulative_SOC_deviation: 8.8862 Fuel Consumption: 117.5828

maximum steps, simulation is done ... 
elapsed_time: 60.438
Episode: 52 Exploration P: 0.2724 Total reward: -186.6137096657855 SOC: 0.5857 Cumulative_SOC_deviation: 13.0027 Fuel Consumption: 114.9788

maximum steps, simulation is done ... 
elapsed_time: 60.207
Episode: 53 Exploration P: 0.2653 Total reward: -209.10632228948666 SOC: 0.5935 Cumulative_SOC_deviation: 16.8513 Fuel Consumption: 116.2

maximum steps, simulation is done ... 
elapsed_time: 60.777
Episode: 90 Exploration P: 0.1024 Total reward: -234.14499336949763 SOC: 0.5779 Cumulative_SOC_deviation: 22.5321 Fuel Consumption: 110.0098

maximum steps, simulation is done ... 
elapsed_time: 60.973
Episode: 91 Exploration P: 0.0999 Total reward: -291.37275457296954 SOC: 0.5843 Cumulative_SOC_deviation: 32.9273 Fuel Consumption: 109.9679

maximum steps, simulation is done ... 
elapsed_time: 61.023
Episode: 92 Exploration P: 0.0975 Total reward: -288.9747403533649 SOC: 0.5665 Cumulative_SOC_deviation: 32.8396 Fuel Consumption: 108.0529

maximum steps, simulation is done ... 
elapsed_time: 60.412
Episode: 93 Exploration P: 0.0951 Total reward: -366.2076002133026 SOC: 0.5702 Cumulative_SOC_deviation: 46.7838 Fuel Consumption: 108.4637

maximum steps, simulation is done ... 
elapsed_time: 60.674
Episode: 94 Exploration P: 0.0928 Total reward: -258.754623677634 SOC: 0.5865 Cumulative_SOC_deviation: 27.0327 Fuel Consumption: 109.

maximum steps, simulation is done ... 
elapsed_time: 60.690
Episode: 131 Exploration P: 0.0400 Total reward: -259.56565831917925 SOC: 0.5718 Cumulative_SOC_deviation: 26.9122 Fuel Consumption: 111.2993

maximum steps, simulation is done ... 
elapsed_time: 60.616
Episode: 132 Exploration P: 0.0392 Total reward: -263.0980473339719 SOC: 0.5782 Cumulative_SOC_deviation: 27.5589 Fuel Consumption: 111.2691

maximum steps, simulation is done ... 
elapsed_time: 60.373
Episode: 133 Exploration P: 0.0384 Total reward: -266.1825453568144 SOC: 0.5694 Cumulative_SOC_deviation: 28.2706 Fuel Consumption: 110.4328

maximum steps, simulation is done ... 
elapsed_time: 60.578
Episode: 134 Exploration P: 0.0376 Total reward: -344.7285176503148 SOC: 0.5371 Cumulative_SOC_deviation: 42.8382 Fuel Consumption: 108.7222

maximum steps, simulation is done ... 
elapsed_time: 61.142
Episode: 135 Exploration P: 0.0369 Total reward: -348.04959698682137 SOC: 0.5569 Cumulative_SOC_deviation: 43.0510 Fuel Consumption

maximum steps, simulation is done ... 
elapsed_time: 60.350
Episode: 172 Exploration P: 0.0197 Total reward: -412.5231302074668 SOC: 0.5053 Cumulative_SOC_deviation: 55.6706 Fuel Consumption: 105.8195

maximum steps, simulation is done ... 
elapsed_time: 60.297
Episode: 173 Exploration P: 0.0195 Total reward: -395.23506018064046 SOC: 0.5721 Cumulative_SOC_deviation: 51.1837 Fuel Consumption: 113.2509

maximum steps, simulation is done ... 
elapsed_time: 60.359
Episode: 174 Exploration P: 0.0192 Total reward: -311.9255047551172 SOC: 0.5211 Cumulative_SOC_deviation: 36.9612 Fuel Consumption: 108.2968

maximum steps, simulation is done ... 
elapsed_time: 60.709
Episode: 175 Exploration P: 0.0190 Total reward: -348.1299143728166 SOC: 0.5790 Cumulative_SOC_deviation: 42.6842 Fuel Consumption: 112.9719

maximum steps, simulation is done ... 
elapsed_time: 60.578
Episode: 176 Exploration P: 0.0187 Total reward: -456.65728450708576 SOC: 0.5245 Cumulative_SOC_deviation: 63.6936 Fuel Consumption



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

maximum steps, simulation is done ... 
elapsed_time: 26.458
Episode: 4 Exploration P: 0.9903 Total reward: -2959.803591380475 SOC: 1.0000 Cumulative_SOC_deviation: 486.7523 Fuel Consumption: 193.4831

maximum steps, simulation is done ... 
elapsed_time: 59.723
Episode: 5 Exploration P: 0.9638 Total reward: -2884.050920989086 SOC: 1.0000 Cumulative_SOC_deviation: 474.3228 Fuel Consumption: 188.3703

maximum steps, simulation is done ... 
elapsed_time: 58.833
Episode: 6 Exploration P: 0.9379 Total reward: -2942.125081409351 SOC: 1.0000 Cumulative_SOC_deviation: 485.1979 Fuel Consumption: 184.6389

maximum steps, simulation is done ... 
elapsed_time: 60.148
Episode: 7 Exploration P: 0.9128 Tot

maximum steps, simulation is done ... 
elapsed_time: 59.991
Episode: 41 Exploration P: 0.3649 Total reward: -1451.8253150913265 SOC: 0.3847 Cumulative_SOC_deviation: 236.6351 Fuel Consumption: 106.9759

maximum steps, simulation is done ... 
elapsed_time: 60.002
Episode: 42 Exploration P: 0.3553 Total reward: -1181.9034683819648 SOC: 0.4519 Cumulative_SOC_deviation: 188.3462 Fuel Consumption: 111.4905

maximum steps, simulation is done ... 
elapsed_time: 60.090
Episode: 43 Exploration P: 0.3459 Total reward: -1655.8967023765074 SOC: 0.3170 Cumulative_SOC_deviation: 273.5510 Fuel Consumption: 101.2464

maximum steps, simulation is done ... 
elapsed_time: 60.370
Episode: 44 Exploration P: 0.3368 Total reward: -176.55136820470642 SOC: 0.6188 Cumulative_SOC_deviation: 9.6769 Fuel Consumption: 121.5552

maximum steps, simulation is done ... 
elapsed_time: 60.341
Episode: 45 Exploration P: 0.3280 Total reward: -186.4086522219656 SOC: 0.6059 Cumulative_SOC_deviation: 11.7903 Fuel Consumption:

maximum steps, simulation is done ... 
elapsed_time: 61.398
Episode: 82 Exploration P: 0.1251 Total reward: -325.3688733448226 SOC: 0.5688 Cumulative_SOC_deviation: 38.0677 Fuel Consumption: 109.0217

maximum steps, simulation is done ... 
elapsed_time: 61.242
Episode: 83 Exploration P: 0.1220 Total reward: -299.00967399790994 SOC: 0.5796 Cumulative_SOC_deviation: 33.1776 Fuel Consumption: 110.4541

maximum steps, simulation is done ... 
elapsed_time: 60.571
Episode: 84 Exploration P: 0.1190 Total reward: -318.8127884660092 SOC: 0.5761 Cumulative_SOC_deviation: 36.8676 Fuel Consumption: 109.2862

maximum steps, simulation is done ... 
elapsed_time: 60.602
Episode: 85 Exploration P: 0.1160 Total reward: -267.26260232379377 SOC: 0.5728 Cumulative_SOC_deviation: 27.8313 Fuel Consumption: 109.0911

maximum steps, simulation is done ... 
elapsed_time: 60.527
Episode: 86 Exploration P: 0.1131 Total reward: -281.71158477151846 SOC: 0.5749 Cumulative_SOC_deviation: 30.2564 Fuel Consumption: 10

maximum steps, simulation is done ... 
elapsed_time: 61.082
Episode: 123 Exploration P: 0.0473 Total reward: -202.87243968709274 SOC: 0.5931 Cumulative_SOC_deviation: 16.4398 Fuel Consumption: 109.4416

maximum steps, simulation is done ... 
elapsed_time: 60.832
Episode: 124 Exploration P: 0.0463 Total reward: -230.19528896837485 SOC: 0.5736 Cumulative_SOC_deviation: 21.6478 Fuel Consumption: 107.1660

maximum steps, simulation is done ... 
elapsed_time: 60.724
Episode: 125 Exploration P: 0.0453 Total reward: -259.730127487612 SOC: 0.5773 Cumulative_SOC_deviation: 26.6781 Fuel Consumption: 108.1129

maximum steps, simulation is done ... 
elapsed_time: 60.955
Episode: 126 Exploration P: 0.0444 Total reward: -303.4805870253835 SOC: 0.5640 Cumulative_SOC_deviation: 34.4645 Fuel Consumption: 107.6110

maximum steps, simulation is done ... 
elapsed_time: 61.166
Episode: 127 Exploration P: 0.0435 Total reward: -215.15697617322806 SOC: 0.5979 Cumulative_SOC_deviation: 18.3924 Fuel Consumption

maximum steps, simulation is done ... 
elapsed_time: 61.120
Episode: 164 Exploration P: 0.0221 Total reward: -169.70544354585482 SOC: 0.5891 Cumulative_SOC_deviation: 11.0365 Fuel Consumption: 106.9825

maximum steps, simulation is done ... 
elapsed_time: 60.764
Episode: 165 Exploration P: 0.0218 Total reward: -185.74239171446362 SOC: 0.5878 Cumulative_SOC_deviation: 13.8931 Fuel Consumption: 106.7850

maximum steps, simulation is done ... 
elapsed_time: 60.925
Episode: 166 Exploration P: 0.0215 Total reward: -179.3101733196951 SOC: 0.5874 Cumulative_SOC_deviation: 12.7312 Fuel Consumption: 106.9560

maximum steps, simulation is done ... 
elapsed_time: 60.943
Episode: 167 Exploration P: 0.0212 Total reward: -180.71676101439573 SOC: 0.5884 Cumulative_SOC_deviation: 12.9454 Fuel Consumption: 107.1452

maximum steps, simulation is done ... 
elapsed_time: 61.010
Episode: 168 Exploration P: 0.0209 Total reward: -186.42170635985056 SOC: 0.5908 Cumulative_SOC_deviation: 13.9122 Fuel Consumpti

In [17]:
# Persist the collected results to disk so they can be reloaded without
# re-running the training cells. `results_dict` is expected to come from an
# earlier cell (not visible here -- confirm before running on a fresh kernel).
# HIGHEST_PROTOCOL selects the newest pickle format this interpreter supports;
# the resulting file may not load on older Python versions.
with open("DDPG_adaptive_reward_factor.pkl", mode="wb") as f:
    pickle.dump(
        results_dict,
        f,
        protocol=pickle.HIGHEST_PROTOCOL,
    )

In [18]:
# results_dict