In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 
import scipy.io as sio

from vehicle_model_variant import Environment 
from cell_model import CellModel 
from driver_MDP import Driver_MDP 

# Hide every CUDA device from this process.
# NOTE(review): presumably meant to keep TensorFlow on the CPU — confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Locations of the .mat files used by the simulation (driving cycle, battery, motor).
# NOTE(review): `drving_cycle` looks like a typo for "driving_cycle"; in this notebook
# it is only referenced by the commented-out Environment construction below.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# One CellModel instance is shared by every Environment built in initialization_env().
cell_model = CellModel()
# env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)
# `driver` is only referenced from commented-out `driver.get_cycle()` calls in this notebook.
# NOTE(review): the meaning of the 0.02 argument is not visible here — confirm in driver_MDP.
driver = Driver_MDP(0.02)

# Width of the state vector: sizes the replay-buffer arrays and the network inputs.
num_states = 4

In [3]:
class OUActionNoise:
    """Temporally correlated exploration noise (Ornstein-Uhlenbeck style process).

    Every call advances the process by one step of size ``dt``::

        x <- x + theta * (mean - x) * dt + std_dev * sqrt(dt) * N(0, 1)

    Args:
        mean: value the process is pulled towards.
        std_deviation: scale of the random term.
        theta: strength of the pull towards ``mean``.
        dt: step size.
        x_initial: starting value; the process starts at 0 when omitted.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.mean = mean
        self.std_dev = std_deviation
        self.theta = theta
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Restart the process from ``x_initial`` (or from 0 when none was given)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process by one step, store the new value and return it."""
        previous = self.x_prev
        drift = self.theta * (self.mean - previous) * self.dt
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        self.x_prev = previous + drift + diffusion
        return self.x_prev

In [4]:
class Buffer:
    """Fixed-size replay buffer that also performs one DDPG gradient step.

    Transitions live in pre-allocated NumPy arrays that are used as a ring
    buffer. ``learn`` reads the notebook-level globals ``target_actor``,
    ``target_critic``, ``actor_model``, ``critic_model``, ``critic_optimizer``,
    ``actor_optimizer`` and ``gamma``; the state width comes from the global
    ``num_states``.

    Args:
        buffer_capacity: maximum number of transitions kept.
        batch_size: number of transitions sampled by each ``learn`` call.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of record() calls so far; it may exceed buffer_capacity.
        self.buffer_counter = 0

        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))

    def record(self, obs_tuple):
        """Store one ``(state, action, reward, next_state)`` transition.

        Once ``buffer_capacity`` transitions have been written, the oldest
        slot is overwritten.
        """
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    def learn(self):
        """Sample a minibatch (with replacement) and update critic, then actor.

        Returns immediately when no transition has been recorded yet.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            # Nothing to sample from; np.random.choice(0, n) would raise ValueError.
            return
        batch_indices = np.random.choice(record_range, self.batch_size)

        # The buffers are float64 NumPy arrays. Cast explicitly so the networks
        # receive float32 inputs instead of relying on Keras autocasting them.
        state_batch = tf.cast(
            tf.convert_to_tensor(self.state_buffer[batch_indices]), dtype=tf.float32
        )
        action_batch = tf.cast(
            tf.convert_to_tensor(self.action_buffer[batch_indices]), dtype=tf.float32
        )
        reward_batch = tf.cast(
            tf.convert_to_tensor(self.reward_buffer[batch_indices]), dtype=tf.float32
        )
        next_state_batch = tf.cast(
            tf.convert_to_tensor(self.next_state_buffer[batch_indices]), dtype=tf.float32
        )

        # Critic update: regress Q(s, a) onto the one-step bootstrapped target.
        # NOTE(review): no terminal flag is stored, so the target bootstraps from
        # next_state even for the last transition of an episode — confirm intended.
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor update: maximise the critic's value of the actor's own actions
        # (implemented as minimising its negative mean).
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Soft-update both target networks towards their online counterparts.

    Every target weight becomes ``target * (1 - tau) + tau * online``. The
    function works on the notebook-level pairs ``target_critic``/``critic_model``
    and ``target_actor``/``actor_model``, critic first.
    """
    def _blend_into(target_model, online_model):
        # Weights are paired by position in the two models' `weights` lists.
        current = target_model.weights
        blended = [
            current[position] * (1 - tau) + tau * online_weight
            for position, online_weight in enumerate(online_model.weights)
        ]
        target_model.set_weights(blended)

    _blend_into(target_critic, critic_model)
    _blend_into(target_actor, actor_model)
    

In [6]:
def get_actor():
    """Build the actor network: state vector in, one sigmoid-squashed action out.

    Architecture: BatchNormalization on the input, two 512-unit ReLU layers and
    a 1-unit sigmoid output whose kernel is initialised uniformly in
    [-0.003, 0.003].

    Returns:
        An uncompiled ``tf.keras.Model`` taking inputs of shape ``(num_states,)``.
    """
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` has to be a tuple: `(num_states)` is just the int `num_states`,
    # which only some Keras versions accept.
    inputs = layers.Input(shape=(num_states,))
    # NOTE(review): every call site in this notebook invokes the model without
    # `training=True`, so this layer presumably always runs in inference mode
    # (using its moving statistics) — confirm that is intended.
    inputs_batchnorm = layers.BatchNormalization()(inputs)

    out = layers.Dense(512, activation="relu")(inputs_batchnorm)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic():
    """Build the critic network: (state, action) in, one unbounded Q-value out.

    The state branch is BatchNormalization -> Dense(16) -> Dense(32), the action
    branch is a single Dense(32); both are concatenated and passed through two
    512-unit ReLU layers and a linear 1-unit output.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[state, action]`` inputs of
        shapes ``(num_states,)`` and ``(1,)``.
    """
    # `shape` has to be a tuple: `(num_states)` / `(1)` are plain ints, which
    # only some Keras versions accept.
    state_input = layers.Input(shape=(num_states,))
    # NOTE(review): every call site in this notebook invokes the model without
    # `training=True`, so this layer presumably always runs in inference mode
    # (using its moving statistics) — confirm that is intended.
    state_input_batchnorm = layers.BatchNormalization()(state_input)

    state_out = layers.Dense(16, activation="relu")(state_input_batchnorm)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    model = tf.keras.Model([state_input, action_input], outputs)
    return model
    

In [8]:
def policy(state, noise_object):
    """Return the actor's action for ``state`` perturbed by exploration noise.

    The action bounds are read from ``state[0][2]`` (lower) and ``state[0][3]``
    (upper). The global ``actor_model`` output plus one sample from
    ``noise_object`` is multiplied by the upper bound and clipped into
    ``[lower, upper]``.
    """
    lower_bound = state[0][2].numpy()
    upper_bound = state[0][3].numpy()

    # Evaluate the actor before drawing the noise sample.
    raw_action = tf.squeeze(actor_model(state))
    noisy_action = raw_action.numpy() + noise_object()

    return np.clip(noisy_action * upper_bound, lower_bound, upper_bound)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Pick an action with epsilon-greedy exploration.

    The action bounds are the last two entries of ``state[0]`` (lower, upper).
    With probability ``eps`` one of 10 evenly spaced values between the bounds
    is returned; otherwise the global ``actor_model`` output is multiplied by
    the upper bound and clipped into ``[lower, upper]``.
    """
    lower_bound = state[0][-2].numpy()
    upper_bound = state[0][-1].numpy()

    explore = random.random() < eps
    if not explore:
        greedy_action = tf.squeeze(actor_model(state)).numpy() * upper_bound
        return np.clip(greedy_action, lower_bound, upper_bound)

    choice = random.randint(0, 9)
    candidates = np.linspace(lower_bound, upper_bound, 10)
    return candidates[choice]

In [10]:
# Exploration noise object for the OU-based `policy` helper.
# `std_dev` now actually drives the noise scale instead of a second hard-coded 0.2.
std_dev = 0.2
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# Optimizers used by Buffer.learn(); the critic learns twice as fast as the actor.
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 200  # episodes trained per reward factor
gamma = 0.95          # discount applied to the target critic value in Buffer.learn()
tau = 0.001           # blend factor passed to update_target()

# Epsilon-greedy schedule: eps decays exponentially from MAX_EPSILON towards
# MIN_EPSILON with the number of steps taken beyond DELAY_TRAINING.
MAX_EPSILON = 1.0
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
BATCH_SIZE = 32        # minibatch size handed to Buffer
DELAY_TRAINING = 5000  # learning starts once the step counter exceeds this

In [11]:
def initialization():
    """Create fresh online/target networks and an empty replay buffer.

    The target networks start out as exact weight copies of the online ones.
    To resume from a checkpoint instead, call ``load_weights`` on the four
    models here (e.g. "./DDPG1_trial1/actor_model_checkpoint").

    Returns:
        Tuple ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    # Construction order is kept: actor, critic, target actor, target critic.
    online_actor, online_critic = get_actor(), get_critic()
    actor_copy, critic_copy = get_actor(), get_critic()

    for copy, source in ((actor_copy, online_actor), (critic_copy, online_critic)):
        copy.set_weights(source.get_weights())

    replay = Buffer(500000, BATCH_SIZE)
    return online_actor, online_critic, actor_copy, critic_copy, replay

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Write the weights of all four networks as ``.h5`` files inside ``root``.

    Args:
        actor_model, critic_model, target_actor, target_critic: objects exposing
            ``save_weights(path)``.
        root: output directory; created (including parents) when missing.

    The files are named ``actor_model.h5``, ``critic_model.h5``,
    ``target_actor.h5`` and ``target_critic.h5``.
    """
    # exist_ok avoids the check-then-create race and is a no-op for an existing directory.
    os.makedirs(root, exist_ok=True)

    # Build each path from `root` itself so the files land in the directory
    # created above even when `root` is absolute ("./" + root would not).
    named_models = (
        ("actor_model", actor_model),
        ("critic_model", critic_model),
        ("target_actor", target_actor),
        ("target_critic", target_critic),
    )
    for file_stem, model in named_models:
        model.save_weights(os.path.join(root, "{}.h5".format(file_stem)))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor):
    """Build a new Environment for one driving cycle and reward factor.

    The notebook-level ``cell_model``, ``battery_path`` and ``motor_path``
    supply the remaining constructor arguments. Despite its name, the callers
    in this notebook pass the already loaded cycle array as ``driving_path``.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [14]:
def _greedy_action(model, tf_state):
    """Deterministic action of ``model`` for a batched state.

    Mirrors the greedy branch of policy_epsilon_greedy: the model output is
    multiplied by the upper bound and clipped into the bounds stored in the
    last two entries of the state.
    """
    j_min = tf_state[0][-2].numpy()
    j_max = tf_state[0][-1].numpy()
    scaled = tf.squeeze(model(tf_state)).numpy() * j_max
    return np.clip(scaled, j_min, j_max)


def test_agent(actor_model, reward_factor):
    """Run one greedy (no exploration) episode on 01_FTP72_fuds.mat and report it.

    Prints the total reward, final SOC, cumulative SOC deviation and fuel
    consumption, and plots the driving cycle next to the chosen actions.

    Args:
        actor_model: policy network to evaluate. It is used directly, so a
            model other than the notebook-level ``actor_model`` can be tested.
        reward_factor: forwarded to the Environment constructor.

    Returns:
        ``env.history`` of the evaluated episode.
    """
    test_cycle_path = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
    test_cycle = sio.loadmat(test_cycle_path)["sch_cycle"][:, 1]
    env = initialization_env(test_cycle, reward_factor)

    # Announce the test before the rollout so the banner brackets its output.
    print("******************* Test is start *****************")
    total_reward = 0
    state = env.reset()
    done = False
    while not done:
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
        action = _greedy_action(actor_model, tf_state)
        state, reward, done = env.step(action)
        total_reward += reward

    # NOTE(review): 0.6 is presumably the target SOC — confirm against the environment.
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))

    print('Total reward: {}'.format(total_reward),
          "SOC: {:.4f}".format(env.SOC),
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption))
    print("******************* Test is done *****************")
    print("")

    # Left panel: the driving cycle; right panel: the actions taken.
    plt.subplot(1, 2, 1)
    plt.plot(test_cycle)
    plt.subplot(1, 2, 2)
    plt.plot(env.history["Action"])
    plt.show()
    return env.history
    

In [15]:
# print(env.version)

# Train one DDPG agent from scratch for every reward factor on the same driving
# cycle and collect the per-episode metrics in `results_dict`.
# num_trials = 1
reward_factors = [7, 8, 9, 10]
results_dict = {} 
driving_cycle_path = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
driving_cycle = sio.loadmat(driving_cycle_path)
# NOTE(review): column 1 of "sch_cycle" is presumably the speed trace — confirm
# against the .mat layout.
driving_cycle = driving_cycle["sch_cycle"][:, 1]

for reward_factor in reward_factors: 
    print("")
    print("Trial {}".format(reward_factor))
    print("")
    
    # This rebinds the module-level actor_model / critic_model / target_actor /
    # target_critic that Buffer.learn(), update_target() and
    # policy_epsilon_greedy() read as globals.
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()
    
    # Exploration rate and step counter restart for every reward factor;
    # `steps` counts environment steps across all episodes of this run.
    eps = MAX_EPSILON 
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    episode_test_history = [] 
    episode_num_test = [] 
    for ep in range(total_episodes): 
#         driving_cycle = driver.get_cycle() 
        # A new Environment is built for every episode.
        env = initialization_env(driving_cycle, reward_factor)
        
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            # Add a leading batch axis before handing the state to the policy.
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            # The terminal transition is stored with an all-zero next state.
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            # Warm-up: learning, target updates and epsilon decay only begin
            # once more than DELAY_TRAINING steps have been collected.
            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps
                                                                        -DELAY_TRAINING))

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption) 

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        # Sum of absolute SOC deviations from 0.6 over the episode.
        # NOTE(review): 0.6 is presumably the target SOC — confirm.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
        )
        print("")
        
        # Every 10th episode keep the history of the training episode itself
        # (the separate greedy test rollout is disabled).
        if (ep + 1) % 10 == 0: 
#             history = test_agent(actor_model, reward_factor)
            history = env.history 
            episode_test_history.append(history) 
            episode_num_test.append(ep + 1)
            
#         if (ep + 1) % 200 == 0:             
    # Weights are saved once per reward factor, after all of its episodes.
    root = "DDPG_cycleOne_reward_factor{}".format(reward_factor)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)
            
    
    results_dict[reward_factor] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs, 
        "test_history": episode_test_history, 
        "test_episode_num": episode_num_test, 
    }


Trial 7

maximum steps, simulation is done ... 
elapsed_time: 9.446
Episode: 1 Exploration P: 1.0000 Total reward: -3537.6415759272327 SOC: 1.0000 Cumulative_SOC_deviation: 478.6676 Fuel Consumption: 186.9685

maximum steps, simulation is done ... 
elapsed_time: 9.534
Episode: 2 Exploration P: 1.0000 Total reward: -3583.7075839103486 SOC: 0.9999 Cumulative_SOC_deviation: 484.9024 Fuel Consumption: 189.3908

maximum steps, simulation is done ... 
elapsed_time: 9.174
Episode: 3 Exploration P: 1.0000 Total reward: -3632.6107087301025 SOC: 1.0000 Cumulative_SOC_deviation: 491.8252 Fuel Consumption: 189.8341



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_fl

maximum steps, simulation is done ... 
elapsed_time: 86.614
Episode: 28 Exploration P: 0.5172 Total reward: -610.447839379491 SOC: 0.7546 Cumulative_SOC_deviation: 67.4387 Fuel Consumption: 138.3769

maximum steps, simulation is done ... 
elapsed_time: 88.892
Episode: 29 Exploration P: 0.5034 Total reward: -1073.9038336371468 SOC: 0.8278 Cumulative_SOC_deviation: 132.9929 Fuel Consumption: 142.9536

maximum steps, simulation is done ... 
elapsed_time: 87.308
Episode: 30 Exploration P: 0.4901 Total reward: -713.8030257373596 SOC: 0.7691 Cumulative_SOC_deviation: 82.1286 Fuel Consumption: 138.9026

maximum steps, simulation is done ... 
elapsed_time: 88.204
Episode: 31 Exploration P: 0.4771 Total reward: -467.14698729854325 SOC: 0.6317 Cumulative_SOC_deviation: 48.5163 Fuel Consumption: 127.5330

maximum steps, simulation is done ... 
elapsed_time: 86.243
Episode: 32 Exploration P: 0.4644 Total reward: -844.2427003307353 SOC: 0.6021 Cumulative_SOC_deviation: 102.6095 Fuel Consumption: 12

  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (


maximum steps, simulation is done ... 
elapsed_time: 80.219
Episode: 46 Exploration P: 0.3194 Total reward: -3419.069927615135 SOC: 0.4894 Cumulative_SOC_deviation: 470.8948 Fuel Consumption: 122.8067

maximum steps, simulation is done ... 
elapsed_time: 80.052
Episode: 47 Exploration P: 0.3110 Total reward: -180.07960002899995 SOC: 0.6153 Cumulative_SOC_deviation: 8.3698 Fuel Consumption: 121.4908

maximum steps, simulation is done ... 
elapsed_time: 80.108
Episode: 48 Exploration P: 0.3028 Total reward: -179.4393571430983 SOC: 0.6090 Cumulative_SOC_deviation: 8.3253 Fuel Consumption: 121.1621

maximum steps, simulation is done ... 
elapsed_time: 80.052
Episode: 49 Exploration P: 0.2949 Total reward: -173.86732768549598 SOC: 0.5982 Cumulative_SOC_deviation: 7.6981 Fuel Consumption: 119.9803

maximum steps, simulation is done ... 
elapsed_time: 79.966
Episode: 50 Exploration P: 0.2872 Total reward: -193.77791245426988 SOC: 0.6149 Cumulative_SOC_deviation: 10.2881 Fuel Consumption: 121.

maximum steps, simulation is done ... 
elapsed_time: 107.690
Episode: 87 Exploration P: 0.1104 Total reward: -282.5834149059805 SOC: 0.5842 Cumulative_SOC_deviation: 23.9886 Fuel Consumption: 114.6631

maximum steps, simulation is done ... 
elapsed_time: 107.418
Episode: 88 Exploration P: 0.1076 Total reward: -260.8614083727572 SOC: 0.5903 Cumulative_SOC_deviation: 20.9927 Fuel Consumption: 113.9123

maximum steps, simulation is done ... 
elapsed_time: 107.644
Episode: 89 Exploration P: 0.1050 Total reward: -374.1221542501641 SOC: 0.5518 Cumulative_SOC_deviation: 37.7654 Fuel Consumption: 109.7647

maximum steps, simulation is done ... 
elapsed_time: 108.144
Episode: 90 Exploration P: 0.1024 Total reward: -279.07862081970893 SOC: 0.5595 Cumulative_SOC_deviation: 24.0632 Fuel Consumption: 110.6365

maximum steps, simulation is done ... 
elapsed_time: 108.273
Episode: 91 Exploration P: 0.0999 Total reward: -283.07190820822274 SOC: 0.5883 Cumulative_SOC_deviation: 24.4728 Fuel Consumption

maximum steps, simulation is done ... 
elapsed_time: 107.821
Episode: 128 Exploration P: 0.0426 Total reward: -313.915422463254 SOC: 0.5808 Cumulative_SOC_deviation: 29.0192 Fuel Consumption: 110.7813

maximum steps, simulation is done ... 
elapsed_time: 107.528
Episode: 129 Exploration P: 0.0417 Total reward: -314.2068156518165 SOC: 0.5962 Cumulative_SOC_deviation: 28.8575 Fuel Consumption: 112.2040

maximum steps, simulation is done ... 
elapsed_time: 107.810
Episode: 130 Exploration P: 0.0408 Total reward: -284.5858338660207 SOC: 0.5894 Cumulative_SOC_deviation: 24.7518 Fuel Consumption: 111.3229

maximum steps, simulation is done ... 
elapsed_time: 108.552
Episode: 131 Exploration P: 0.0400 Total reward: -383.0345443741018 SOC: 0.5570 Cumulative_SOC_deviation: 38.9806 Fuel Consumption: 110.1704

maximum steps, simulation is done ... 
elapsed_time: 108.341
Episode: 132 Exploration P: 0.0392 Total reward: -459.0033897919349 SOC: 0.5733 Cumulative_SOC_deviation: 49.7582 Fuel Consumpti

maximum steps, simulation is done ... 
elapsed_time: 118.456
Episode: 169 Exploration P: 0.0206 Total reward: -246.22952713767089 SOC: 0.5707 Cumulative_SOC_deviation: 20.0057 Fuel Consumption: 106.1896

maximum steps, simulation is done ... 
elapsed_time: 94.323
Episode: 170 Exploration P: 0.0203 Total reward: -318.86266862398526 SOC: 0.5852 Cumulative_SOC_deviation: 30.3670 Fuel Consumption: 106.2937

maximum steps, simulation is done ... 
elapsed_time: 83.468
Episode: 171 Exploration P: 0.0200 Total reward: -295.10558456681116 SOC: 0.5856 Cumulative_SOC_deviation: 26.8500 Fuel Consumption: 107.1559

maximum steps, simulation is done ... 
elapsed_time: 79.485
Episode: 172 Exploration P: 0.0197 Total reward: -245.65122546876486 SOC: 0.5878 Cumulative_SOC_deviation: 19.7809 Fuel Consumption: 107.1848

maximum steps, simulation is done ... 
elapsed_time: 81.792
Episode: 173 Exploration P: 0.0195 Total reward: -273.6532548534311 SOC: 0.5760 Cumulative_SOC_deviation: 23.9251 Fuel Consumpt



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

maximum steps, simulation is done ... 
elapsed_time: 38.398
Episode: 4 Exploration P: 0.9903 Total reward: -4071.138180144975 SOC: 1.0000 Cumulative_SOC_deviation: 485.6655 Fuel Consumption: 185.8140

maximum steps, simulation is done ... 
elapsed_time: 82.589
Episode: 5 Exploration P: 0.9638 Total reward: -4056.071676603807 SOC: 1.0000 Cumulative_SOC_deviation: 483.7912 Fuel Consumption: 185.74

maximum steps, simulation is done ... 
elapsed_time: 84.220
Episode: 38 Exploration P: 0.3954 Total reward: -275.04281384001376 SOC: 0.6288 Cumulative_SOC_deviation: 18.6557 Fuel Consumption: 125.7974

maximum steps, simulation is done ... 
elapsed_time: 84.607
Episode: 39 Exploration P: 0.3849 Total reward: -224.34723788593342 SOC: 0.6015 Cumulative_SOC_deviation: 12.6705 Fuel Consumption: 122.9832

maximum steps, simulation is done ... 
elapsed_time: 85.715
Episode: 40 Exploration P: 0.3748 Total reward: -291.1943933367133 SOC: 0.6357 Cumulative_SOC_deviation: 20.8370 Fuel Consumption: 124.4982

maximum steps, simulation is done ... 
elapsed_time: 84.730
Episode: 41 Exploration P: 0.3649 Total reward: -209.32789361915323 SOC: 0.6094 Cumulative_SOC_deviation: 11.0383 Fuel Consumption: 121.0218

maximum steps, simulation is done ... 
elapsed_time: 82.151
Episode: 42 Exploration P: 0.3553 Total reward: -196.81769615861847 SOC: 0.5959 Cumulative_SOC_deviation: 9.7567 Fuel Consumption: 11

maximum steps, simulation is done ... 
elapsed_time: 102.171
Episode: 79 Exploration P: 0.1350 Total reward: -271.34208295447684 SOC: 0.5866 Cumulative_SOC_deviation: 20.1581 Fuel Consumption: 110.0775

maximum steps, simulation is done ... 
elapsed_time: 104.809
Episode: 80 Exploration P: 0.1316 Total reward: -205.74792679374372 SOC: 0.5871 Cumulative_SOC_deviation: 11.9305 Fuel Consumption: 110.3041

maximum steps, simulation is done ... 
elapsed_time: 105.416
Episode: 81 Exploration P: 0.1283 Total reward: -256.17169817656844 SOC: 0.5909 Cumulative_SOC_deviation: 18.1823 Fuel Consumption: 110.7136

maximum steps, simulation is done ... 
elapsed_time: 105.124
Episode: 82 Exploration P: 0.1251 Total reward: -244.3787999801278 SOC: 0.5904 Cumulative_SOC_deviation: 16.7941 Fuel Consumption: 110.0258

maximum steps, simulation is done ... 
elapsed_time: 104.976
Episode: 83 Exploration P: 0.1220 Total reward: -221.25924349096852 SOC: 0.5916 Cumulative_SOC_deviation: 13.8633 Fuel Consumpti

maximum steps, simulation is done ... 
elapsed_time: 83.054
Episode: 120 Exploration P: 0.0505 Total reward: -194.87962608180428 SOC: 0.5899 Cumulative_SOC_deviation: 10.6741 Fuel Consumption: 109.4865

maximum steps, simulation is done ... 
elapsed_time: 108.080
Episode: 121 Exploration P: 0.0495 Total reward: -189.34535668819763 SOC: 0.5898 Cumulative_SOC_deviation: 9.9942 Fuel Consumption: 109.3916

maximum steps, simulation is done ... 
elapsed_time: 104.459
Episode: 122 Exploration P: 0.0484 Total reward: -204.53314500065378 SOC: 0.5934 Cumulative_SOC_deviation: 11.8948 Fuel Consumption: 109.3746

maximum steps, simulation is done ... 
elapsed_time: 82.266
Episode: 123 Exploration P: 0.0473 Total reward: -163.58127726661343 SOC: 0.5944 Cumulative_SOC_deviation: 6.8684 Fuel Consumption: 108.6343

maximum steps, simulation is done ... 
elapsed_time: 86.594
Episode: 124 Exploration P: 0.0463 Total reward: -173.7721754007397 SOC: 0.5909 Cumulative_SOC_deviation: 8.1888 Fuel Consumptio

maximum steps, simulation is done ... 
elapsed_time: 82.955
Episode: 161 Exploration P: 0.0232 Total reward: -225.58315335262523 SOC: 0.5944 Cumulative_SOC_deviation: 14.7115 Fuel Consumption: 107.8915

maximum steps, simulation is done ... 
elapsed_time: 84.394
Episode: 162 Exploration P: 0.0228 Total reward: -260.80820409620594 SOC: 0.5878 Cumulative_SOC_deviation: 19.2276 Fuel Consumption: 106.9871

maximum steps, simulation is done ... 
elapsed_time: 83.621
Episode: 163 Exploration P: 0.0225 Total reward: -256.362675223693 SOC: 0.5961 Cumulative_SOC_deviation: 18.5284 Fuel Consumption: 108.1357

maximum steps, simulation is done ... 
elapsed_time: 83.276
Episode: 164 Exploration P: 0.0221 Total reward: -223.57436348475125 SOC: 0.5960 Cumulative_SOC_deviation: 14.3840 Fuel Consumption: 108.5020

maximum steps, simulation is done ... 
elapsed_time: 82.995
Episode: 165 Exploration P: 0.0218 Total reward: -234.563974504457 SOC: 0.5969 Cumulative_SOC_deviation: 15.7990 Fuel Consumption:

maximum steps, simulation is done ... 
elapsed_time: 12.841
Episode: 2 Exploration P: 1.0000 Total reward: -4548.6682670290575 SOC: 1.0000 Cumulative_SOC_deviation: 484.5749 Fuel Consumption: 187.4942

maximum steps, simulation is done ... 
elapsed_time: 12.880
Episode: 3 Exploration P: 1.0000 Total reward: -4570.861553350118 SOC: 1.0000 Cumulative_SOC_deviation: 486.2849 Fuel Consumption: 194.2974



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer construct

maximum steps, simulation is done ... 
elapsed_time: 82.080
Episode: 29 Exploration P: 0.5034 Total reward: -1181.946805427751 SOC: 0.8437 Cumulative_SOC_deviation: 115.2375 Fuel Consumption: 144.8091

maximum steps, simulation is done ... 
elapsed_time: 82.248
Episode: 30 Exploration P: 0.4901 Total reward: -856.4449102759505 SOC: 0.7663 Cumulative_SOC_deviation: 79.8323 Fuel Consumption: 137.9543

maximum steps, simulation is done ... 
elapsed_time: 82.131
Episode: 31 Exploration P: 0.4771 Total reward: -791.9392872735402 SOC: 0.7140 Cumulative_SOC_deviation: 73.0223 Fuel Consumption: 134.7382

maximum steps, simulation is done ... 
elapsed_time: 82.564
Episode: 32 Exploration P: 0.4644 Total reward: -1214.303944648258 SOC: 0.5603 Cumulative_SOC_deviation: 121.2838 Fuel Consumption: 122.7501

maximum steps, simulation is done ... 
elapsed_time: 82.228
Episode: 33 Exploration P: 0.4521 Total reward: -1192.5671451815438 SOC: 0.5967 Cumulative_SOC_deviation: 118.4769 Fuel Consumption: 1

maximum steps, simulation is done ... 
elapsed_time: 82.512
Episode: 70 Exploration P: 0.1701 Total reward: -249.2579401899497 SOC: 0.5965 Cumulative_SOC_deviation: 15.3192 Fuel Consumption: 111.3854

maximum steps, simulation is done ... 
elapsed_time: 82.466
Episode: 71 Exploration P: 0.1657 Total reward: -285.69174490180575 SOC: 0.5972 Cumulative_SOC_deviation: 19.2534 Fuel Consumption: 112.4110

maximum steps, simulation is done ... 
elapsed_time: 82.491
Episode: 72 Exploration P: 0.1615 Total reward: -204.63681056395893 SOC: 0.5956 Cumulative_SOC_deviation: 10.2325 Fuel Consumption: 112.5440

maximum steps, simulation is done ... 
elapsed_time: 82.367
Episode: 73 Exploration P: 0.1574 Total reward: -207.73243537000914 SOC: 0.5967 Cumulative_SOC_deviation: 10.6868 Fuel Consumption: 111.5508

maximum steps, simulation is done ... 
elapsed_time: 82.564
Episode: 74 Exploration P: 0.1534 Total reward: -196.96843368724012 SOC: 0.5957 Cumulative_SOC_deviation: 9.4575 Fuel Consumption: 11

maximum steps, simulation is done ... 
elapsed_time: 83.541
Episode: 111 Exploration P: 0.0619 Total reward: -191.37114481208326 SOC: 0.5945 Cumulative_SOC_deviation: 9.1885 Fuel Consumption: 108.6749

maximum steps, simulation is done ... 
elapsed_time: 83.727
Episode: 112 Exploration P: 0.0605 Total reward: -187.2773165491409 SOC: 0.5914 Cumulative_SOC_deviation: 8.7169 Fuel Consumption: 108.8253

maximum steps, simulation is done ... 
elapsed_time: 83.677
Episode: 113 Exploration P: 0.0591 Total reward: -191.94938558418883 SOC: 0.5931 Cumulative_SOC_deviation: 9.2661 Fuel Consumption: 108.5541

maximum steps, simulation is done ... 
elapsed_time: 83.614
Episode: 114 Exploration P: 0.0578 Total reward: -206.731208239282 SOC: 0.5912 Cumulative_SOC_deviation: 10.9541 Fuel Consumption: 108.1443

maximum steps, simulation is done ... 
elapsed_time: 83.568
Episode: 115 Exploration P: 0.0565 Total reward: -216.0980681118973 SOC: 0.5894 Cumulative_SOC_deviation: 11.9986 Fuel Consumption: 10

maximum steps, simulation is done ... 
elapsed_time: 75.041
Episode: 152 Exploration P: 0.0268 Total reward: -193.0538271184264 SOC: 0.5981 Cumulative_SOC_deviation: 9.4661 Fuel Consumption: 107.8585

maximum steps, simulation is done ... 
elapsed_time: 75.405
Episode: 153 Exploration P: 0.0264 Total reward: -175.8957525822267 SOC: 0.5943 Cumulative_SOC_deviation: 7.6068 Fuel Consumption: 107.4349

maximum steps, simulation is done ... 
elapsed_time: 75.214
Episode: 154 Exploration P: 0.0259 Total reward: -181.99307306120343 SOC: 0.5952 Cumulative_SOC_deviation: 8.2775 Fuel Consumption: 107.4954

maximum steps, simulation is done ... 
elapsed_time: 75.062
Episode: 155 Exploration P: 0.0255 Total reward: -178.10282618238386 SOC: 0.5957 Cumulative_SOC_deviation: 7.8823 Fuel Consumption: 107.1620

maximum steps, simulation is done ... 
elapsed_time: 75.031
Episode: 156 Exploration P: 0.0251 Total reward: -178.12311048611056 SOC: 0.5973 Cumulative_SOC_deviation: 7.8638 Fuel Consumption: 10

maximum steps, simulation is done ... 
elapsed_time: 70.142
Episode: 193 Exploration P: 0.0155 Total reward: -187.60076076882964 SOC: 0.5960 Cumulative_SOC_deviation: 8.9311 Fuel Consumption: 107.2213

maximum steps, simulation is done ... 
elapsed_time: 70.157
Episode: 194 Exploration P: 0.0153 Total reward: -199.46692005935049 SOC: 0.5935 Cumulative_SOC_deviation: 10.2735 Fuel Consumption: 107.0050

maximum steps, simulation is done ... 
elapsed_time: 70.197
Episode: 195 Exploration P: 0.0152 Total reward: -213.9128303970997 SOC: 0.5913 Cumulative_SOC_deviation: 11.9165 Fuel Consumption: 106.6643

maximum steps, simulation is done ... 
elapsed_time: 69.995
Episode: 196 Exploration P: 0.0150 Total reward: -211.04989557712466 SOC: 0.5949 Cumulative_SOC_deviation: 11.4571 Fuel Consumption: 107.9362

maximum steps, simulation is done ... 
elapsed_time: 70.030
Episode: 197 Exploration P: 0.0149 Total reward: -202.29224319186955 SOC: 0.5952 Cumulative_SOC_deviation: 10.5145 Fuel Consumptio

maximum steps, simulation is done ... 
elapsed_time: 69.236
Episode: 20 Exploration P: 0.6418 Total reward: -3229.758010082754 SOC: 0.9997 Cumulative_SOC_deviation: 307.6826 Fuel Consumption: 152.9317

maximum steps, simulation is done ... 
elapsed_time: 69.167
Episode: 21 Exploration P: 0.6247 Total reward: -3419.5866021161814 SOC: 0.9982 Cumulative_SOC_deviation: 326.4964 Fuel Consumption: 154.6222

maximum steps, simulation is done ... 
elapsed_time: 69.209
Episode: 22 Exploration P: 0.6080 Total reward: -2628.270594205328 SOC: 0.9994 Cumulative_SOC_deviation: 247.3999 Fuel Consumption: 154.2717

maximum steps, simulation is done ... 
elapsed_time: 69.276
Episode: 23 Exploration P: 0.5918 Total reward: -2431.2763613764027 SOC: 0.9418 Cumulative_SOC_deviation: 228.1200 Fuel Consumption: 150.0764

maximum steps, simulation is done ... 
elapsed_time: 69.337
Episode: 24 Exploration P: 0.5761 Total reward: -2726.394010576031 SOC: 0.9633 Cumulative_SOC_deviation: 257.4112 Fuel Consumption

maximum steps, simulation is done ... 
elapsed_time: 69.758
Episode: 61 Exploration P: 0.2149 Total reward: -210.50542727789102 SOC: 0.5906 Cumulative_SOC_deviation: 9.4875 Fuel Consumption: 115.6303

maximum steps, simulation is done ... 
elapsed_time: 70.018
Episode: 62 Exploration P: 0.2094 Total reward: -213.03667796449307 SOC: 0.5865 Cumulative_SOC_deviation: 9.9041 Fuel Consumption: 113.9954

maximum steps, simulation is done ... 
elapsed_time: 70.764
Episode: 63 Exploration P: 0.2040 Total reward: -213.75681884621648 SOC: 0.5995 Cumulative_SOC_deviation: 9.8135 Fuel Consumption: 115.6217

maximum steps, simulation is done ... 
elapsed_time: 70.517
Episode: 64 Exploration P: 0.1987 Total reward: -229.31185745228532 SOC: 0.5913 Cumulative_SOC_deviation: 11.6269 Fuel Consumption: 113.0430

maximum steps, simulation is done ... 
elapsed_time: 69.776
Episode: 65 Exploration P: 0.1936 Total reward: -234.97044774485383 SOC: 0.5845 Cumulative_SOC_deviation: 12.2057 Fuel Consumption: 112

maximum steps, simulation is done ... 
elapsed_time: 73.565
Episode: 102 Exploration P: 0.0765 Total reward: -200.90216667401918 SOC: 0.5932 Cumulative_SOC_deviation: 9.2340 Fuel Consumption: 108.5627

maximum steps, simulation is done ... 
elapsed_time: 74.258
Episode: 103 Exploration P: 0.0747 Total reward: -180.68552630878702 SOC: 0.5930 Cumulative_SOC_deviation: 7.1603 Fuel Consumption: 109.0828

maximum steps, simulation is done ... 
elapsed_time: 73.811
Episode: 104 Exploration P: 0.0729 Total reward: -173.07649863251333 SOC: 0.5952 Cumulative_SOC_deviation: 6.3705 Fuel Consumption: 109.3718

maximum steps, simulation is done ... 
elapsed_time: 70.024
Episode: 105 Exploration P: 0.0712 Total reward: -189.12999548524158 SOC: 0.5934 Cumulative_SOC_deviation: 8.0468 Fuel Consumption: 108.6615

maximum steps, simulation is done ... 
elapsed_time: 70.014
Episode: 106 Exploration P: 0.0696 Total reward: -206.81619379415847 SOC: 0.5917 Cumulative_SOC_deviation: 9.8654 Fuel Consumption: 

maximum steps, simulation is done ... 
elapsed_time: 70.424
Episode: 143 Exploration P: 0.0316 Total reward: -235.1702472266271 SOC: 0.5927 Cumulative_SOC_deviation: 12.7005 Fuel Consumption: 108.1650

maximum steps, simulation is done ... 
elapsed_time: 70.787
Episode: 144 Exploration P: 0.0310 Total reward: -219.86547466316074 SOC: 0.5921 Cumulative_SOC_deviation: 11.2133 Fuel Consumption: 107.7321

maximum steps, simulation is done ... 
elapsed_time: 70.311
Episode: 145 Exploration P: 0.0304 Total reward: -235.0374192143273 SOC: 0.5908 Cumulative_SOC_deviation: 12.7290 Fuel Consumption: 107.7469

maximum steps, simulation is done ... 
elapsed_time: 70.662
Episode: 146 Exploration P: 0.0299 Total reward: -256.0377233769655 SOC: 0.5913 Cumulative_SOC_deviation: 14.8161 Fuel Consumption: 107.8765

maximum steps, simulation is done ... 
elapsed_time: 70.602
Episode: 147 Exploration P: 0.0293 Total reward: -249.19025651023043 SOC: 0.5947 Cumulative_SOC_deviation: 14.0441 Fuel Consumption

maximum steps, simulation is done ... 
elapsed_time: 71.008
Episode: 184 Exploration P: 0.0170 Total reward: -246.9429955034999 SOC: 0.5982 Cumulative_SOC_deviation: 13.7009 Fuel Consumption: 109.9342

maximum steps, simulation is done ... 
elapsed_time: 71.159
Episode: 185 Exploration P: 0.0168 Total reward: -239.0574434933148 SOC: 0.5957 Cumulative_SOC_deviation: 12.8433 Fuel Consumption: 110.6241

maximum steps, simulation is done ... 
elapsed_time: 70.876
Episode: 186 Exploration P: 0.0166 Total reward: -208.13111019771017 SOC: 0.5918 Cumulative_SOC_deviation: 9.8947 Fuel Consumption: 109.1846

maximum steps, simulation is done ... 
elapsed_time: 70.633
Episode: 187 Exploration P: 0.0164 Total reward: -211.56831725925397 SOC: 0.5974 Cumulative_SOC_deviation: 10.1143 Fuel Consumption: 110.4256

maximum steps, simulation is done ... 
elapsed_time: 70.902
Episode: 188 Exploration P: 0.0163 Total reward: -188.84974235996356 SOC: 0.5990 Cumulative_SOC_deviation: 7.8529 Fuel Consumption:

In [16]:
# Persist the per-reward-factor training results for later analysis.
with open("DDPG_cycleOne_7to10.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [17]:
# results_dict