In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 
import scipy.io as sio

from vehicle_model_variant import Environment 
from cell_model import CellModel 
from driver_MDP import Driver_MDP 

# Hide all CUDA devices from this process so TensorFlow runs on the CPU.
# NOTE(review): this is set after `import tensorflow`; it presumably still takes
# effect because no GPU work has been issued yet -- confirm on the target setup.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Path to the drive-cycle .mat file. The name is misspelled ("drving") and is only
# referenced by the commented-out Environment call below; later cells load the same
# file themselves into `driving_cycle`.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
# Battery and motor data files; passed to Environment(...) by initialization_env().
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# One CellModel instance is shared by every Environment created in this notebook.
cell_model = CellModel()
# env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)
# Only referenced by commented-out `driver.get_cycle()` calls further down.
# NOTE(review): the meaning of the 0.02 argument is not visible here -- confirm in driver_MDP.
driver = Driver_MDP(0.02)

# Length of the state vector: sizes the replay buffer and the network inputs.
# The epsilon-greedy policy reads the last two entries as the action bounds (j_min, j_max).
num_states = 4

In [3]:
class OUActionNoise:
    """Temporally correlated exploration noise (Ornstein-Uhlenbeck style).

    Every call moves the internal value a little towards ``mean`` and adds a
    Gaussian perturbation, so consecutive samples are correlated.

    Args:
        mean: Value the process is pulled towards.
        std_deviation: Scale of the random perturbation.
        theta: Strength of the pull towards ``mean``.
        dt: Step size of one update.
        x_initial: Optional starting value; the process starts at 0 when omitted.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.mean = mean
        self.std_dev = std_deviation
        self.theta = theta
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Put the process back at ``x_initial`` (or 0 when none was given)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process one step and return the new value."""
        # Deterministic pull towards the mean.
        drift = self.theta * (self.mean - self.x_prev) * self.dt
        # Random kick, scaled by sqrt(dt).
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        x = self.x_prev + drift + diffusion
        # Remember the sample: the next call starts from it.
        self.x_prev = x
        return x

In [4]:
class Buffer:
    """Circular experience-replay buffer that also runs the DDPG gradient step.

    Transitions are stored as (state, action, reward, next_state) rows in
    pre-allocated NumPy arrays whose state width is the module-level
    ``num_states``. After ``buffer_capacity`` records have been written, new
    records overwrite the oldest ones.

    ``learn`` depends on module-level globals: ``actor_model``,
    ``critic_model``, ``target_actor``, ``target_critic``, ``actor_optimizer``,
    ``critic_optimizer`` and ``gamma``.

    Args:
        buffer_capacity: Maximum number of transitions kept.
        batch_size: Number of transitions sampled per ``learn`` call.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of record() calls so far (not capped at the capacity).
        self.buffer_counter = 0

        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))

    def record(self, obs_tuple):
        """Store one transition.

        Args:
            obs_tuple: Sequence ``(state, action, reward, next_state)``.
        """
        # Wrap around so the oldest slot is reused once the buffer is full.
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    @staticmethod
    def _as_float32(array):
        """Convert a NumPy batch to a float32 tensor."""
        return tf.cast(tf.convert_to_tensor(array), dtype=tf.float32)

    def learn(self):
        """Sample a random mini-batch and apply one critic and one actor update.

        Indices are drawn uniformly (with replacement) from the rows written so
        far. No terminal flag is stored, so the TD target always bootstraps
        from ``next_state``.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        batch_indices = np.random.choice(record_range, self.batch_size)

        # The buffers are float64 NumPy arrays. Cast every batch to float32
        # (previously only the reward was cast) so the networks are not fed
        # float64 tensors, which makes Keras emit its float64 auto-cast warning.
        state_batch = self._as_float32(self.state_buffer[batch_indices])
        action_batch = self._as_float32(self.action_buffer[batch_indices])
        reward_batch = self._as_float32(self.reward_buffer[batch_indices])
        next_state_batch = self._as_float32(self.next_state_buffer[batch_indices])

        # Critic update: regress Q(s, a) onto r + gamma * Q'(s', mu'(s')).
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor update: maximise the critic's value of the actor's own actions
        # (implemented as minimising the negated mean).
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Soft-update both target networks towards their online counterparts.

    Every target weight becomes ``(1 - tau) * target + tau * online``. The
    critic pair is updated first, then the actor pair. Operates on the
    module-level ``critic_model``/``target_critic`` and
    ``actor_model``/``target_actor``.

    Args:
        tau: Interpolation factor; larger values track the online network faster.
    """

    def _blend_into(target_net, online_net):
        # Pair weights positionally; both networks share the same architecture.
        blended = [
            target_w * (1 - tau) + tau * online_w
            for target_w, online_w in zip(target_net.weights, online_net.weights)
        ]
        target_net.set_weights(blended)

    _blend_into(target_critic, critic_model)
    _blend_into(target_actor, actor_model)
    

In [6]:
def get_actor():
    """Build the actor network mapping a state to an action fraction in (0, 1).

    Architecture: BatchNormalization on the input, two 512-unit ReLU layers,
    and a single sigmoid output. The callers multiply that output by the upper
    action bound taken from the state.

    Returns:
        An uncompiled ``tf.keras.Model`` with input shape ``(num_states,)`` and
        output shape ``(1,)``.
    """
    # Small uniform init for the last layer keeps the initial pre-activations
    # near 0, i.e. the initial sigmoid outputs near 0.5.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` must be a tuple: `(num_states)` is just an int, which newer Keras
    # versions reject.
    inputs = layers.Input(shape=(num_states,))
    inputs_batchnorm = layers.BatchNormalization()(inputs)

    out = layers.Dense(512, activation="relu")(inputs_batchnorm)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic():
    """Build the critic network estimating Q(state, action).

    The state goes through BatchNormalization and two ReLU layers (16 and 32
    units); the action goes through one 32-unit ReLU layer. Both branches are
    concatenated and fed through two 512-unit ReLU layers to one linear output.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[state, action]`` with shapes
        ``(num_states,)`` and ``(1,)`` and returning a ``(1,)`` value.
    """
    # `shape` must be a tuple: `(num_states)` / `(1)` are plain ints, which
    # newer Keras versions reject.
    state_input = layers.Input(shape=(num_states,))
    state_input_batchnorm = layers.BatchNormalization()(state_input)

    state_out = layers.Dense(16, activation="relu")(state_input_batchnorm)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    # Linear output: the Q-value is unbounded.
    outputs = layers.Dense(1)(out)

    model = tf.keras.Model([state_input, action_input], outputs)
    return model
    

In [8]:
def policy(state, noise_object):
    """Pick an action from the global actor with additive exploration noise.

    Args:
        state: Batched state tensor with a single row; entries 2 and 3 of that
            row are read as the lower and upper action bounds.
        noise_object: Zero-argument callable returning the noise to add to the
            actor output.

    Returns:
        The noisy actor output scaled by the upper bound and clipped to
        ``[lower bound, upper bound]``.
    """
    lower_bound, upper_bound = state[0][2].numpy(), state[0][3].numpy()
    actor_output = tf.squeeze(actor_model(state)).numpy()
    # Noise is added before scaling, i.e. in the actor's (0, 1) output space.
    noisy_fraction = actor_output + noise_object()
    return np.clip(noisy_fraction * upper_bound, lower_bound, upper_bound)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Choose an action epsilon-greedily using the global actor.

    Args:
        state: Batched state tensor with a single row; the last two entries of
            that row are read as the lower and upper action bounds.
        eps: Probability of exploring. Any value <= 0 makes the choice fully
            greedy.

    Returns:
        When exploring: one of 10 evenly spaced values between the bounds,
        picked uniformly. Otherwise: the actor output scaled by the upper bound
        and clipped to the bounds.
    """
    lower_bound = state[0][-2].numpy()
    upper_bound = state[0][-1].numpy()

    explore = random.random() < eps
    if not explore:
        # Greedy branch: the actor emits a fraction of the upper bound.
        fraction = tf.squeeze(actor_model(state)).numpy()
        return np.clip(fraction * upper_bound, lower_bound, upper_bound)

    # Exploration branch: uniform pick from a 10-point grid over the legal range.
    grid_index = random.randint(0, 9)
    return np.linspace(lower_bound, upper_bound, 10)[grid_index]

In [10]:
# --- Exploration noise -------------------------------------------------------
# Only consumed by policy(); the training loop explores with epsilon-greedy instead.
std_dev = 0.2
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# --- Optimizers --------------------------------------------------------------
critic_lr = 5e-4
actor_lr = 2.5e-4
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

# --- DDPG settings -----------------------------------------------------------
total_episodes = 200   # episodes per reward factor
gamma = 0.95           # discount factor in the critic's TD target
tau = 0.001            # soft-update rate for the target networks

# --- Epsilon-greedy schedule and replay settings -----------------------------
MAX_EPSILON = 1.0      # exploration probability before learning starts
MIN_EPSILON = 0.01     # value the exploration probability decays towards
DECAY_RATE = 2e-5      # exponential decay rate per environment step
BATCH_SIZE = 32        # mini-batch size handed to the replay buffer
DELAY_TRAINING = 5000  # learning starts once the step count exceeds this

In [11]:
def initialization():
    """Create fresh actor/critic networks, their targets and a replay buffer.

    The target networks start as exact copies of the online networks. Loading
    weights from an earlier "./DDPG1_trial1/" checkpoint used to be an option
    here; it is not done.

    Returns:
        Tuple ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    # Build order is kept as actor, critic, target actor, target critic.
    actor_model, critic_model = get_actor(), get_critic()
    target_actor, target_critic = get_actor(), get_critic()

    # Sync each target with its online network.
    for target_net, online_net in (
        (target_actor, actor_model),
        (target_critic, critic_model),
    ):
        target_net.set_weights(online_net.get_weights())

    # 500k-transition replay memory sampled in BATCH_SIZE mini-batches.
    buffer = Buffer(500000, BATCH_SIZE)
    return actor_model, critic_model, target_actor, target_critic, buffer

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Write the weights of all four networks into the directory ``root``.

    The directory is created if needed. Files are written as
    ``./<root>/actor_model.h5``, ``critic_model.h5``, ``target_actor.h5`` and
    ``target_critic.h5``, in that order.

    Args:
        actor_model: Online actor; must provide ``save_weights(path)``.
        critic_model: Online critic.
        target_actor: Target actor.
        target_critic: Target critic.
        root: Output directory, relative to the current working directory.
    """
    # exist_ok=True replaces the exists()-then-makedirs() pair, which could
    # fail if the directory appeared between the check and the creation.
    os.makedirs(root, exist_ok=True)

    checkpoints = (
        ("actor_model", actor_model),
        ("critic_model", critic_model),
        ("target_actor", target_actor),
        ("target_critic", target_critic),
    )
    for stem, model in checkpoints:
        model.save_weights("./{}/{}.h5".format(root, stem))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor):
    """Create an Environment for one episode.

    Args:
        driving_path: Forwarded to ``Environment`` as the driving-cycle
            argument. Despite the name, the callers in this notebook pass the
            already-loaded cycle array rather than a file path.
        reward_factor: Forwarded to ``Environment`` as its last argument.

    Returns:
        A new ``Environment`` built with the module-level ``cell_model``,
        ``battery_path`` and ``motor_path``.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [14]:
def test_agent(actor_model, reward_factor):
    """Run one greedy (exploration-free) episode with ``actor_model`` and report it.

    The cycle is read from ``01_FTP72_fuds.mat`` (column 1 of ``"sch_cycle"``).
    After the episode the total reward, final SOC, cumulative absolute SOC
    deviation from 0.6 and fuel consumption are printed, and the cycle and the
    chosen actions are plotted side by side.

    Args:
        actor_model: Actor network to evaluate. It is called directly, so the
            model passed in is the one tested. Previously this argument was
            ignored and the global actor was always used.
        reward_factor: Forwarded to ``initialization_env``.

    Returns:
        ``env.history`` of the evaluation episode.
    """
    test_cycle_path = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
    test_cycle = sio.loadmat(test_cycle_path)
    test_cycle = test_cycle["sch_cycle"][:, 1]
    env = initialization_env(test_cycle, reward_factor)

    total_reward = 0
    state = env.reset()
    while True:
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
        # Greedy action, same rule as the non-exploring branch of
        # policy_epsilon_greedy: the actor output is a fraction of the upper
        # bound, clipped to the bounds held in the state's last two entries.
        j_min = tf_state[0][-2].numpy()
        j_max = tf_state[0][-1].numpy()
        action_fraction = tf.squeeze(actor_model(tf_state)).numpy()
        action = np.clip(action_fraction * j_max, j_min, j_max)
        next_state, reward, done = env.step(action)

        state = next_state
        total_reward += reward

        if done:
            break

    # Sum of |SOC - 0.6| over the recorded SOC history.
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))

    print("******************* Test is start *****************")
    print('Total reward: {}'.format(total_reward),
          "SOC: {:.4f}".format(env.SOC),
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption))
    print("******************* Test is done *****************")
    print("")
    # Left: the driving cycle; right: the actions taken during the episode.
    plt.subplot(1, 2, 1)
    plt.plot(test_cycle)
    plt.subplot(1, 2, 2)
    plt.plot(env.history["Action"])
    plt.show()
    return env.history
    

In [15]:
# Train one independent DDPG agent per reward factor on a single driving cycle
# and collect per-episode statistics in `results_dict`.
reward_factors = [7, 8, 9, 10]
results_dict = {}
driving_cycle_path = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
driving_cycle = sio.loadmat(driving_cycle_path)
driving_cycle = driving_cycle["sch_cycle"][:, 1]

for reward_factor in reward_factors:
    print("")
    print("Trial {}".format(reward_factor))
    print("")

    # Fresh networks and replay buffer for this trial. These are module-level
    # names, which is how Buffer.learn(), update_target() and
    # policy_epsilon_greedy() pick them up.
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()

    # Fresh optimizers as well: reusing the Adam instances from the previous
    # trial would carry their step count (and stale per-variable state) over
    # to the newly created networks, so trials would not be independent.
    critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
    actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

    eps = MAX_EPSILON
    steps = 0  # environment steps taken in this trial, across episodes

    episode_rewards = []
    episode_SOCs = []
    episode_FCs = []
    episode_test_history = []
    episode_num_test = []
    for ep in range(total_episodes):
        env = initialization_env(driving_cycle, reward_factor)

        start = time.time()
        state = env.reset()
        episodic_reward = 0

        while True:
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
            next_state, reward, done = env.step(action)
            if done:
                # The final transition is stored with an all-zero next state.
                # NOTE(review): no terminal flag is recorded, so the critic
                # target still bootstraps from this zero state -- confirm that
                # this is intended.
                next_state = [0] * num_states

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward

            # Act randomly (eps stays at MAX_EPSILON) for the first
            # DELAY_TRAINING steps, then learn every step while eps decays
            # exponentially towards MIN_EPSILON.
            if steps > DELAY_TRAINING:
                buffer.learn()
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps
                                                                        -DELAY_TRAINING))

            steps += 1

            if done:
                break

            state = next_state

        elapsed_time = time.time() - start
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward)
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption)

        # Sum of |SOC - 0.6| over the episode's recorded SOC history.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward),
              "SOC: {:.4f}".format(env.SOC),
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
        )
        print("")

        if (ep + 1) % 10 == 0:
            # Every 10th episode keep the full history. This is the history of
            # the (epsilon-greedy) training episode itself, not of a separate
            # greedy evaluation run via test_agent().
            history = env.history
            episode_test_history.append(history)
            episode_num_test.append(ep + 1)

    # Weights are saved once per trial, after the last episode.
    root = "DDPG_cycleOne_reward_factor{}".format(reward_factor)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)

    results_dict[reward_factor] = {
        "rewards": episode_rewards,
        "SOCs": episode_SOCs,
        "FCs": episode_FCs,
        "test_history": episode_test_history,
        "test_episode_num": episode_num_test,
    }


Trial 7

maximum steps, simulation is done ... 
elapsed_time: 14.777
Episode: 1 Exploration P: 1.0000 Total reward: -3592.440995837971 SOC: 1.0000 Cumulative_SOC_deviation: 492.0373 Fuel Consumption: 148.1799

maximum steps, simulation is done ... 
elapsed_time: 14.059
Episode: 2 Exploration P: 1.0000 Total reward: -3629.894829428993 SOC: 1.0000 Cumulative_SOC_deviation: 496.6670 Fuel Consumption: 153.2255

maximum steps, simulation is done ... 
elapsed_time: 14.447
Episode: 3 Exploration P: 1.0000 Total reward: -3625.5167588200866 SOC: 1.0000 Cumulative_SOC_deviation: 496.2627 Fuel Consumption: 151.6778



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_f

maximum steps, simulation is done ... 
elapsed_time: 91.191
Episode: 28 Exploration P: 0.5172 Total reward: -2456.5039045747326 SOC: 1.0000 Cumulative_SOC_deviation: 337.7159 Fuel Consumption: 92.4930

maximum steps, simulation is done ... 
elapsed_time: 91.116
Episode: 29 Exploration P: 0.5034 Total reward: -2860.8565506837176 SOC: 1.0000 Cumulative_SOC_deviation: 395.0354 Fuel Consumption: 95.6090

maximum steps, simulation is done ... 
elapsed_time: 90.790
Episode: 30 Exploration P: 0.4901 Total reward: -2589.8848028415814 SOC: 1.0000 Cumulative_SOC_deviation: 356.8248 Fuel Consumption: 92.1112

maximum steps, simulation is done ... 
elapsed_time: 91.230
Episode: 31 Exploration P: 0.4771 Total reward: -2500.775452820299 SOC: 1.0000 Cumulative_SOC_deviation: 344.0300 Fuel Consumption: 92.5652

maximum steps, simulation is done ... 
elapsed_time: 91.223
Episode: 32 Exploration P: 0.4644 Total reward: -1989.04091184149 SOC: 1.0000 Cumulative_SOC_deviation: 271.5899 Fuel Consumption: 87

maximum steps, simulation is done ... 
elapsed_time: 91.568
Episode: 69 Exploration P: 0.1745 Total reward: -1806.4599652700497 SOC: 0.3862 Cumulative_SOC_deviation: 252.5984 Fuel Consumption: 38.2711

maximum steps, simulation is done ... 
elapsed_time: 90.326
Episode: 70 Exploration P: 0.1701 Total reward: -2195.1857228186486 SOC: 0.3219 Cumulative_SOC_deviation: 308.7497 Fuel Consumption: 33.9375

maximum steps, simulation is done ... 
elapsed_time: 92.099
Episode: 71 Exploration P: 0.1657 Total reward: -2256.7193115214627 SOC: 0.2840 Cumulative_SOC_deviation: 317.8734 Fuel Consumption: 31.6056

maximum steps, simulation is done ... 
elapsed_time: 91.839
Episode: 72 Exploration P: 0.1615 Total reward: -2217.097971808395 SOC: 0.3082 Cumulative_SOC_deviation: 311.9464 Fuel Consumption: 33.4732

maximum steps, simulation is done ... 
elapsed_time: 92.120
Episode: 73 Exploration P: 0.1574 Total reward: -2365.301408773898 SOC: 0.3042 Cumulative_SOC_deviation: 333.1684 Fuel Consumption: 3

maximum steps, simulation is done ... 
elapsed_time: 92.225
Episode: 110 Exploration P: 0.0634 Total reward: -274.58943906783327 SOC: 0.5837 Cumulative_SOC_deviation: 31.6770 Fuel Consumption: 52.8506

maximum steps, simulation is done ... 
elapsed_time: 91.939
Episode: 111 Exploration P: 0.0619 Total reward: -203.88707653968643 SOC: 0.5974 Cumulative_SOC_deviation: 21.4319 Fuel Consumption: 53.8639

maximum steps, simulation is done ... 
elapsed_time: 92.563
Episode: 112 Exploration P: 0.0605 Total reward: -171.8834281849773 SOC: 0.5817 Cumulative_SOC_deviation: 16.9918 Fuel Consumption: 52.9410

maximum steps, simulation is done ... 
elapsed_time: 92.140
Episode: 113 Exploration P: 0.0591 Total reward: -228.1567382955488 SOC: 0.6024 Cumulative_SOC_deviation: 24.7755 Fuel Consumption: 54.7280

maximum steps, simulation is done ... 
elapsed_time: 92.343
Episode: 114 Exploration P: 0.0578 Total reward: -177.98307696973853 SOC: 0.5949 Cumulative_SOC_deviation: 17.7526 Fuel Consumption: 5

maximum steps, simulation is done ... 
elapsed_time: 92.176
Episode: 151 Exploration P: 0.0273 Total reward: -216.69948937465242 SOC: 0.5859 Cumulative_SOC_deviation: 24.3072 Fuel Consumption: 46.5491

maximum steps, simulation is done ... 
elapsed_time: 91.030
Episode: 152 Exploration P: 0.0268 Total reward: -202.53330549779997 SOC: 0.5942 Cumulative_SOC_deviation: 22.2562 Fuel Consumption: 46.7401

maximum steps, simulation is done ... 
elapsed_time: 91.821
Episode: 153 Exploration P: 0.0264 Total reward: -186.41321821013145 SOC: 0.5986 Cumulative_SOC_deviation: 19.8612 Fuel Consumption: 47.3847

maximum steps, simulation is done ... 
elapsed_time: 92.329
Episode: 154 Exploration P: 0.0259 Total reward: -161.22105969836915 SOC: 0.5997 Cumulative_SOC_deviation: 16.0897 Fuel Consumption: 48.5930

maximum steps, simulation is done ... 
elapsed_time: 92.604
Episode: 155 Exploration P: 0.0255 Total reward: -173.97327045953367 SOC: 0.5981 Cumulative_SOC_deviation: 17.9405 Fuel Consumption:

maximum steps, simulation is done ... 
elapsed_time: 92.567
Episode: 192 Exploration P: 0.0156 Total reward: -160.19328358902823 SOC: 0.5970 Cumulative_SOC_deviation: 16.3721 Fuel Consumption: 45.5883

maximum steps, simulation is done ... 
elapsed_time: 92.315
Episode: 193 Exploration P: 0.0155 Total reward: -185.62312412264302 SOC: 0.5952 Cumulative_SOC_deviation: 19.9704 Fuel Consumption: 45.8306

maximum steps, simulation is done ... 
elapsed_time: 92.676
Episode: 194 Exploration P: 0.0153 Total reward: -182.7403885461153 SOC: 0.5948 Cumulative_SOC_deviation: 19.5846 Fuel Consumption: 45.6484

maximum steps, simulation is done ... 
elapsed_time: 92.598
Episode: 195 Exploration P: 0.0152 Total reward: -211.87198466445554 SOC: 0.5923 Cumulative_SOC_deviation: 23.8539 Fuel Consumption: 44.8949

maximum steps, simulation is done ... 
elapsed_time: 86.902
Episode: 196 Exploration P: 0.0150 Total reward: -184.66199278150236 SOC: 0.5928 Cumulative_SOC_deviation: 19.9631 Fuel Consumption: 

maximum steps, simulation is done ... 
elapsed_time: 90.294
Episode: 19 Exploration P: 0.6594 Total reward: -3459.7019279037468 SOC: 1.0000 Cumulative_SOC_deviation: 418.4299 Fuel Consumption: 112.2625

maximum steps, simulation is done ... 
elapsed_time: 90.197
Episode: 20 Exploration P: 0.6418 Total reward: -3630.0855763710406 SOC: 1.0000 Cumulative_SOC_deviation: 439.6170 Fuel Consumption: 113.1499

maximum steps, simulation is done ... 
elapsed_time: 90.444
Episode: 21 Exploration P: 0.6247 Total reward: -3332.1868624925905 SOC: 1.0000 Cumulative_SOC_deviation: 403.4347 Fuel Consumption: 104.7096

maximum steps, simulation is done ... 
elapsed_time: 89.002
Episode: 22 Exploration P: 0.6080 Total reward: -3485.490651677109 SOC: 1.0000 Cumulative_SOC_deviation: 422.6441 Fuel Consumption: 104.3382

maximum steps, simulation is done ... 
elapsed_time: 90.854
Episode: 23 Exploration P: 0.5918 Total reward: -3397.7111187569844 SOC: 1.0000 Cumulative_SOC_deviation: 412.2262 Fuel Consumpti

maximum steps, simulation is done ... 
elapsed_time: 82.111
Episode: 60 Exploration P: 0.2206 Total reward: -1659.2950077439646 SOC: 0.4779 Cumulative_SOC_deviation: 201.7406 Fuel Consumption: 45.3700

maximum steps, simulation is done ... 
elapsed_time: 82.601
Episode: 61 Exploration P: 0.2149 Total reward: -1726.8118122629544 SOC: 0.4970 Cumulative_SOC_deviation: 209.9403 Fuel Consumption: 47.2892

maximum steps, simulation is done ... 
elapsed_time: 82.468
Episode: 62 Exploration P: 0.2094 Total reward: -1473.665939222874 SOC: 0.4882 Cumulative_SOC_deviation: 178.4054 Fuel Consumption: 46.4225

maximum steps, simulation is done ... 
elapsed_time: 82.605
Episode: 63 Exploration P: 0.2040 Total reward: -1637.6656524821135 SOC: 0.4745 Cumulative_SOC_deviation: 199.0124 Fuel Consumption: 45.5661

maximum steps, simulation is done ... 
elapsed_time: 82.124
Episode: 64 Exploration P: 0.1987 Total reward: -2423.998299576411 SOC: 0.4001 Cumulative_SOC_deviation: 297.9592 Fuel Consumption: 4

maximum steps, simulation is done ... 
elapsed_time: 83.079
Episode: 101 Exploration P: 0.0783 Total reward: -125.24113495881248 SOC: 0.6084 Cumulative_SOC_deviation: 9.4440 Fuel Consumption: 49.6895

maximum steps, simulation is done ... 
elapsed_time: 83.311
Episode: 102 Exploration P: 0.0765 Total reward: -130.75519857985432 SOC: 0.5915 Cumulative_SOC_deviation: 10.3198 Fuel Consumption: 48.1965

maximum steps, simulation is done ... 
elapsed_time: 83.832
Episode: 103 Exploration P: 0.0747 Total reward: -141.55775269738385 SOC: 0.5978 Cumulative_SOC_deviation: 11.6756 Fuel Consumption: 48.1527

maximum steps, simulation is done ... 
elapsed_time: 83.861
Episode: 104 Exploration P: 0.0729 Total reward: -147.74304781563447 SOC: 0.6013 Cumulative_SOC_deviation: 12.4324 Fuel Consumption: 48.2842

maximum steps, simulation is done ... 
elapsed_time: 83.555
Episode: 105 Exploration P: 0.0712 Total reward: -139.74325132833738 SOC: 0.5900 Cumulative_SOC_deviation: 11.4988 Fuel Consumption: 

maximum steps, simulation is done ... 
elapsed_time: 83.114
Episode: 142 Exploration P: 0.0322 Total reward: -115.1370116827299 SOC: 0.5997 Cumulative_SOC_deviation: 8.1307 Fuel Consumption: 50.0912

maximum steps, simulation is done ... 
elapsed_time: 82.943
Episode: 143 Exploration P: 0.0316 Total reward: -119.15519773163513 SOC: 0.5989 Cumulative_SOC_deviation: 8.6726 Fuel Consumption: 49.7743

maximum steps, simulation is done ... 
elapsed_time: 83.119
Episode: 144 Exploration P: 0.0310 Total reward: -129.11493309339002 SOC: 0.6026 Cumulative_SOC_deviation: 10.0339 Fuel Consumption: 48.8434

maximum steps, simulation is done ... 
elapsed_time: 83.121
Episode: 145 Exploration P: 0.0304 Total reward: -139.84357081689103 SOC: 0.6019 Cumulative_SOC_deviation: 11.3962 Fuel Consumption: 48.6738

maximum steps, simulation is done ... 
elapsed_time: 83.094
Episode: 146 Exploration P: 0.0299 Total reward: -134.21251695820712 SOC: 0.5971 Cumulative_SOC_deviation: 10.6603 Fuel Consumption: 48

maximum steps, simulation is done ... 
elapsed_time: 83.740
Episode: 183 Exploration P: 0.0172 Total reward: -114.20655345178236 SOC: 0.5979 Cumulative_SOC_deviation: 8.3951 Fuel Consumption: 47.0459

maximum steps, simulation is done ... 
elapsed_time: 84.188
Episode: 184 Exploration P: 0.0170 Total reward: -159.34169751603653 SOC: 0.5967 Cumulative_SOC_deviation: 13.9155 Fuel Consumption: 48.0176

maximum steps, simulation is done ... 
elapsed_time: 84.327
Episode: 185 Exploration P: 0.0168 Total reward: -140.52731380960614 SOC: 0.6001 Cumulative_SOC_deviation: 11.5301 Fuel Consumption: 48.2861

maximum steps, simulation is done ... 
elapsed_time: 83.479
Episode: 186 Exploration P: 0.0166 Total reward: -146.83649089721428 SOC: 0.5969 Cumulative_SOC_deviation: 12.3309 Fuel Consumption: 48.1894

maximum steps, simulation is done ... 
elapsed_time: 83.766
Episode: 187 Exploration P: 0.0164 Total reward: -140.03884384252714 SOC: 0.5991 Cumulative_SOC_deviation: 11.5115 Fuel Consumption: 

maximum steps, simulation is done ... 
elapsed_time: 83.835
Episode: 10 Exploration P: 0.8414 Total reward: -4414.07649073535 SOC: 1.0000 Cumulative_SOC_deviation: 475.7472 Fuel Consumption: 132.3519

maximum steps, simulation is done ... 
elapsed_time: 83.729
Episode: 11 Exploration P: 0.8189 Total reward: -4364.948838708755 SOC: 1.0000 Cumulative_SOC_deviation: 471.1862 Fuel Consumption: 124.2728

maximum steps, simulation is done ... 
elapsed_time: 83.830
Episode: 12 Exploration P: 0.7970 Total reward: -4382.882407658159 SOC: 1.0000 Cumulative_SOC_deviation: 472.7111 Fuel Consumption: 128.4826

maximum steps, simulation is done ... 
elapsed_time: 84.094
Episode: 13 Exploration P: 0.7757 Total reward: -4319.485125011663 SOC: 1.0000 Cumulative_SOC_deviation: 465.8160 Fuel Consumption: 127.1413

maximum steps, simulation is done ... 
elapsed_time: 83.716
Episode: 14 Exploration P: 0.7549 Total reward: -4194.167900464329 SOC: 1.0000 Cumulative_SOC_deviation: 452.6038 Fuel Consumption: 1

maximum steps, simulation is done ... 
elapsed_time: 85.013
Episode: 51 Exploration P: 0.2797 Total reward: -964.5590210022355 SOC: 0.6421 Cumulative_SOC_deviation: 100.7071 Fuel Consumption: 58.1955

maximum steps, simulation is done ... 
elapsed_time: 85.304
Episode: 52 Exploration P: 0.2724 Total reward: -701.4837894273246 SOC: 0.7448 Cumulative_SOC_deviation: 70.7599 Fuel Consumption: 64.6443

maximum steps, simulation is done ... 
elapsed_time: 84.564
Episode: 53 Exploration P: 0.2653 Total reward: -827.7810439604345 SOC: 0.7229 Cumulative_SOC_deviation: 84.8423 Fuel Consumption: 64.2006

maximum steps, simulation is done ... 
elapsed_time: 84.802
Episode: 54 Exploration P: 0.2584 Total reward: -578.9197367658476 SOC: 0.6940 Cumulative_SOC_deviation: 57.5659 Fuel Consumption: 60.8266

maximum steps, simulation is done ... 
elapsed_time: 84.842
Episode: 55 Exploration P: 0.2516 Total reward: -780.7277692218638 SOC: 0.7169 Cumulative_SOC_deviation: 79.7081 Fuel Consumption: 63.3545


maximum steps, simulation is done ... 
elapsed_time: 82.303
Episode: 92 Exploration P: 0.0975 Total reward: -145.92133282632048 SOC: 0.6012 Cumulative_SOC_deviation: 10.3944 Fuel Consumption: 52.3715

maximum steps, simulation is done ... 
elapsed_time: 85.301
Episode: 93 Exploration P: 0.0951 Total reward: -180.08342370431794 SOC: 0.5962 Cumulative_SOC_deviation: 14.3329 Fuel Consumption: 51.0875

maximum steps, simulation is done ... 
elapsed_time: 85.416
Episode: 94 Exploration P: 0.0928 Total reward: -146.2547449380227 SOC: 0.5998 Cumulative_SOC_deviation: 10.3702 Fuel Consumption: 52.9225

maximum steps, simulation is done ... 
elapsed_time: 85.384
Episode: 95 Exploration P: 0.0906 Total reward: -158.72245878924002 SOC: 0.6058 Cumulative_SOC_deviation: 11.7605 Fuel Consumption: 52.8777

maximum steps, simulation is done ... 
elapsed_time: 85.568
Episode: 96 Exploration P: 0.0884 Total reward: -123.71207609026067 SOC: 0.6056 Cumulative_SOC_deviation: 7.9317 Fuel Consumption: 52.327

maximum steps, simulation is done ... 
elapsed_time: 85.740
Episode: 133 Exploration P: 0.0384 Total reward: -131.5152022623227 SOC: 0.6012 Cumulative_SOC_deviation: 9.4699 Fuel Consumption: 46.2864

maximum steps, simulation is done ... 
elapsed_time: 86.093
Episode: 134 Exploration P: 0.0376 Total reward: -138.03914308795063 SOC: 0.5936 Cumulative_SOC_deviation: 10.3019 Fuel Consumption: 45.3216

maximum steps, simulation is done ... 
elapsed_time: 86.405
Episode: 135 Exploration P: 0.0369 Total reward: -141.58819878762313 SOC: 0.5946 Cumulative_SOC_deviation: 10.6874 Fuel Consumption: 45.4017

maximum steps, simulation is done ... 
elapsed_time: 85.874
Episode: 136 Exploration P: 0.0361 Total reward: -145.711698911193 SOC: 0.5952 Cumulative_SOC_deviation: 11.1339 Fuel Consumption: 45.5065

maximum steps, simulation is done ... 
elapsed_time: 85.593
Episode: 137 Exploration P: 0.0354 Total reward: -141.3383673195926 SOC: 0.5960 Cumulative_SOC_deviation: 10.6429 Fuel Consumption: 45.5

maximum steps, simulation is done ... 
elapsed_time: 85.782
Episode: 174 Exploration P: 0.0192 Total reward: -174.29978076967922 SOC: 0.5902 Cumulative_SOC_deviation: 14.4236 Fuel Consumption: 44.4876

maximum steps, simulation is done ... 
elapsed_time: 85.783
Episode: 175 Exploration P: 0.0190 Total reward: -193.66683458985275 SOC: 0.5888 Cumulative_SOC_deviation: 16.5873 Fuel Consumption: 44.3813

maximum steps, simulation is done ... 
elapsed_time: 86.257
Episode: 176 Exploration P: 0.0187 Total reward: -185.88052741226153 SOC: 0.5918 Cumulative_SOC_deviation: 15.6822 Fuel Consumption: 44.7408

maximum steps, simulation is done ... 
elapsed_time: 86.118
Episode: 177 Exploration P: 0.0185 Total reward: -165.00237538980448 SOC: 0.5923 Cumulative_SOC_deviation: 13.3134 Fuel Consumption: 45.1815

maximum steps, simulation is done ... 
elapsed_time: 86.482
Episode: 178 Exploration P: 0.0182 Total reward: -183.78883536035846 SOC: 0.5978 Cumulative_SOC_deviation: 15.3960 Fuel Consumption:



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

maximum steps, simulation is done ... 
elapsed_time: 31.078
Episode: 4 Exploration P: 0.9903 Total reward: -5155.085447417934 SOC: 1.0000 Cumulative_SOC_deviation: 500.5100 Fuel Consumption: 149.9856

maximum steps, simulation is done ... 
elapsed_time: 67.468
Episode: 5 Exploration P: 0.9638 Total reward: -5091.574052831853 SOC: 1.0000 Cumulative_SOC_deviation: 494.2404 Fuel Consumption: 149.1705

maximum steps, simulation is done ... 
elapsed_time: 67.718
Episode: 6 Exploration P: 0.9379 Total reward: -4981.953249032387 SOC: 1.0000 Cumulative_SOC_deviation: 484.0614 Fuel Consumption: 141.3390

maximum steps, simulation is done ... 
elapsed_time: 67.758
Episode: 7 Exploration P: 0.9128 Tot

maximum steps, simulation is done ... 
elapsed_time: 68.729
Episode: 42 Exploration P: 0.3553 Total reward: -1753.9295493377467 SOC: 0.9373 Cumulative_SOC_deviation: 167.4881 Fuel Consumption: 79.0484

maximum steps, simulation is done ... 
elapsed_time: 68.650
Episode: 43 Exploration P: 0.3459 Total reward: -1730.0315282227277 SOC: 0.9162 Cumulative_SOC_deviation: 165.1932 Fuel Consumption: 78.0991

maximum steps, simulation is done ... 
elapsed_time: 68.665
Episode: 44 Exploration P: 0.3368 Total reward: -1086.073132635346 SOC: 0.8399 Cumulative_SOC_deviation: 101.3577 Fuel Consumption: 72.4964

maximum steps, simulation is done ... 
elapsed_time: 68.574
Episode: 45 Exploration P: 0.3280 Total reward: -1480.2537238720454 SOC: 0.9029 Cumulative_SOC_deviation: 140.2836 Fuel Consumption: 77.4182

maximum steps, simulation is done ... 
elapsed_time: 68.691
Episode: 46 Exploration P: 0.3194 Total reward: -1144.3384190226452 SOC: 0.8239 Cumulative_SOC_deviation: 107.3637 Fuel Consumption: 

maximum steps, simulation is done ... 
elapsed_time: 69.323
Episode: 83 Exploration P: 0.1220 Total reward: -197.6426159735605 SOC: 0.6097 Cumulative_SOC_deviation: 14.7692 Fuel Consumption: 49.9502

maximum steps, simulation is done ... 
elapsed_time: 69.325
Episode: 84 Exploration P: 0.1190 Total reward: -160.21976347042903 SOC: 0.6176 Cumulative_SOC_deviation: 10.8624 Fuel Consumption: 51.5962

maximum steps, simulation is done ... 
elapsed_time: 69.014
Episode: 85 Exploration P: 0.1160 Total reward: -138.32757972962435 SOC: 0.6073 Cumulative_SOC_deviation: 8.8833 Fuel Consumption: 49.4942

maximum steps, simulation is done ... 
elapsed_time: 68.625
Episode: 86 Exploration P: 0.1131 Total reward: -118.36761723864228 SOC: 0.6041 Cumulative_SOC_deviation: 6.9734 Fuel Consumption: 48.6333

maximum steps, simulation is done ... 
elapsed_time: 68.940
Episode: 87 Exploration P: 0.1104 Total reward: -154.59631856379022 SOC: 0.6165 Cumulative_SOC_deviation: 10.4217 Fuel Consumption: 50.3789

maximum steps, simulation is done ... 
elapsed_time: 69.446
Episode: 124 Exploration P: 0.0463 Total reward: -159.338030504145 SOC: 0.5933 Cumulative_SOC_deviation: 11.3157 Fuel Consumption: 46.1813

maximum steps, simulation is done ... 
elapsed_time: 69.269
Episode: 125 Exploration P: 0.0453 Total reward: -153.78531258581663 SOC: 0.5949 Cumulative_SOC_deviation: 10.7388 Fuel Consumption: 46.3977

maximum steps, simulation is done ... 
elapsed_time: 69.562
Episode: 126 Exploration P: 0.0444 Total reward: -110.83554995880418 SOC: 0.6011 Cumulative_SOC_deviation: 6.4348 Fuel Consumption: 46.4877

maximum steps, simulation is done ... 
elapsed_time: 69.741
Episode: 127 Exploration P: 0.0435 Total reward: -107.21579211373441 SOC: 0.6106 Cumulative_SOC_deviation: 6.0209 Fuel Consumption: 47.0071

maximum steps, simulation is done ... 
elapsed_time: 69.344
Episode: 128 Exploration P: 0.0426 Total reward: -110.18677412445238 SOC: 0.6143 Cumulative_SOC_deviation: 6.3091 Fuel Consumption: 47.0

maximum steps, simulation is done ... 
elapsed_time: 69.408
Episode: 165 Exploration P: 0.0218 Total reward: -111.60282362869586 SOC: 0.5941 Cumulative_SOC_deviation: 6.6532 Fuel Consumption: 45.0707

maximum steps, simulation is done ... 
elapsed_time: 69.395
Episode: 166 Exploration P: 0.0215 Total reward: -129.64379476804888 SOC: 0.5998 Cumulative_SOC_deviation: 8.4518 Fuel Consumption: 45.1258

maximum steps, simulation is done ... 
elapsed_time: 69.301
Episode: 167 Exploration P: 0.0212 Total reward: -109.08227933996433 SOC: 0.6040 Cumulative_SOC_deviation: 6.3281 Fuel Consumption: 45.8014

maximum steps, simulation is done ... 
elapsed_time: 69.672
Episode: 168 Exploration P: 0.0209 Total reward: -109.9996392778081 SOC: 0.6034 Cumulative_SOC_deviation: 6.4071 Fuel Consumption: 45.9291

maximum steps, simulation is done ... 
elapsed_time: 69.505
Episode: 169 Exploration P: 0.0206 Total reward: -115.61151282909829 SOC: 0.6036 Cumulative_SOC_deviation: 7.0052 Fuel Consumption: 45.55

In [16]:
# Persist the per-reward-factor training results for later analysis.
with open("DDPG_cycleOne_7to10.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [17]:
# results_dict