In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 

from vehicle_model_DDPG1 import Environment 
from cell_model import CellModel 

# Hide every CUDA device from this process so the notebook runs on the CPU.
# NOTE(review): this is set after `import tensorflow`; presumably it still takes
# effect because no GPU work has been issued yet - confirm on the target setup.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Paths to the MATLAB (.mat) data files handed to the vehicle Environment:
# the driving cycle, the battery data and the motor data.
# NOTE: `drving_cycle` is misspelled, but the name is reused by later cells
# (see `initialization`), so it is left as-is here.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
cell_model = CellModel()
# The last positional argument is the reward factor (compare the call in
# `initialization`). This instance is replaced by a fresh Environment at the
# start of every training trial.
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)

# Length of the state vector: sizes the replay buffer rows and the network inputs.
num_states = 4

In [3]:
class OUActionNoise:
    """Ornstein-Uhlenbeck process that produces temporally correlated noise.

    Every call advances the process by one step of size ``dt``: the previous
    sample is pulled towards ``mean`` at rate ``theta`` and perturbed by a
    Gaussian draw scaled by ``std_deviation * sqrt(dt)``.

    Args:
        mean: Value the process reverts to.
        std_deviation: Scale of the random perturbation.
        theta: Mean-reversion rate.
        dt: Step size of the discretisation.
        x_initial: Starting sample; when ``None`` the process starts at 0.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.mean = mean
        self.std_dev = std_deviation
        self.theta = theta
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Restart the process from ``x_initial`` (or 0 if none was given)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process by one step and return the new sample."""
        drift = self.theta * (self.mean - self.x_prev) * self.dt
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        self.x_prev = self.x_prev + drift + diffusion
        return self.x_prev

In [4]:
class Buffer:
    """Fixed-capacity circular replay buffer that also runs the DDPG update.

    Transitions are stored as ``(state, action, reward, next_state)`` rows in
    pre-allocated float32 arrays. float32 matches the default dtype of the
    Keras models, so sampled batches are no longer auto-cast from float64 on
    every forward pass.

    ``learn`` reads the module-level ``actor_model``, ``critic_model``,
    ``target_actor``, ``target_critic``, ``actor_optimizer``,
    ``critic_optimizer`` and ``gamma``; they must exist before it is called.

    Args:
        buffer_capacity: Maximum number of transitions kept; once full, the
            oldest entries are overwritten.
        batch_size: Number of transitions sampled per ``learn`` call.
        state_dim: Length of a state vector. Defaults to the module-level
            ``num_states`` when omitted.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64, state_dim=None):
        if state_dim is None:
            state_dim = num_states

        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of record() calls so far (not capped at the capacity).
        self.buffer_counter = 0

        self.state_buffer = np.zeros((self.buffer_capacity, state_dim), dtype=np.float32)
        self.action_buffer = np.zeros((self.buffer_capacity, 1), dtype=np.float32)
        self.reward_buffer = np.zeros((self.buffer_capacity, 1), dtype=np.float32)
        self.next_state_buffer = np.zeros((self.buffer_capacity, state_dim), dtype=np.float32)

    def record(self, obs_tuple):
        """Store one transition, overwriting the oldest slot once full.

        Args:
            obs_tuple: Sequence whose first four items are
                ``(state, action, reward, next_state)``.
        """
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    def learn(self):
        """Sample a batch (with replacement) and update critic, then actor.

        Returns immediately when nothing has been recorded yet, because there
        is nothing to sample from.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            return

        batch_indices = np.random.choice(record_range, self.batch_size)

        state_batch = tf.convert_to_tensor(self.state_buffer[batch_indices], dtype=tf.float32)
        action_batch = tf.convert_to_tensor(self.action_buffer[batch_indices], dtype=tf.float32)
        reward_batch = tf.convert_to_tensor(self.reward_buffer[batch_indices], dtype=tf.float32)
        next_state_batch = tf.convert_to_tensor(
            self.next_state_buffer[batch_indices], dtype=tf.float32
        )

        # Critic step: regress Q(s, a) onto the one-step bootstrapped target.
        # NOTE(review): no terminal flag is stored, so the target bootstraps
        # from next_state even for the last transition of an episode.
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor step: ascend the critic's value of the actor's own actions
        # (implemented as minimising the negated mean Q-value).
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Soft-update both target networks towards their online counterparts.

    Each target weight becomes ``(1 - tau) * target + tau * online``. The
    update is written directly into the target variables with ``assign``
    rather than building a list of new tensors and passing it through
    ``set_weights``.

    Operates on the module-level ``target_critic``/``critic_model`` and
    ``target_actor``/``actor_model``.

    Args:
        tau: Interpolation factor; 0 leaves the targets unchanged, 1 copies
            the online weights.
    """
    model_pairs = (
        (target_critic, critic_model),
        (target_actor, actor_model),
    )
    for target_model, online_model in model_pairs:
        for target_var, online_var in zip(target_model.weights, online_model.weights):
            target_var.assign(target_var * (1 - tau) + tau * online_var)
    

In [6]:
def get_actor():
    """Build the actor network: state vector -> one action value in (0, 1).

    Two 512-unit ReLU hidden layers feed a single sigmoid output unit. The
    output layer's kernel is initialised uniformly in [-0.003, 0.003].

    Returns:
        An uncompiled ``tf.keras.Model`` taking inputs of shape
        ``(batch, num_states)``.
    """
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `(num_states)` is just an int; the trailing comma makes it the 1-tuple
    # shape that `Input` documents and that newer Keras versions require.
    inputs = layers.Input(shape=(num_states,))

    hidden = layers.Dense(512, activation="relu")(inputs)
    hidden = layers.Dense(512, activation="relu")(hidden)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(hidden)
    return tf.keras.Model(inputs, outputs)

In [7]:
def get_critic():
    """Build the critic network: (state, action) -> scalar Q-value.

    The state passes through 16- and 32-unit ReLU layers, the action through
    one 32-unit ReLU layer; both branches are concatenated and fed through
    two 512-unit ReLU layers to a single linear output.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[state, action]`` inputs of
        shapes ``(batch, num_states)`` and ``(batch, 1)``.
    """
    # Shapes are written as real 1-tuples: `(num_states)` and `(1)` are plain
    # ints, which newer Keras versions reject as an `Input` shape.
    state_input = layers.Input(shape=(num_states,))
    state_out = layers.Dense(16, activation="relu")(state_input)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    return tf.keras.Model([state_input, action_input], outputs)
    

In [8]:
def policy(state, noise_object):
    """Pick an action from the actor plus exploration noise.

    The actor output is perturbed by one sample from ``noise_object``, scaled
    by the upper action bound and clipped into ``[j_min, j_max]``. The bounds
    are read from entries 2 and 3 of the first row of ``state``.

    Args:
        state: Batched state tensor (batch of one) accepted by ``actor_model``.
        noise_object: Zero-argument callable returning the noise sample.

    Returns:
        The clipped action.
    """
    row = state[0]
    lower_bound = row[2].numpy()
    upper_bound = row[3].numpy()

    noisy_output = tf.squeeze(actor_model(state)).numpy() + noise_object()
    return np.clip(noisy_output * upper_bound, lower_bound, upper_bound)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Epsilon-greedy action selection within the bounds carried by the state.

    The last two entries of the first row of ``state`` are taken as the lower
    and upper action bounds. With probability ``eps`` one of 10 evenly spaced
    values between the bounds is returned; otherwise the actor output is
    scaled by the upper bound and clipped into the bounds.

    Args:
        state: Batched state tensor (batch of one) accepted by ``actor_model``.
        eps: Exploration probability.

    Returns:
        The chosen action.
    """
    row = state[0]
    lower_bound = row[-2].numpy()
    upper_bound = row[-1].numpy()

    exploit = not (random.random() < eps)
    if exploit:
        actor_output = tf.squeeze(actor_model(state)).numpy()
        return np.clip(actor_output * upper_bound, lower_bound, upper_bound)

    grid_index = random.randint(0, 9)
    return np.linspace(lower_bound, upper_bound, 10)[grid_index]

In [10]:
# Exploration noise for `policy`. The scale is taken from `std_dev` so the
# two cannot drift apart (it was previously repeated as a literal 0.2).
std_dev = 0.2
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# Separate Adam optimizers (and learning rates) for critic and actor.
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 200  # episodes per trial
gamma = 0.95          # discount factor in the critic target
tau = 0.001           # soft-update rate passed to update_target

# Epsilon-greedy schedule used by the training loop:
# eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * exp(-DECAY_RATE * steps)
MAX_EPSILON = 1
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
BATCH_SIZE = 32        # transitions sampled per Buffer.learn call
DELAY_TRAINING = 3000  # environment steps collected before learning starts

In [11]:
def initialization(reward_factor):
    """Create fresh networks, replay buffer and environment for one trial.

    The target networks start as exact weight copies of the online networks.

    Args:
        reward_factor: Passed to ``Environment`` as its last argument.

    Returns:
        Tuple ``(actor_model, critic_model, target_actor, target_critic,
        buffer, env)``.
    """
    online_actor = get_actor()
    online_critic = get_critic()
    actor_copy = get_actor()
    critic_copy = get_critic()

    # Synchronise each target network with its online counterpart.
    for copy, source in ((actor_copy, online_actor), (critic_copy, online_critic)):
        copy.set_weights(source.get_weights())

    replay = Buffer(500000, BATCH_SIZE)
    trial_env = Environment(
        cell_model, drving_cycle, battery_path, motor_path, reward_factor
    )
    return online_actor, online_critic, actor_copy, critic_copy, replay, trial_env

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Write the weights of all four networks under the ``./<root>/`` directory.

    Args:
        actor_model: Online actor network.
        critic_model: Online critic network.
        target_actor: Target actor network.
        target_critic: Target critic network.
        root: Directory name (relative to the working directory) that
            receives the four checkpoint files.
    """
    destinations = (
        (actor_model, "./{}/actor_model_checkpoint"),
        (critic_model, "./{}/critic_model_checkpoint"),
        (target_actor, "./{}/target_actor_checkpoint"),
        (target_critic, "./{}/target_critic_checkpoint"),
    )
    for network, path_template in destinations:
        network.save_weights(path_template.format(root))
    print("model is saved..")

In [13]:
# Train `num_trials` independent DDPG agents and collect per-episode metrics.
print(env.version)

num_trials = 3
reward_factor = 10
# Maps 1-based trial number -> {"rewards", "SOCs", "FCs"} lists (one entry per episode).
results_dict = {} 
for trial in range(num_trials): 
    print()
    # The printed trial label is zero-based, while results_dict keys are 1-based.
    print("Trial {}".format(trial))
    
    # These assignments rebind the module-level networks, buffer and env that
    # Buffer.learn, update_target and policy_epsilon_greedy read as globals.
    actor_model, critic_model, target_actor, target_critic, buffer, env = initialization(
        reward_factor
    )
    
    eps = MAX_EPSILON 
    # Environment steps taken in this trial; drives both the training delay
    # and the epsilon decay, and is not reset between episodes.
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    for ep in range(total_episodes): 
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            # Add a batch dimension so the actor sees shape (1, num_states).
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            if done: 
                # The terminal next state is replaced with an all-zero placeholder.
                # NOTE(review): Buffer.learn still bootstraps from this placeholder
                # because no done flag is stored - confirm this is intended.
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            # Learning and epsilon decay start only once more than
            # DELAY_TRAINING steps have been collected.
            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * steps)

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption) 

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        # Sum over the episode of |SOC - 0.6|, taken from the env's SOC history.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
        )
    
#     root = "DDPG1_trial{}".format(trial+1)
#     save_weights(actor_model, critic_model, target_actor, target_critic, root)
    
    # Store this trial's learning curves under its 1-based trial number.
    results_dict[trial + 1] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs 
    }

1

Trial 0
maximum steps, simulation is done ... 
elapsed_time: 31.199
Episode: 1 Exploration P: 1.0000 Total reward: -5120.667015600076 SOC: 1.0000 Cumulative_SOC_deviation: 496.5048 Fuel Consumption: 155.6193
maximum steps, simulation is done ... 
elapsed_time: 30.611
Episode: 2 Exploration P: 1.0000 Total reward: -5114.857023349722 SOC: 1.0000 Cumulative_SOC_deviation: 496.1869 Fuel Consumption: 152.9882


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer c

maximum steps, simulation is done ... 
elapsed_time: 95.219
Episode: 28 Exploration P: 0.4689 Total reward: -3707.3019071637527 SOC: 1.0000 Cumulative_SOC_deviation: 361.3714 Fuel Consumption: 93.5876
maximum steps, simulation is done ... 
elapsed_time: 93.617
Episode: 29 Exploration P: 0.4565 Total reward: -3391.5038066849274 SOC: 1.0000 Cumulative_SOC_deviation: 329.8330 Fuel Consumption: 93.1740
maximum steps, simulation is done ... 
elapsed_time: 93.211
Episode: 30 Exploration P: 0.4444 Total reward: -2729.620120756504 SOC: 1.0000 Cumulative_SOC_deviation: 264.3442 Fuel Consumption: 86.1783
maximum steps, simulation is done ... 
elapsed_time: 91.081
Episode: 31 Exploration P: 0.4326 Total reward: -2454.6320476280184 SOC: 1.0000 Cumulative_SOC_deviation: 236.9527 Fuel Consumption: 85.1052
maximum steps, simulation is done ... 
elapsed_time: 90.281
Episode: 32 Exploration P: 0.4212 Total reward: -1962.9324940443319 SOC: 0.9825 Cumulative_SOC_deviation: 188.0541 Fuel Consumption: 82.3

maximum steps, simulation is done ... 
elapsed_time: 194.884
Episode: 69 Exploration P: 0.1589 Total reward: -3228.340718569124 SOC: 0.2907 Cumulative_SOC_deviation: 319.6137 Fuel Consumption: 32.2041
maximum steps, simulation is done ... 
elapsed_time: 198.102
Episode: 70 Exploration P: 0.1548 Total reward: -3025.893008950376 SOC: 0.3127 Cumulative_SOC_deviation: 299.1801 Fuel Consumption: 34.0923
maximum steps, simulation is done ... 
elapsed_time: 197.337
Episode: 71 Exploration P: 0.1509 Total reward: -3226.904806610637 SOC: 0.2825 Cumulative_SOC_deviation: 319.4897 Fuel Consumption: 32.0080
maximum steps, simulation is done ... 
elapsed_time: 197.637
Episode: 72 Exploration P: 0.1471 Total reward: -2870.1514555222584 SOC: 0.3438 Cumulative_SOC_deviation: 283.4450 Fuel Consumption: 35.7019
maximum steps, simulation is done ... 
elapsed_time: 198.083
Episode: 73 Exploration P: 0.1434 Total reward: -3693.258372463346 SOC: 0.2361 Cumulative_SOC_deviation: 366.4707 Fuel Consumption: 28

maximum steps, simulation is done ... 
elapsed_time: 82.101
Episode: 110 Exploration P: 0.0583 Total reward: -4908.097723088203 SOC: 0.1022 Cumulative_SOC_deviation: 488.7842 Fuel Consumption: 20.2557
maximum steps, simulation is done ... 
elapsed_time: 82.992
Episode: 111 Exploration P: 0.0570 Total reward: -5201.813517237322 SOC: 0.0289 Cumulative_SOC_deviation: 518.7119 Fuel Consumption: 14.6942
maximum steps, simulation is done ... 
elapsed_time: 83.355
Episode: 112 Exploration P: 0.0557 Total reward: -5223.063657800854 SOC: 0.0308 Cumulative_SOC_deviation: 520.8462 Fuel Consumption: 14.6013
maximum steps, simulation is done ... 
elapsed_time: 83.016
Episode: 113 Exploration P: 0.0545 Total reward: -5059.6403498936 SOC: 0.0347 Cumulative_SOC_deviation: 504.4296 Fuel Consumption: 15.3442
maximum steps, simulation is done ... 
elapsed_time: 83.098
Episode: 114 Exploration P: 0.0533 Total reward: -5393.560103796467 SOC: 0.0192 Cumulative_SOC_deviation: 537.9227 Fuel Consumption: 14.33

  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (
  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (


SOC is nan...
elapsed_time: 71.570
Episode: 128 Exploration P: 0.0396 Total reward: -5700.905003771798 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 6.2729
maximum steps, simulation is done ... 
elapsed_time: 83.536
Episode: 129 Exploration P: 0.0388 Total reward: -6050.162332120844 SOC: -0.0672 Cumulative_SOC_deviation: 604.2030 Fuel Consumption: 8.1319
maximum steps, simulation is done ... 
elapsed_time: 83.535
Episode: 130 Exploration P: 0.0380 Total reward: -5600.278805219993 SOC: -0.0298 Cumulative_SOC_deviation: 558.9423 Fuel Consumption: 10.8558
maximum steps, simulation is done ... 
elapsed_time: 83.795
Episode: 131 Exploration P: 0.0372 Total reward: -5823.548585876469 SOC: -0.0525 Cumulative_SOC_deviation: 581.4385 Fuel Consumption: 9.1637
maximum steps, simulation is done ... 
elapsed_time: 83.479
Episode: 132 Exploration P: 0.0365 Total reward: -5865.855792196197 SOC: -0.0485 Cumulative_SOC_deviation: 585.6166 Fuel Consumption: 9.6899
maximum steps, simulation is

SOC is nan...
elapsed_time: 71.562
Episode: 171 Exploration P: 0.0195 Total reward: -5877.772589803241 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 4.5292
SOC is nan...
elapsed_time: 71.326
Episode: 172 Exploration P: 0.0193 Total reward: -5817.03948133004 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 4.9728
SOC is nan...
elapsed_time: 70.744
Episode: 173 Exploration P: 0.0190 Total reward: -5778.1291135696565 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 5.6332
SOC is nan...
elapsed_time: 71.455
Episode: 174 Exploration P: 0.0188 Total reward: -5868.693803453849 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 4.3744
SOC is nan...
elapsed_time: 71.368
Episode: 175 Exploration P: 0.0186 Total reward: -5724.011290142905 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 5.9015
SOC is nan...
elapsed_time: 71.458
Episode: 176 Exploration P: 0.0184 Total reward: -5795.98350607495 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 4.6839

maximum steps, simulation is done ... 
elapsed_time: 71.765
Episode: 3 Exploration P: 0.9217 Total reward: -5121.582990160045 SOC: 1.0000 Cumulative_SOC_deviation: 497.9235 Fuel Consumption: 142.3484
maximum steps, simulation is done ... 
elapsed_time: 81.456
Episode: 4 Exploration P: 0.8970 Total reward: -5167.600232949158 SOC: 1.0000 Cumulative_SOC_deviation: 500.0762 Fuel Consumption: 166.8385
maximum steps, simulation is done ... 
elapsed_time: 81.620
Episode: 5 Exploration P: 0.8730 Total reward: -5259.23064641122 SOC: 1.0000 Cumulative_SOC_deviation: 509.0353 Fuel Consumption: 168.8781
maximum steps, simulation is done ... 
elapsed_time: 81.482
Episode: 6 Exploration P: 0.8496 Total reward: -5186.901796290291 SOC: 1.0000 Cumulative_SOC_deviation: 501.6951 Fuel Consumption: 169.9512
maximum steps, simulation is done ... 
elapsed_time: 81.524
Episode: 7 Exploration P: 0.8269 Total reward: -5209.535583285984 SOC: 1.0000 Cumulative_SOC_deviation: 504.0399 Fuel Consumption: 169.1361
m

maximum steps, simulation is done ... 
elapsed_time: 83.284
Episode: 44 Exploration P: 0.3057 Total reward: -5461.390522192836 SOC: 1.0000 Cumulative_SOC_deviation: 522.0358 Fuel Consumption: 241.0329
maximum steps, simulation is done ... 
elapsed_time: 82.379
Episode: 45 Exploration P: 0.2977 Total reward: -5457.509197199758 SOC: 1.0000 Cumulative_SOC_deviation: 521.8261 Fuel Consumption: 239.2478
maximum steps, simulation is done ... 
elapsed_time: 82.956
Episode: 46 Exploration P: 0.2899 Total reward: -5455.330459010511 SOC: 1.0000 Cumulative_SOC_deviation: 521.0377 Fuel Consumption: 244.9538
maximum steps, simulation is done ... 
elapsed_time: 82.724
Episode: 47 Exploration P: 0.2824 Total reward: -5483.8540705524965 SOC: 1.0000 Cumulative_SOC_deviation: 523.6052 Fuel Consumption: 247.8016
maximum steps, simulation is done ... 
elapsed_time: 82.691
Episode: 48 Exploration P: 0.2750 Total reward: -5470.166815586316 SOC: 1.0000 Cumulative_SOC_deviation: 522.4986 Fuel Consumption: 245

maximum steps, simulation is done ... 
elapsed_time: 83.226
Episode: 85 Exploration P: 0.1059 Total reward: -5506.947455343484 SOC: 1.0000 Cumulative_SOC_deviation: 523.9036 Fuel Consumption: 267.9116
maximum steps, simulation is done ... 
elapsed_time: 82.837
Episode: 86 Exploration P: 0.1033 Total reward: -5521.170980374429 SOC: 1.0000 Cumulative_SOC_deviation: 525.1650 Fuel Consumption: 269.5213
maximum steps, simulation is done ... 
elapsed_time: 82.925
Episode: 87 Exploration P: 0.1008 Total reward: -5510.381586451623 SOC: 1.0000 Cumulative_SOC_deviation: 524.1696 Fuel Consumption: 268.6855
maximum steps, simulation is done ... 
elapsed_time: 82.920
Episode: 88 Exploration P: 0.0983 Total reward: -5515.497622263038 SOC: 1.0000 Cumulative_SOC_deviation: 524.8226 Fuel Consumption: 267.2719
maximum steps, simulation is done ... 
elapsed_time: 82.901
Episode: 89 Exploration P: 0.0960 Total reward: -5516.0801730878375 SOC: 1.0000 Cumulative_SOC_deviation: 524.6518 Fuel Consumption: 269

maximum steps, simulation is done ... 
elapsed_time: 83.015
Episode: 126 Exploration P: 0.0411 Total reward: -5531.06171708217 SOC: 1.0000 Cumulative_SOC_deviation: 525.3379 Fuel Consumption: 277.6829
maximum steps, simulation is done ... 
elapsed_time: 82.421
Episode: 127 Exploration P: 0.0403 Total reward: -5523.915355702206 SOC: 1.0000 Cumulative_SOC_deviation: 524.6274 Fuel Consumption: 277.6416
maximum steps, simulation is done ... 
elapsed_time: 82.569
Episode: 128 Exploration P: 0.0395 Total reward: -5528.971200309955 SOC: 1.0000 Cumulative_SOC_deviation: 525.0896 Fuel Consumption: 278.0750
maximum steps, simulation is done ... 
elapsed_time: 83.022
Episode: 129 Exploration P: 0.0387 Total reward: -5531.885435497399 SOC: 1.0000 Cumulative_SOC_deviation: 525.4306 Fuel Consumption: 277.5797
maximum steps, simulation is done ... 
elapsed_time: 82.993
Episode: 130 Exploration P: 0.0379 Total reward: -5528.882774227986 SOC: 1.0000 Cumulative_SOC_deviation: 525.1024 Fuel Consumption: 

maximum steps, simulation is done ... 
elapsed_time: 82.981
Episode: 167 Exploration P: 0.0201 Total reward: -5529.352572063793 SOC: 1.0000 Cumulative_SOC_deviation: 524.9523 Fuel Consumption: 279.8291
maximum steps, simulation is done ... 
elapsed_time: 82.847
Episode: 168 Exploration P: 0.0198 Total reward: -5536.49907393348 SOC: 1.0000 Cumulative_SOC_deviation: 525.5834 Fuel Consumption: 280.6648
maximum steps, simulation is done ... 
elapsed_time: 83.167
Episode: 169 Exploration P: 0.0196 Total reward: -5529.200967798601 SOC: 1.0000 Cumulative_SOC_deviation: 525.0249 Fuel Consumption: 278.9520
maximum steps, simulation is done ... 
elapsed_time: 83.153
Episode: 170 Exploration P: 0.0193 Total reward: -5534.7625793886555 SOC: 1.0000 Cumulative_SOC_deviation: 525.5326 Fuel Consumption: 279.4370
maximum steps, simulation is done ... 
elapsed_time: 82.896
Episode: 171 Exploration P: 0.0190 Total reward: -5532.548838279656 SOC: 1.0000 Cumulative_SOC_deviation: 525.2421 Fuel Consumption:



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

maximum steps, simulation is done ... 
elapsed_time: 71.726
Episode: 3 Exploration P: 0.9217 Total reward: -5093.735867465697 SOC: 1.0000 Cumulative_SOC_deviation: 494.9786 Fuel Consumption: 143.9499
maximum steps, simulation is done ... 
elapsed_time: 81.167
Episode: 4 Exploration P: 0.8970 Total reward: -5155.315876112356 SOC: 1.0000 Cumulative_SOC_deviation: 499.1959 Fuel Consumption: 163.356

maximum steps, simulation is done ... 
elapsed_time: 82.178
Episode: 37 Exploration P: 0.3684 Total reward: -5414.105208193595 SOC: 1.0000 Cumulative_SOC_deviation: 518.5939 Fuel Consumption: 228.1662
maximum steps, simulation is done ... 
elapsed_time: 82.297
Episode: 38 Exploration P: 0.3587 Total reward: -5427.860576001921 SOC: 1.0000 Cumulative_SOC_deviation: 519.5144 Fuel Consumption: 232.7165
maximum steps, simulation is done ... 
elapsed_time: 82.462
Episode: 39 Exploration P: 0.3493 Total reward: -5445.046148814093 SOC: 1.0000 Cumulative_SOC_deviation: 520.5313 Fuel Consumption: 239.7328
maximum steps, simulation is done ... 
elapsed_time: 82.359
Episode: 40 Exploration P: 0.3401 Total reward: -5432.87404998937 SOC: 1.0000 Cumulative_SOC_deviation: 519.9095 Fuel Consumption: 233.7792
maximum steps, simulation is done ... 
elapsed_time: 82.396
Episode: 41 Exploration P: 0.3311 Total reward: -5444.801495835519 SOC: 1.0000 Cumulative_SOC_deviation: 520.3253 Fuel Consumption: 241.5

maximum steps, simulation is done ... 
elapsed_time: 96.589
Episode: 78 Exploration P: 0.1263 Total reward: -5509.687944189425 SOC: 1.0000 Cumulative_SOC_deviation: 524.3541 Fuel Consumption: 266.1472
maximum steps, simulation is done ... 
elapsed_time: 91.494
Episode: 79 Exploration P: 0.1231 Total reward: -5502.536295501358 SOC: 1.0000 Cumulative_SOC_deviation: 523.4171 Fuel Consumption: 268.3656
maximum steps, simulation is done ... 
elapsed_time: 91.673
Episode: 80 Exploration P: 0.1200 Total reward: -5509.761208470478 SOC: 1.0000 Cumulative_SOC_deviation: 524.3655 Fuel Consumption: 266.1060
maximum steps, simulation is done ... 
elapsed_time: 91.560
Episode: 81 Exploration P: 0.1171 Total reward: -5496.313305685103 SOC: 1.0000 Cumulative_SOC_deviation: 522.8897 Fuel Consumption: 267.4164
maximum steps, simulation is done ... 
elapsed_time: 91.525
Episode: 82 Exploration P: 0.1142 Total reward: -5514.020083652937 SOC: 1.0000 Cumulative_SOC_deviation: 524.5706 Fuel Consumption: 268.

maximum steps, simulation is done ... 
elapsed_time: 199.217
Episode: 119 Exploration P: 0.0477 Total reward: -5525.700906921023 SOC: 1.0000 Cumulative_SOC_deviation: 524.9452 Fuel Consumption: 276.2487
maximum steps, simulation is done ... 
elapsed_time: 198.465
Episode: 120 Exploration P: 0.0467 Total reward: -5529.3892674381905 SOC: 1.0000 Cumulative_SOC_deviation: 525.3646 Fuel Consumption: 275.7431
maximum steps, simulation is done ... 
elapsed_time: 199.061
Episode: 121 Exploration P: 0.0457 Total reward: -5527.54772743875 SOC: 1.0000 Cumulative_SOC_deviation: 525.1629 Fuel Consumption: 275.9185
maximum steps, simulation is done ... 
elapsed_time: 198.716
Episode: 122 Exploration P: 0.0447 Total reward: -5527.540426735246 SOC: 1.0000 Cumulative_SOC_deviation: 525.1622 Fuel Consumption: 275.9185
maximum steps, simulation is done ... 
elapsed_time: 175.638
Episode: 123 Exploration P: 0.0438 Total reward: -5533.302303468804 SOC: 1.0000 Cumulative_SOC_deviation: 525.5857 Fuel Consump

maximum steps, simulation is done ... 
elapsed_time: 89.513
Episode: 160 Exploration P: 0.0222 Total reward: -5529.276922940431 SOC: 1.0000 Cumulative_SOC_deviation: 524.9830 Fuel Consumption: 279.4473
maximum steps, simulation is done ... 
elapsed_time: 89.740
Episode: 161 Exploration P: 0.0219 Total reward: -5533.267272914158 SOC: 1.0000 Cumulative_SOC_deviation: 525.3975 Fuel Consumption: 279.2925
maximum steps, simulation is done ... 
elapsed_time: 82.432
Episode: 162 Exploration P: 0.0216 Total reward: -5529.223035675594 SOC: 1.0000 Cumulative_SOC_deviation: 525.0498 Fuel Consumption: 278.7250
maximum steps, simulation is done ... 
elapsed_time: 80.140
Episode: 163 Exploration P: 0.0213 Total reward: -5531.74314405228 SOC: 1.0000 Cumulative_SOC_deviation: 525.1832 Fuel Consumption: 279.9116
maximum steps, simulation is done ... 
elapsed_time: 80.141
Episode: 164 Exploration P: 0.0210 Total reward: -5534.889735037528 SOC: 1.0000 Cumulative_SOC_deviation: 525.5267 Fuel Consumption: 

In [14]:
# Persist the per-trial reward / SOC / fuel-consumption curves for later analysis.
with open("DDPG1_mass1200.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)