In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 

from vehicle_model_DDPG2 import Environment 
from cell_model import CellModel 

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_e-4wd_Battery.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_id_75_110_Westinghouse.mat"
cell_model = CellModel()
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)

num_states = 4

In [3]:
class OUActionNoise: 
    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None): 
        self.theta = theta 
        self.mean = mean 
        self.std_dev = std_deviation 
        self.dt = dt 
        self.x_initial = x_initial 
        self.reset() 
        
    def reset(self): 
        if self.x_initial is not None: 
            self.x_prev = self.x_initial 
        else: 
            self.x_prev = 0 
            
    def __call__(self): 
        x = (
             self.x_prev + self.theta * (self.mean - self.x_prev) * self.dt 
            + self.std_dev * np.sqrt(self.dt) * np.random.normal() 
        )
        self.x_prev = x 
        return x 

In [4]:
class Buffer: 
    def __init__(self, buffer_capacity=100000, batch_size=64): 
        self.power_mean = 0 
        self.power_std = 0
        self.sum = 0 
        self.sum_deviation = 0 
        self.N = 0 
        
        self.buffer_capacity = buffer_capacity 
        self.batch_size = batch_size 
        self.buffer_counter = 0 
        
        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))
        
    def record(self, obs_tuple):
        self.N += 1 
        index = self.buffer_counter % self.buffer_capacity 
        power = obs_tuple[0][0] 
        
        self.sum += power 
        self.power_mean = self.sum / self.N 
        self.sum_deviation += (power - self.power_mean) ** 2  
        self.power_std = np.sqrt(self.sum_deviation / self.N) 
            
        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]
        
        self.buffer_counter += 1 
        
    def learn(self): 
        record_range = min(self.buffer_counter, self.buffer_capacity)
        batch_indices = np.random.choice(record_range, self.batch_size)
        
        state_batch = self.state_buffer[batch_indices]
        power_batch = (state_batch[:, 0] - self.power_mean) / self.power_std
        state_batch[:, 0] = power_batch 
        
        next_state_batch = self.next_state_buffer[batch_indices]
        power_batch = (next_state_batch[:, 0] - self.power_mean) / self.power_std
        next_state_batch[:, 0] = power_batch 
#         print(state_batch)
        
        state_batch = tf.convert_to_tensor(state_batch)
        action_batch = tf.convert_to_tensor(self.action_buffer[batch_indices])
        reward_batch = tf.convert_to_tensor(self.reward_buffer[batch_indices])
        reward_batch = tf.cast(reward_batch, dtype=tf.float32)
        next_state_batch = tf.convert_to_tensor(next_state_batch)
        
        with tf.GradientTape() as tape: 
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value)) 
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables) 
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )
        
        with tf.GradientTape() as tape: 
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables) 
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau): 
    new_weights = [] 
    target_variables = target_critic.weights
    for i, variable in enumerate(critic_model.weights): 
        new_weights.append(target_variables[i] * (1 - tau) + tau * variable)
    target_critic.set_weights(new_weights)
    
    new_weights = [] 
    target_variables = target_actor.weights
    for i, variable in enumerate(actor_model.weights): 
        new_weights.append(target_variables[i] * (1 - tau) + tau * variable)
    target_actor.set_weights(new_weights)
    

In [6]:
def get_actor(): 
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)
    
    inputs = layers.Input(shape=(num_states))
    out = layers.Dense(512, activation="relu")(inputs)
#     out = layers.BatchNormalization()(out)
    out = layers.Dense(512, activation="relu")(out)
#     out = layers.BatchNormalization()(out)
    outputs = layers.Dense(1, activation="sigmoid", 
                          kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic(): 
    state_input = layers.Input(shape=(num_states))
    state_out = layers.Dense(16, activation="relu")(state_input)
#     state_out = layers.BatchNormalization()(state_out)
    state_out = layers.Dense(32, activation="relu")(state_input)
#     state_out = layers.BatchNormalization()(state_out)
    
    action_input = layers.Input(shape=(1))
    action_out = layers.Dense(32, activation="relu")(action_input)
#     action_out = layers.BatchNormalization()(action_out)
    
    concat = layers.Concatenate()([state_out, action_out]) 
    
    out = layers.Dense(512, activation="relu")(concat)
#     out = layers.BatchNormalization()(out)
    out = layers.Dense(512, activation="relu")(out)
#     out = layers.BatchNormalization()(out)
    outputs = layers.Dense(1)(out)
    
    model = tf.keras.Model([state_input, action_input], outputs)
    return model 
    

In [8]:
def policy(state, noise_object): 
    j_min = state[0][2].numpy()
    j_max = state[0][3].numpy()
    sampled_action = tf.squeeze(actor_model(state)) 
    noise = noise_object()
    sampled_action = sampled_action.numpy() + noise 
    legal_action = sampled_action * j_max 
    legal_action = np.clip(legal_action, j_min, j_max)
#     print(j_min, j_max, legal_action, noise)
    return legal_action 
    

In [9]:
def policy_epsilon_greedy(state, eps): 
    j_min = state[0][-2].numpy()
    j_max = state[0][-1].numpy()

    if random.random() < eps: 
        a = random.randint(0, 9)
        return np.linspace(j_min, j_max, 10)[a]
    else: 
        sampled_action = tf.squeeze(actor_model(state)).numpy()  
        legal_action = sampled_action * j_max 
        legal_action = np.clip(legal_action, j_min, j_max)
        return legal_action

In [10]:
std_dev = 0.2 
ou_noise = OUActionNoise(mean=0, std_deviation=0.2)

critic_lr = 0.0005 
actor_lr = 0.00025 
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 200
gamma = 0.95 
tau = 0.001 

MAX_EPSILON = 1 
MIN_EPSILON = 0.01 
DECAY_RATE = 0.00002
BATCH_SIZE = 32 
DELAY_TRAINING = 3000 

In [11]:
def initialization(): 
    actor_model = get_actor() 
    critic_model = get_critic() 

    target_actor = get_actor() 
    target_critic = get_critic() 
    target_actor.set_weights(actor_model.get_weights())
    target_critic.set_weights(critic_model.get_weights())
    
    buffer = Buffer(500000, BATCH_SIZE)
    return actor_model, critic_model, target_actor, target_critic, buffer

In [12]:
print(env.version)

num_trials = 3
results_dict = {} 
for trial in range(num_trials): 
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()
    
    eps = MAX_EPSILON 
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    for ep in range(total_episodes): 
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * steps)

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption) 

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
              "Mean: {:.4f}, STD: {:.4f}".format(buffer.power_mean, buffer.power_std)
        )

    results_dict[trial + 1] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs 
    }

1
Available condition is not avail... SOC: 1
elapsed_time: 21.864
Episode: 1 Exploration P: 1.0000 Total reward: -3307.748476306719 SOC: 1.0000 Cumulative_SOC_deviation: 323.1043 Fuel Consumption: 76.7056 Mean: 2.2368, STD: 5.0389
Available condition is not avail... SOC: 1
elapsed_time: 24.057
Episode: 2 Exploration P: 1.0000 Total reward: -3081.1689001913314 SOC: 1.0000 Cumulative_SOC_deviation: 300.4793 Fuel Consumption: 76.3755 Mean: 2.2368, STD: 5.0426


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocas

maximum steps, simulation is done ... 
elapsed_time: 80.805
Episode: 22 Exploration P: 0.5548 Total reward: -1111.8921334589047 SOC: 0.7996 Cumulative_SOC_deviation: 105.4109 Fuel Consumption: 57.7827 Mean: 2.1352, STD: 5.0242
maximum steps, simulation is done ... 
elapsed_time: 80.777
Episode: 23 Exploration P: 0.5400 Total reward: -863.5418571926253 SOC: 0.7427 Cumulative_SOC_deviation: 81.0021 Fuel Consumption: 53.5213 Mean: 2.1339, STD: 5.0240
maximum steps, simulation is done ... 
elapsed_time: 80.866
Episode: 24 Exploration P: 0.5257 Total reward: -949.0377524195727 SOC: 0.7759 Cumulative_SOC_deviation: 89.3019 Fuel Consumption: 56.0183 Mean: 2.1328, STD: 5.0237
maximum steps, simulation is done ... 
elapsed_time: 80.711
Episode: 25 Exploration P: 0.5117 Total reward: -747.799749555121 SOC: 0.7489 Cumulative_SOC_deviation: 69.3566 Fuel Consumption: 54.2333 Mean: 2.1317, STD: 5.0235
maximum steps, simulation is done ... 
elapsed_time: 80.905
Episode: 26 Exploration P: 0.4981 Total

maximum steps, simulation is done ... 
elapsed_time: 81.111
Episode: 59 Exploration P: 0.2072 Total reward: -196.0454304182059 SOC: 0.6306 Cumulative_SOC_deviation: 15.1643 Fuel Consumption: 44.4028 Mean: 2.1173, STD: 5.0204
maximum steps, simulation is done ... 
elapsed_time: 81.398
Episode: 60 Exploration P: 0.2019 Total reward: -130.18059893491298 SOC: 0.6051 Cumulative_SOC_deviation: 8.8152 Fuel Consumption: 42.0288 Mean: 2.1171, STD: 5.0203
maximum steps, simulation is done ... 
elapsed_time: 81.548
Episode: 61 Exploration P: 0.1967 Total reward: -133.56558270167412 SOC: 0.6029 Cumulative_SOC_deviation: 9.1204 Fuel Consumption: 42.3617 Mean: 2.1169, STD: 5.0203
maximum steps, simulation is done ... 
elapsed_time: 81.362
Episode: 62 Exploration P: 0.1916 Total reward: -152.5705164632572 SOC: 0.6011 Cumulative_SOC_deviation: 11.0235 Fuel Consumption: 42.3352 Mean: 2.1168, STD: 5.0202
maximum steps, simulation is done ... 
elapsed_time: 81.570
Episode: 63 Exploration P: 0.1867 Total 

maximum steps, simulation is done ... 
elapsed_time: 82.552
Episode: 96 Exploration P: 0.0814 Total reward: -143.43052585765392 SOC: 0.6142 Cumulative_SOC_deviation: 10.1260 Fuel Consumption: 42.1702 Mean: 2.1132, STD: 5.0195
maximum steps, simulation is done ... 
elapsed_time: 81.957
Episode: 97 Exploration P: 0.0795 Total reward: -112.06640971442869 SOC: 0.6053 Cumulative_SOC_deviation: 7.1010 Fuel Consumption: 41.0564 Mean: 2.1131, STD: 5.0195
maximum steps, simulation is done ... 
elapsed_time: 83.114
Episode: 98 Exploration P: 0.0776 Total reward: -102.99820269067203 SOC: 0.6060 Cumulative_SOC_deviation: 6.1769 Fuel Consumption: 41.2293 Mean: 2.1131, STD: 5.0195
maximum steps, simulation is done ... 
elapsed_time: 82.035
Episode: 99 Exploration P: 0.0758 Total reward: -89.5344637105874 SOC: 0.6062 Cumulative_SOC_deviation: 4.8408 Fuel Consumption: 41.1262 Mean: 2.1130, STD: 5.0194
maximum steps, simulation is done ... 
elapsed_time: 81.589
Episode: 100 Exploration P: 0.0740 Total 

maximum steps, simulation is done ... 
elapsed_time: 81.834
Episode: 133 Exploration P: 0.0359 Total reward: -139.38855596197715 SOC: 0.6063 Cumulative_SOC_deviation: 9.7673 Fuel Consumption: 41.7151 Mean: 2.1114, STD: 5.0191
maximum steps, simulation is done ... 
elapsed_time: 81.699
Episode: 134 Exploration P: 0.0352 Total reward: -115.96886795543391 SOC: 0.6040 Cumulative_SOC_deviation: 7.4586 Fuel Consumption: 41.3826 Mean: 2.1114, STD: 5.0191
maximum steps, simulation is done ... 
elapsed_time: 81.583
Episode: 135 Exploration P: 0.0345 Total reward: -135.20721815862728 SOC: 0.6037 Cumulative_SOC_deviation: 9.3797 Fuel Consumption: 41.4106 Mean: 2.1113, STD: 5.0191
maximum steps, simulation is done ... 
elapsed_time: 82.024
Episode: 136 Exploration P: 0.0338 Total reward: -123.88474380673541 SOC: 0.6038 Cumulative_SOC_deviation: 8.2788 Fuel Consumption: 41.0965 Mean: 2.1113, STD: 5.0191
maximum steps, simulation is done ... 
elapsed_time: 83.808
Episode: 137 Exploration P: 0.0332 T

maximum steps, simulation is done ... 
elapsed_time: 81.782
Episode: 170 Exploration P: 0.0194 Total reward: -108.11612098013033 SOC: 0.6105 Cumulative_SOC_deviation: 6.6419 Fuel Consumption: 41.6969 Mean: 2.1104, STD: 5.0189
maximum steps, simulation is done ... 
elapsed_time: 81.565
Episode: 171 Exploration P: 0.0191 Total reward: -130.50357949542766 SOC: 0.5928 Cumulative_SOC_deviation: 8.9825 Fuel Consumption: 40.6783 Mean: 2.1104, STD: 5.0189
maximum steps, simulation is done ... 
elapsed_time: 81.380
Episode: 172 Exploration P: 0.0189 Total reward: -162.92566083028038 SOC: 0.6024 Cumulative_SOC_deviation: 11.9946 Fuel Consumption: 42.9798 Mean: 2.1104, STD: 5.0189
maximum steps, simulation is done ... 
elapsed_time: 81.584
Episode: 173 Exploration P: 0.0186 Total reward: -103.4001159690034 SOC: 0.6046 Cumulative_SOC_deviation: 6.1911 Fuel Consumption: 41.4895 Mean: 2.1103, STD: 5.0189
maximum steps, simulation is done ... 
elapsed_time: 81.643
Episode: 174 Exploration P: 0.0184 T



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Available condition is not avail... SOC: 0.9988796349385357
elapsed_time: 60.010
Episode: 3 Explo

maximum steps, simulation is done ... 
elapsed_time: 82.070
Episode: 30 Exploration P: 0.4473 Total reward: -566.7483522818159 SOC: 0.6769 Cumulative_SOC_deviation: 51.7850 Fuel Consumption: 48.8988 Mean: 2.1275, STD: 5.0226
maximum steps, simulation is done ... 
elapsed_time: 81.397
Episode: 31 Exploration P: 0.4355 Total reward: -552.0198724670125 SOC: 0.6844 Cumulative_SOC_deviation: 50.2265 Fuel Consumption: 49.7552 Mean: 2.1268, STD: 5.0224
maximum steps, simulation is done ... 
elapsed_time: 81.368
Episode: 32 Exploration P: 0.4240 Total reward: -698.3110373974553 SOC: 0.6305 Cumulative_SOC_deviation: 65.2497 Fuel Consumption: 45.8137 Mean: 2.1262, STD: 5.0223
maximum steps, simulation is done ... 
elapsed_time: 81.304
Episode: 33 Exploration P: 0.4128 Total reward: -582.3311700521274 SOC: 0.6460 Cumulative_SOC_deviation: 53.5486 Fuel Consumption: 46.8455 Mean: 2.1256, STD: 5.0222
maximum steps, simulation is done ... 
elapsed_time: 81.168
Episode: 34 Exploration P: 0.4019 Total 

maximum steps, simulation is done ... 
elapsed_time: 81.755
Episode: 67 Exploration P: 0.1683 Total reward: -237.32095575969956 SOC: 0.6181 Cumulative_SOC_deviation: 19.4174 Fuel Consumption: 43.1466 Mean: 2.1160, STD: 5.0201
maximum steps, simulation is done ... 
elapsed_time: 78.673
Episode: 68 Exploration P: 0.1640 Total reward: -208.48683904373704 SOC: 0.6166 Cumulative_SOC_deviation: 16.5310 Fuel Consumption: 43.1771 Mean: 2.1159, STD: 5.0201
maximum steps, simulation is done ... 
elapsed_time: 81.163
Episode: 69 Exploration P: 0.1599 Total reward: -186.41926794775847 SOC: 0.6157 Cumulative_SOC_deviation: 14.3675 Fuel Consumption: 42.7443 Mean: 2.1157, STD: 5.0200
maximum steps, simulation is done ... 
elapsed_time: 82.034
Episode: 70 Exploration P: 0.1558 Total reward: -180.70905573035907 SOC: 0.6106 Cumulative_SOC_deviation: 13.8576 Fuel Consumption: 42.1332 Mean: 2.1156, STD: 5.0200
maximum steps, simulation is done ... 
elapsed_time: 81.630
Episode: 71 Exploration P: 0.1519 To

maximum steps, simulation is done ... 
elapsed_time: 83.536
Episode: 104 Exploration P: 0.0673 Total reward: -95.87933726131693 SOC: 0.6058 Cumulative_SOC_deviation: 5.4737 Fuel Consumption: 41.1420 Mean: 2.1127, STD: 5.0194
maximum steps, simulation is done ... 
elapsed_time: 81.999
Episode: 105 Exploration P: 0.0658 Total reward: -140.44545734571477 SOC: 0.6057 Cumulative_SOC_deviation: 9.8968 Fuel Consumption: 41.4775 Mean: 2.1127, STD: 5.0194
maximum steps, simulation is done ... 
elapsed_time: 81.688
Episode: 106 Exploration P: 0.0643 Total reward: -99.49906895464069 SOC: 0.6087 Cumulative_SOC_deviation: 5.8186 Fuel Consumption: 41.3128 Mean: 2.1126, STD: 5.0193
maximum steps, simulation is done ... 
elapsed_time: 81.808
Episode: 107 Exploration P: 0.0628 Total reward: -121.80946209591683 SOC: 0.6113 Cumulative_SOC_deviation: 8.0367 Fuel Consumption: 41.4423 Mean: 2.1125, STD: 5.0193
maximum steps, simulation is done ... 
elapsed_time: 82.362
Episode: 108 Exploration P: 0.0614 Tot

maximum steps, simulation is done ... 
elapsed_time: 81.790
Episode: 141 Exploration P: 0.0308 Total reward: -116.02177650295137 SOC: 0.6019 Cumulative_SOC_deviation: 7.5037 Fuel Consumption: 40.9846 Mean: 2.1111, STD: 5.0190
maximum steps, simulation is done ... 
elapsed_time: 81.970
Episode: 142 Exploration P: 0.0302 Total reward: -102.37317876835682 SOC: 0.6010 Cumulative_SOC_deviation: 6.1281 Fuel Consumption: 41.0926 Mean: 2.1111, STD: 5.0190
maximum steps, simulation is done ... 
elapsed_time: 81.920
Episode: 143 Exploration P: 0.0296 Total reward: -129.4198127784279 SOC: 0.6008 Cumulative_SOC_deviation: 8.8244 Fuel Consumption: 41.1761 Mean: 2.1111, STD: 5.0190
maximum steps, simulation is done ... 
elapsed_time: 81.491
Episode: 144 Exploration P: 0.0291 Total reward: -103.7946574058235 SOC: 0.6014 Cumulative_SOC_deviation: 6.2849 Fuel Consumption: 40.9453 Mean: 2.1111, STD: 5.0190
maximum steps, simulation is done ... 
elapsed_time: 81.714
Episode: 145 Exploration P: 0.0286 Tot

maximum steps, simulation is done ... 
elapsed_time: 81.775
Episode: 178 Exploration P: 0.0175 Total reward: -113.49561366807112 SOC: 0.6000 Cumulative_SOC_deviation: 7.2881 Fuel Consumption: 40.6147 Mean: 2.1102, STD: 5.0188
maximum steps, simulation is done ... 
elapsed_time: 82.039
Episode: 179 Exploration P: 0.0173 Total reward: -106.59652176686184 SOC: 0.6012 Cumulative_SOC_deviation: 6.5839 Fuel Consumption: 40.7574 Mean: 2.1102, STD: 5.0188
maximum steps, simulation is done ... 
elapsed_time: 82.117
Episode: 180 Exploration P: 0.0171 Total reward: -98.1438006089424 SOC: 0.6015 Cumulative_SOC_deviation: 5.7558 Fuel Consumption: 40.5859 Mean: 2.1102, STD: 5.0188
maximum steps, simulation is done ... 
elapsed_time: 82.658
Episode: 181 Exploration P: 0.0169 Total reward: -135.0351732772012 SOC: 0.5985 Cumulative_SOC_deviation: 9.4596 Fuel Consumption: 40.4394 Mean: 2.1102, STD: 5.0188
maximum steps, simulation is done ... 
elapsed_time: 82.828
Episode: 182 Exploration P: 0.0167 Tota



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Available condition is not avail... SOC: 1
elapsed_time: 54.341
Episode: 3 Exploration P: 0.9255 Total reward: -2988.800193728721 SOC: 1.0000 Cumulative_SOC_deviation: 291.6201 Fuel Consumption: 72.5990 Mean: 2.2368, STD: 5.0441
Available condition is not avail... SOC: 0.998215173719458
elapsed_time: 68.685
Episode: 4 Exploration P: 0.9019 Total reward: -2859.8869908494526 SOC: 0.9982 Cumulative_SOC_deviation: 278.8206 Fuel Consumption: 71.6807 Mean: 2.2368, STD: 5.0449
Available condition is not avail... SOC: 0.9985754558547004
elapsed_time: 69.201
Episode: 5 Exploration P: 0.8789 Total reward: -2910.112837288288 SOC: 0.9986 Cumulative_SOC_deviation: 283.8473 Fuel Consumption: 71.6394 Mean

maximum steps, simulation is done ... 
elapsed_time: 73.718
Episode: 36 Exploration P: 0.3809 Total reward: -821.6120726819954 SOC: 0.6112 Cumulative_SOC_deviation: 77.7191 Fuel Consumption: 44.4207 Mean: 2.1240, STD: 5.0218
maximum steps, simulation is done ... 
elapsed_time: 74.283
Episode: 37 Exploration P: 0.3709 Total reward: -837.1105369728457 SOC: 0.6229 Cumulative_SOC_deviation: 79.1617 Fuel Consumption: 45.4938 Mean: 2.1236, STD: 5.0217
maximum steps, simulation is done ... 
elapsed_time: 73.579
Episode: 38 Exploration P: 0.3611 Total reward: -1044.2880935153687 SOC: 0.5595 Cumulative_SOC_deviation: 100.3644 Fuel Consumption: 40.6443 Mean: 2.1231, STD: 5.0216
maximum steps, simulation is done ... 
elapsed_time: 73.158
Episode: 39 Exploration P: 0.3516 Total reward: -951.6332122037911 SOC: 0.5863 Cumulative_SOC_deviation: 90.8956 Fuel Consumption: 42.6770 Mean: 2.1227, STD: 5.0215
maximum steps, simulation is done ... 
elapsed_time: 72.965
Episode: 40 Exploration P: 0.3423 Tota

maximum steps, simulation is done ... 
elapsed_time: 73.392
Episode: 73 Exploration P: 0.1443 Total reward: -252.92297848550137 SOC: 0.6133 Cumulative_SOC_deviation: 20.9389 Fuel Consumption: 43.5344 Mean: 2.1152, STD: 5.0199
maximum steps, simulation is done ... 
elapsed_time: 73.655
Episode: 74 Exploration P: 0.1406 Total reward: -222.50902946922344 SOC: 0.6149 Cumulative_SOC_deviation: 17.9293 Fuel Consumption: 43.2162 Mean: 2.1151, STD: 5.0199
maximum steps, simulation is done ... 
elapsed_time: 73.849
Episode: 75 Exploration P: 0.1371 Total reward: -209.76176744678614 SOC: 0.6104 Cumulative_SOC_deviation: 16.6999 Fuel Consumption: 42.7633 Mean: 2.1150, STD: 5.0199
maximum steps, simulation is done ... 
elapsed_time: 74.301
Episode: 76 Exploration P: 0.1337 Total reward: -168.22500223221016 SOC: 0.6146 Cumulative_SOC_deviation: 12.5493 Fuel Consumption: 42.7318 Mean: 2.1149, STD: 5.0198
maximum steps, simulation is done ... 
elapsed_time: 74.157
Episode: 77 Exploration P: 0.1303 To

maximum steps, simulation is done ... 
elapsed_time: 73.476
Episode: 110 Exploration P: 0.0586 Total reward: -152.99858522489103 SOC: 0.6114 Cumulative_SOC_deviation: 11.0624 Fuel Consumption: 42.3747 Mean: 2.1124, STD: 5.0193
maximum steps, simulation is done ... 
elapsed_time: 73.301
Episode: 111 Exploration P: 0.0573 Total reward: -142.90476781179697 SOC: 0.6081 Cumulative_SOC_deviation: 10.0845 Fuel Consumption: 42.0593 Mean: 2.1123, STD: 5.0193
maximum steps, simulation is done ... 
elapsed_time: 73.682
Episode: 112 Exploration P: 0.0560 Total reward: -139.8458175164398 SOC: 0.6069 Cumulative_SOC_deviation: 9.7509 Fuel Consumption: 42.3364 Mean: 2.1123, STD: 5.0193
maximum steps, simulation is done ... 
elapsed_time: 74.444
Episode: 113 Exploration P: 0.0548 Total reward: -131.85460838260875 SOC: 0.6084 Cumulative_SOC_deviation: 8.9749 Fuel Consumption: 42.1059 Mean: 2.1122, STD: 5.0193
maximum steps, simulation is done ... 
elapsed_time: 73.707
Episode: 114 Exploration P: 0.0536 

maximum steps, simulation is done ... 
elapsed_time: 73.846
Episode: 147 Exploration P: 0.0276 Total reward: -121.76730351052156 SOC: 0.6085 Cumulative_SOC_deviation: 8.0493 Fuel Consumption: 41.2739 Mean: 2.1110, STD: 5.0190
maximum steps, simulation is done ... 
elapsed_time: 73.983
Episode: 148 Exploration P: 0.0271 Total reward: -134.33351682928458 SOC: 0.6073 Cumulative_SOC_deviation: 9.2759 Fuel Consumption: 41.5740 Mean: 2.1109, STD: 5.0190
maximum steps, simulation is done ... 
elapsed_time: 74.167
Episode: 149 Exploration P: 0.0267 Total reward: -112.24774079046362 SOC: 0.6073 Cumulative_SOC_deviation: 7.0909 Fuel Consumption: 41.3391 Mean: 2.1109, STD: 5.0190
maximum steps, simulation is done ... 
elapsed_time: 74.017
Episode: 150 Exploration P: 0.0262 Total reward: -88.08285946252543 SOC: 0.6030 Cumulative_SOC_deviation: 4.7133 Fuel Consumption: 40.9500 Mean: 2.1109, STD: 5.0190
maximum steps, simulation is done ... 
elapsed_time: 74.099
Episode: 151 Exploration P: 0.0258 To

maximum steps, simulation is done ... 
elapsed_time: 73.780
Episode: 184 Exploration P: 0.0164 Total reward: -84.01181522442931 SOC: 0.6030 Cumulative_SOC_deviation: 4.3521 Fuel Consumption: 40.4906 Mean: 2.1101, STD: 5.0188
maximum steps, simulation is done ... 
elapsed_time: 73.528
Episode: 185 Exploration P: 0.0162 Total reward: -81.5893547676322 SOC: 0.5999 Cumulative_SOC_deviation: 4.1347 Fuel Consumption: 40.2425 Mean: 2.1101, STD: 5.0188
maximum steps, simulation is done ... 
elapsed_time: 73.660
Episode: 186 Exploration P: 0.0160 Total reward: -79.61945055007689 SOC: 0.6036 Cumulative_SOC_deviation: 3.9087 Fuel Consumption: 40.5323 Mean: 2.1101, STD: 5.0188
maximum steps, simulation is done ... 
elapsed_time: 73.604
Episode: 187 Exploration P: 0.0159 Total reward: -77.65330542707967 SOC: 0.6006 Cumulative_SOC_deviation: 3.7283 Fuel Consumption: 40.3702 Mean: 2.1101, STD: 5.0188
maximum steps, simulation is done ... 
elapsed_time: 73.871
Episode: 188 Exploration P: 0.0157 Total 

In [13]:
with open("DDPG2_400.pkl", "wb") as f: 
    pickle.dump(results_dict, f, pickle.HIGHEST_PROTOCOL)