In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 

from vehicle_model_DDPG2 import Environment 
from cell_model import CellModel 

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
cell_model = CellModel()
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)

num_states = 4

In [3]:
class OUActionNoise: 
    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None): 
        self.theta = theta 
        self.mean = mean 
        self.std_dev = std_deviation 
        self.dt = dt 
        self.x_initial = x_initial 
        self.reset() 
        
    def reset(self): 
        if self.x_initial is not None: 
            self.x_prev = self.x_initial 
        else: 
            self.x_prev = 0 
            
    def __call__(self): 
        x = (
             self.x_prev + self.theta * (self.mean - self.x_prev) * self.dt 
            + self.std_dev * np.sqrt(self.dt) * np.random.normal() 
        )
        self.x_prev = x 
        return x 

In [4]:
class Buffer: 
    def __init__(self, buffer_capacity=100000, batch_size=64): 
        self.power_mean = 0 
        self.power_std = 0
        self.sum = 0 
        self.sum_deviation = 0 
        self.N = 0 
        
        self.buffer_capacity = buffer_capacity 
        self.batch_size = batch_size 
        self.buffer_counter = 0 
        
        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))
        
    def record(self, obs_tuple):
        self.N += 1 
        index = self.buffer_counter % self.buffer_capacity 
        power = obs_tuple[0][0] 
        
        self.sum += power 
        self.power_mean = self.sum / self.N 
        self.sum_deviation += (power - self.power_mean) ** 2  
        self.power_std = np.sqrt(self.sum_deviation / self.N) 
            
        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]
        
        self.buffer_counter += 1 
        
    def learn(self): 
        record_range = min(self.buffer_counter, self.buffer_capacity)
        batch_indices = np.random.choice(record_range, self.batch_size)
        
        state_batch = self.state_buffer[batch_indices]
        power_batch = (state_batch[:, 0] - self.power_mean) / self.power_std
        state_batch[:, 0] = power_batch 
        
        next_state_batch = self.next_state_buffer[batch_indices]
        power_batch = (next_state_batch[:, 0] - self.power_mean) / self.power_std
        next_state_batch[:, 0] = power_batch 
#         print(state_batch)
        
        state_batch = tf.convert_to_tensor(state_batch)
        action_batch = tf.convert_to_tensor(self.action_buffer[batch_indices])
        reward_batch = tf.convert_to_tensor(self.reward_buffer[batch_indices])
        reward_batch = tf.cast(reward_batch, dtype=tf.float32)
        next_state_batch = tf.convert_to_tensor(next_state_batch)
        
        with tf.GradientTape() as tape: 
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value)) 
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables) 
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )
        
        with tf.GradientTape() as tape: 
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables) 
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau): 
    new_weights = [] 
    target_variables = target_critic.weights
    for i, variable in enumerate(critic_model.weights): 
        new_weights.append(target_variables[i] * (1 - tau) + tau * variable)
    target_critic.set_weights(new_weights)
    
    new_weights = [] 
    target_variables = target_actor.weights
    for i, variable in enumerate(actor_model.weights): 
        new_weights.append(target_variables[i] * (1 - tau) + tau * variable)
    target_actor.set_weights(new_weights)
    

In [6]:
def get_actor(): 
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)
    
    inputs = layers.Input(shape=(num_states))
    out = layers.Dense(512, activation="relu")(inputs)
#     out = layers.BatchNormalization()(out)
    out = layers.Dense(512, activation="relu")(out)
#     out = layers.BatchNormalization()(out)
    outputs = layers.Dense(1, activation="sigmoid", 
                          kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic(): 
    state_input = layers.Input(shape=(num_states))
    state_out = layers.Dense(16, activation="relu")(state_input)
#     state_out = layers.BatchNormalization()(state_out)
    state_out = layers.Dense(32, activation="relu")(state_input)
#     state_out = layers.BatchNormalization()(state_out)
    
    action_input = layers.Input(shape=(1))
    action_out = layers.Dense(32, activation="relu")(action_input)
#     action_out = layers.BatchNormalization()(action_out)
    
    concat = layers.Concatenate()([state_out, action_out]) 
    
    out = layers.Dense(512, activation="relu")(concat)
#     out = layers.BatchNormalization()(out)
    out = layers.Dense(512, activation="relu")(out)
#     out = layers.BatchNormalization()(out)
    outputs = layers.Dense(1)(out)
    
    model = tf.keras.Model([state_input, action_input], outputs)
    return model 
    

In [8]:
def policy(state, noise_object): 
    j_min = state[0][2].numpy()
    j_max = state[0][3].numpy()
    sampled_action = tf.squeeze(actor_model(state)) 
    noise = noise_object()
    sampled_action = sampled_action.numpy() + noise 
    legal_action = sampled_action * j_max 
    legal_action = np.clip(legal_action, j_min, j_max)
#     print(j_min, j_max, legal_action, noise)
    return legal_action 
    

In [9]:
def policy_epsilon_greedy(state, eps): 
    j_min = state[0][-2].numpy()
    j_max = state[0][-1].numpy()

    if random.random() < eps: 
        a = random.randint(0, 9)
        return np.linspace(j_min, j_max, 10)[a]
    else: 
        sampled_action = tf.squeeze(actor_model(state)).numpy()  
        legal_action = sampled_action * j_max 
        legal_action = np.clip(legal_action, j_min, j_max)
        return legal_action

In [10]:
std_dev = 0.2 
ou_noise = OUActionNoise(mean=0, std_deviation=0.2)

critic_lr = 0.0005 
actor_lr = 0.00025 
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 200
gamma = 0.95 
tau = 0.001 

MAX_EPSILON = 1 
MIN_EPSILON = 0.01 
DECAY_RATE = 0.00002
BATCH_SIZE = 32 
DELAY_TRAINING = 3000 

In [11]:
def initialization(): 
    actor_model = get_actor() 
    critic_model = get_critic() 

    target_actor = get_actor() 
    target_critic = get_critic() 
    target_actor.set_weights(actor_model.get_weights())
    target_critic.set_weights(critic_model.get_weights())
    
    buffer = Buffer(500000, BATCH_SIZE)
    return actor_model, critic_model, target_actor, target_critic, buffer

In [12]:
print(env.version)

num_trials = 3
results_dict = {} 
for trial in range(num_trials): 
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()
    
    eps = MAX_EPSILON 
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    for ep in range(total_episodes): 
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * steps)

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption) 

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
              "Mean: {:.4f}, STD: {:.4f}".format(buffer.power_mean, buffer.power_std)
        )

    results_dict[trial + 1] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs 
    }

1
maximum steps, simulation is done ... 
elapsed_time: 31.339
Episode: 1 Exploration P: 1.0000 Total reward: -5144.343264642982 SOC: 1.0000 Cumulative_SOC_deviation: 499.2088 Fuel Consumption: 152.2556 Mean: 2.3655, STD: 6.2718
maximum steps, simulation is done ... 
elapsed_time: 30.623
Episode: 2 Exploration P: 1.0000 Total reward: -5101.54999828179 SOC: 1.0000 Cumulative_SOC_deviation: 495.1936 Fuel Consumption: 149.6142 Mean: 2.3655, STD: 6.2763


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by 

maximum steps, simulation is done ... 
elapsed_time: 77.143
Episode: 22 Exploration P: 0.5511 Total reward: -3946.1529490036796 SOC: 1.0000 Cumulative_SOC_deviation: 384.8810 Fuel Consumption: 97.3425 Mean: 2.3655, STD: 6.2818
maximum steps, simulation is done ... 
elapsed_time: 76.892
Episode: 23 Exploration P: 0.5364 Total reward: -3600.676555934145 SOC: 1.0000 Cumulative_SOC_deviation: 350.1301 Fuel Consumption: 99.3751 Mean: 2.3655, STD: 6.2818
maximum steps, simulation is done ... 
elapsed_time: 78.632
Episode: 24 Exploration P: 0.5222 Total reward: -3570.839239744474 SOC: 1.0000 Cumulative_SOC_deviation: 347.5922 Fuel Consumption: 94.9177 Mean: 2.3655, STD: 6.2819
maximum steps, simulation is done ... 
elapsed_time: 85.432
Episode: 25 Exploration P: 0.5083 Total reward: -3413.707408076835 SOC: 1.0000 Cumulative_SOC_deviation: 331.8924 Fuel Consumption: 94.7836 Mean: 2.3655, STD: 6.2819
maximum steps, simulation is done ... 
elapsed_time: 87.707
Episode: 26 Exploration P: 0.4948 T

maximum steps, simulation is done ... 
elapsed_time: 85.971
Episode: 59 Exploration P: 0.2059 Total reward: -1742.9650192234233 SOC: 0.5287 Cumulative_SOC_deviation: 169.2993 Fuel Consumption: 49.9719 Mean: 2.3655, STD: 6.2823
maximum steps, simulation is done ... 
elapsed_time: 86.719
Episode: 60 Exploration P: 0.2006 Total reward: -2033.4886976697646 SOC: 0.4686 Cumulative_SOC_deviation: 198.8779 Fuel Consumption: 44.7097 Mean: 2.3655, STD: 6.2823
maximum steps, simulation is done ... 
elapsed_time: 86.262
Episode: 61 Exploration P: 0.1954 Total reward: -2900.105775956689 SOC: 0.3686 Cumulative_SOC_deviation: 286.2155 Fuel Consumption: 37.9513 Mean: 2.3655, STD: 6.2823
maximum steps, simulation is done ... 
elapsed_time: 86.045
Episode: 62 Exploration P: 0.1904 Total reward: -2283.9736869647495 SOC: 0.4771 Cumulative_SOC_deviation: 223.8222 Fuel Consumption: 45.7518 Mean: 2.3655, STD: 6.2823
maximum steps, simulation is done ... 
elapsed_time: 88.386
Episode: 63 Exploration P: 0.1855

maximum steps, simulation is done ... 
elapsed_time: 186.522
Episode: 96 Exploration P: 0.0809 Total reward: -194.3103026417289 SOC: 0.6056 Cumulative_SOC_deviation: 14.1484 Fuel Consumption: 52.8263 Mean: 2.3655, STD: 6.2824
maximum steps, simulation is done ... 
elapsed_time: 186.830
Episode: 97 Exploration P: 0.0790 Total reward: -185.51291001172376 SOC: 0.6111 Cumulative_SOC_deviation: 13.4055 Fuel Consumption: 51.4578 Mean: 2.3655, STD: 6.2824
maximum steps, simulation is done ... 
elapsed_time: 185.940
Episode: 98 Exploration P: 0.0771 Total reward: -181.51142253189133 SOC: 0.6057 Cumulative_SOC_deviation: 13.0478 Fuel Consumption: 51.0336 Mean: 2.3655, STD: 6.2824
maximum steps, simulation is done ... 
elapsed_time: 185.606
Episode: 99 Exploration P: 0.0753 Total reward: -201.86809111773815 SOC: 0.6046 Cumulative_SOC_deviation: 15.0845 Fuel Consumption: 51.0236 Mean: 2.3655, STD: 6.2824
maximum steps, simulation is done ... 
elapsed_time: 185.537
Episode: 100 Exploration P: 0.07

maximum steps, simulation is done ... 
elapsed_time: 78.652
Episode: 133 Exploration P: 0.0357 Total reward: -145.85017132472032 SOC: 0.6075 Cumulative_SOC_deviation: 9.1204 Fuel Consumption: 54.6463 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 78.766
Episode: 134 Exploration P: 0.0350 Total reward: -225.41088963849077 SOC: 0.6196 Cumulative_SOC_deviation: 16.9007 Fuel Consumption: 56.4039 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 78.834
Episode: 135 Exploration P: 0.0343 Total reward: -176.85007207995687 SOC: 0.6122 Cumulative_SOC_deviation: 12.3272 Fuel Consumption: 53.5786 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 78.782
Episode: 136 Exploration P: 0.0336 Total reward: -191.61933797804065 SOC: 0.6066 Cumulative_SOC_deviation: 13.7498 Fuel Consumption: 54.1213 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 78.898
Episode: 137 Exploration P: 0.033

maximum steps, simulation is done ... 
elapsed_time: 78.616
Episode: 170 Exploration P: 0.0193 Total reward: -176.63290757522088 SOC: 0.6110 Cumulative_SOC_deviation: 12.0952 Fuel Consumption: 55.6805 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 78.645
Episode: 171 Exploration P: 0.0190 Total reward: -171.16878661780697 SOC: 0.5945 Cumulative_SOC_deviation: 11.7929 Fuel Consumption: 53.2397 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 78.790
Episode: 172 Exploration P: 0.0188 Total reward: -199.00656905640247 SOC: 0.6119 Cumulative_SOC_deviation: 14.3145 Fuel Consumption: 55.8616 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 78.901
Episode: 173 Exploration P: 0.0186 Total reward: -157.69098789318807 SOC: 0.6073 Cumulative_SOC_deviation: 10.3184 Fuel Consumption: 54.5072 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 78.471
Episode: 174 Exploration P: 0.01



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float

maximum steps, simulation is done ... 
elapsed_time: 77.461
Episode: 27 Exploration P: 0.4817 Total reward: -3155.680038102826 SOC: 1.0000 Cumulative_SOC_deviation: 306.2991 Fuel Consumption: 92.6890 Mean: 2.3655, STD: 6.2819
maximum steps, simulation is done ... 
elapsed_time: 77.597
Episode: 28 Exploration P: 0.4689 Total reward: -3084.6991237822294 SOC: 1.0000 Cumulative_SOC_deviation: 299.7138 Fuel Consumption: 87.5609 Mean: 2.3655, STD: 6.2820
maximum steps, simulation is done ... 
elapsed_time: 77.530
Episode: 29 Exploration P: 0.4565 Total reward: -3167.8776580004705 SOC: 1.0000 Cumulative_SOC_deviation: 307.6736 Fuel Consumption: 91.1413 Mean: 2.3655, STD: 6.2820
maximum steps, simulation is done ... 
elapsed_time: 77.591
Episode: 30 Exploration P: 0.4444 Total reward: -2950.8239174271475 SOC: 1.0000 Cumulative_SOC_deviation: 286.0787 Fuel Consumption: 90.0372 Mean: 2.3655, STD: 6.2820
maximum steps, simulation is done ... 
elapsed_time: 77.515
Episode: 31 Exploration P: 0.4326

maximum steps, simulation is done ... 
elapsed_time: 78.189
Episode: 64 Exploration P: 0.1808 Total reward: -2462.803572660488 SOC: 0.4328 Cumulative_SOC_deviation: 242.0426 Fuel Consumption: 42.3778 Mean: 2.3655, STD: 6.2823
maximum steps, simulation is done ... 
elapsed_time: 77.799
Episode: 65 Exploration P: 0.1761 Total reward: -1928.888518519011 SOC: 0.4822 Cumulative_SOC_deviation: 188.3457 Fuel Consumption: 45.4319 Mean: 2.3655, STD: 6.2823
maximum steps, simulation is done ... 
elapsed_time: 78.352
Episode: 66 Exploration P: 0.1716 Total reward: -3691.1595661553556 SOC: 0.2684 Cumulative_SOC_deviation: 366.0070 Fuel Consumption: 31.0897 Mean: 2.3655, STD: 6.2823
maximum steps, simulation is done ... 
elapsed_time: 78.377
Episode: 67 Exploration P: 0.1673 Total reward: -2004.8380952763794 SOC: 0.7773 Cumulative_SOC_deviation: 193.5678 Fuel Consumption: 69.1601 Mean: 2.3655, STD: 6.2823
maximum steps, simulation is done ... 
elapsed_time: 77.965
Episode: 68 Exploration P: 0.1630 

maximum steps, simulation is done ... 
elapsed_time: 77.658
Episode: 101 Exploration P: 0.0718 Total reward: -689.5133987713206 SOC: 0.6828 Cumulative_SOC_deviation: 63.1197 Fuel Consumption: 58.3165 Mean: 2.3655, STD: 6.2824
maximum steps, simulation is done ... 
elapsed_time: 77.898
Episode: 102 Exploration P: 0.0701 Total reward: -1300.284437399361 SOC: 0.6799 Cumulative_SOC_deviation: 124.0404 Fuel Consumption: 59.8808 Mean: 2.3655, STD: 6.2824
maximum steps, simulation is done ... 
elapsed_time: 77.673
Episode: 103 Exploration P: 0.0685 Total reward: -1409.4956880696927 SOC: 0.6849 Cumulative_SOC_deviation: 134.9159 Fuel Consumption: 60.3365 Mean: 2.3655, STD: 6.2824
maximum steps, simulation is done ... 
elapsed_time: 78.048
Episode: 104 Exploration P: 0.0669 Total reward: -1224.5976345380489 SOC: 0.6728 Cumulative_SOC_deviation: 116.5166 Fuel Consumption: 59.4312 Mean: 2.3655, STD: 6.2824
maximum steps, simulation is done ... 
elapsed_time: 78.055
Episode: 105 Exploration P: 0.0

maximum steps, simulation is done ... 
elapsed_time: 77.916
Episode: 138 Exploration P: 0.0324 Total reward: -1372.7045455753964 SOC: 0.6751 Cumulative_SOC_deviation: 131.2763 Fuel Consumption: 59.9414 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 77.991
Episode: 139 Exploration P: 0.0318 Total reward: -1112.4053055649172 SOC: 0.6748 Cumulative_SOC_deviation: 105.2224 Fuel Consumption: 60.1813 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 78.103
Episode: 140 Exploration P: 0.0312 Total reward: -1232.5247292913227 SOC: 0.6788 Cumulative_SOC_deviation: 117.2067 Fuel Consumption: 60.4578 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 77.797
Episode: 141 Exploration P: 0.0306 Total reward: -1449.7698009824187 SOC: 0.6963 Cumulative_SOC_deviation: 138.7986 Fuel Consumption: 61.7834 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 77.429
Episode: 142 Exploration P: 

maximum steps, simulation is done ... 
elapsed_time: 77.926
Episode: 175 Exploration P: 0.0181 Total reward: -457.44541446946886 SOC: 0.6337 Cumulative_SOC_deviation: 40.3233 Fuel Consumption: 54.2120 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 77.771
Episode: 176 Exploration P: 0.0179 Total reward: -625.5997386275612 SOC: 0.6390 Cumulative_SOC_deviation: 57.0439 Fuel Consumption: 55.1610 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 77.845
Episode: 177 Exploration P: 0.0177 Total reward: -614.0327808782649 SOC: 0.6336 Cumulative_SOC_deviation: 55.9034 Fuel Consumption: 54.9985 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 77.874
Episode: 178 Exploration P: 0.0175 Total reward: -741.5115708192621 SOC: 0.6438 Cumulative_SOC_deviation: 68.5111 Fuel Consumption: 56.4001 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 78.076
Episode: 179 Exploration P: 0.0173 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

maximum steps, simulation is done ... 
elapsed_time: 67.953
Episode: 3 Exploration P: 0.9217 Total reward: -5109.315614425439 SOC: 1.0000 Cumulative_SOC_deviation: 496.5698 Fuel Consumption: 143.6172 Mean: 2.3655, STD: 6.2781
maximum steps, simulation is done ... 
elapsed_time: 76.351
Episode: 4 Exploration P: 0.8970 Total reward: -5058.693280282669 SOC: 1.0000 Cumulative_SOC_deviation: 491.6271

maximum steps, simulation is done ... 
elapsed_time: 77.791
Episode: 33 Exploration P: 0.4100 Total reward: -2447.3718399655077 SOC: 0.9959 Cumulative_SOC_deviation: 236.4454 Fuel Consumption: 82.9177 Mean: 2.3655, STD: 6.2821
maximum steps, simulation is done ... 
elapsed_time: 77.354
Episode: 34 Exploration P: 0.3992 Total reward: -2269.409698866668 SOC: 1.0000 Cumulative_SOC_deviation: 218.5904 Fuel Consumption: 83.5059 Mean: 2.3655, STD: 6.2821
maximum steps, simulation is done ... 
elapsed_time: 77.386
Episode: 35 Exploration P: 0.3887 Total reward: -1725.7539691289878 SOC: 0.9522 Cumulative_SOC_deviation: 164.5127 Fuel Consumption: 80.6271 Mean: 2.3655, STD: 6.2821
maximum steps, simulation is done ... 
elapsed_time: 77.616
Episode: 36 Exploration P: 0.3784 Total reward: -1779.641126501781 SOC: 0.9250 Cumulative_SOC_deviation: 170.1625 Fuel Consumption: 78.0166 Mean: 2.3655, STD: 6.2821
maximum steps, simulation is done ... 
elapsed_time: 77.701
Episode: 37 Exploration P: 0.3684 

maximum steps, simulation is done ... 
elapsed_time: 77.774
Episode: 70 Exploration P: 0.1548 Total reward: -853.6648220878827 SOC: 0.7148 Cumulative_SOC_deviation: 79.3596 Fuel Consumption: 60.0689 Mean: 2.3655, STD: 6.2823
maximum steps, simulation is done ... 
elapsed_time: 78.146
Episode: 71 Exploration P: 0.1509 Total reward: -1192.496510953383 SOC: 0.7063 Cumulative_SOC_deviation: 113.2361 Fuel Consumption: 60.1350 Mean: 2.3655, STD: 6.2823
maximum steps, simulation is done ... 
elapsed_time: 78.054
Episode: 72 Exploration P: 0.1471 Total reward: -1161.92504118852 SOC: 0.6708 Cumulative_SOC_deviation: 110.4791 Fuel Consumption: 57.1340 Mean: 2.3655, STD: 6.2823
maximum steps, simulation is done ... 
elapsed_time: 78.153
Episode: 73 Exploration P: 0.1434 Total reward: -672.928827220369 SOC: 0.6624 Cumulative_SOC_deviation: 61.7709 Fuel Consumption: 55.2194 Mean: 2.3655, STD: 6.2824
maximum steps, simulation is done ... 
elapsed_time: 77.775
Episode: 74 Exploration P: 0.1398 Total 

maximum steps, simulation is done ... 
elapsed_time: 85.248
Episode: 107 Exploration P: 0.0624 Total reward: -228.6145847148187 SOC: 0.6122 Cumulative_SOC_deviation: 17.4769 Fuel Consumption: 53.8454 Mean: 2.3655, STD: 6.2824
maximum steps, simulation is done ... 
elapsed_time: 86.259
Episode: 108 Exploration P: 0.0610 Total reward: -253.21858040667357 SOC: 0.6143 Cumulative_SOC_deviation: 19.8903 Fuel Consumption: 54.3156 Mean: 2.3655, STD: 6.2824
maximum steps, simulation is done ... 
elapsed_time: 86.303
Episode: 109 Exploration P: 0.0596 Total reward: -247.9147065217374 SOC: 0.6140 Cumulative_SOC_deviation: 19.4134 Fuel Consumption: 53.7802 Mean: 2.3655, STD: 6.2824
maximum steps, simulation is done ... 
elapsed_time: 86.105
Episode: 110 Exploration P: 0.0583 Total reward: -252.2791450641222 SOC: 0.6130 Cumulative_SOC_deviation: 19.9416 Fuel Consumption: 52.8634 Mean: 2.3655, STD: 6.2824
maximum steps, simulation is done ... 
elapsed_time: 86.464
Episode: 111 Exploration P: 0.0570 

maximum steps, simulation is done ... 
elapsed_time: 187.305
Episode: 144 Exploration P: 0.0290 Total reward: -240.08900327327675 SOC: 0.5975 Cumulative_SOC_deviation: 18.7253 Fuel Consumption: 52.8360 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 187.589
Episode: 145 Exploration P: 0.0285 Total reward: -269.9703732947689 SOC: 0.5966 Cumulative_SOC_deviation: 21.6784 Fuel Consumption: 53.1864 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 186.703
Episode: 146 Exploration P: 0.0280 Total reward: -247.56086669695603 SOC: 0.6019 Cumulative_SOC_deviation: 19.4304 Fuel Consumption: 53.2573 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 186.577
Episode: 147 Exploration P: 0.0275 Total reward: -248.70608449969134 SOC: 0.6005 Cumulative_SOC_deviation: 19.5259 Fuel Consumption: 53.4475 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 188.010
Episode: 148 Exploration P: 

maximum steps, simulation is done ... 
elapsed_time: 85.551
Episode: 181 Exploration P: 0.0169 Total reward: -379.6800385416271 SOC: 0.5986 Cumulative_SOC_deviation: 32.5374 Fuel Consumption: 54.3062 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 85.223
Episode: 182 Exploration P: 0.0167 Total reward: -258.45542773616813 SOC: 0.6007 Cumulative_SOC_deviation: 20.4837 Fuel Consumption: 53.6184 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 85.189
Episode: 183 Exploration P: 0.0165 Total reward: -264.63718461988066 SOC: 0.6084 Cumulative_SOC_deviation: 21.0268 Fuel Consumption: 54.3693 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 85.159
Episode: 184 Exploration P: 0.0163 Total reward: -181.3760357625124 SOC: 0.5998 Cumulative_SOC_deviation: 12.8368 Fuel Consumption: 53.0076 Mean: 2.3655, STD: 6.2825
maximum steps, simulation is done ... 
elapsed_time: 85.120
Episode: 185 Exploration P: 0.0162

In [13]:
with open("DDPG2_mass1200.pkl", "wb") as f: 
    pickle.dump(results_dict, f, pickle.HIGHEST_PROTOCOL)