In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 

from vehicle_model_DDPG12_1 import Environment 
from cell_model import CellModel 

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_e-4wd_Battery.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_id_75_110_Westinghouse.mat"
cell_model = CellModel()
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)

num_states = 4

In [3]:
class OUActionNoise: 
    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None): 
        self.theta = theta 
        self.mean = mean 
        self.std_dev = std_deviation 
        self.dt = dt 
        self.x_initial = x_initial 
        self.reset() 
        
    def reset(self): 
        if self.x_initial is not None: 
            self.x_prev = self.x_initial 
        else: 
            self.x_prev = 0 
            
    def __call__(self): 
        x = (
             self.x_prev + self.theta * (self.mean - self.x_prev) * self.dt 
            + self.std_dev * np.sqrt(self.dt) * np.random.normal() 
        )
        self.x_prev = x 
        return x 

In [4]:
class Buffer: 
    def __init__(self, buffer_capacity=100000, batch_size=64): 
        self.power_mean = 0 
        self.power_std = 0
        self.sum = 0 
        self.sum_deviation = 0 
        self.N = 0 
        
        self.buffer_capacity = buffer_capacity 
        self.batch_size = batch_size 
        self.buffer_counter = 0 
        
        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))
        
    def record(self, obs_tuple):
        self.N += 1 
        index = self.buffer_counter % self.buffer_capacity 
        power = obs_tuple[0][0] 
        
        self.sum += power 
        self.power_mean = self.sum / self.N 
        self.sum_deviation += (power - self.power_mean) ** 2  
        self.power_std = np.sqrt(self.sum_deviation / self.N) 
            
        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]
        
        self.buffer_counter += 1 
        
    def learn(self): 
        record_range = min(self.buffer_counter, self.buffer_capacity)
        batch_indices = np.random.choice(record_range, self.batch_size)
        
        state_batch = self.state_buffer[batch_indices]
        power_batch = (state_batch[:, 0] - self.power_mean) / self.power_std
        state_batch[:, 0] = power_batch 
        
        next_state_batch = self.next_state_buffer[batch_indices]
        power_batch = (next_state_batch[:, 0] - self.power_mean) / self.power_std
        next_state_batch[:, 0] = power_batch 
#         print(state_batch)
        
        state_batch = tf.convert_to_tensor(state_batch)
        action_batch = tf.convert_to_tensor(self.action_buffer[batch_indices])
        reward_batch = tf.convert_to_tensor(self.reward_buffer[batch_indices])
        reward_batch = tf.cast(reward_batch, dtype=tf.float32)
        next_state_batch = tf.convert_to_tensor(next_state_batch)
        
        with tf.GradientTape() as tape: 
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value)) 
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables) 
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )
        
        with tf.GradientTape() as tape: 
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables) 
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau): 
    new_weights = [] 
    target_variables = target_critic.weights
    for i, variable in enumerate(critic_model.weights): 
        new_weights.append(target_variables[i] * (1 - tau) + tau * variable)
    target_critic.set_weights(new_weights)
    
    new_weights = [] 
    target_variables = target_actor.weights
    for i, variable in enumerate(actor_model.weights): 
        new_weights.append(target_variables[i] * (1 - tau) + tau * variable)
    target_actor.set_weights(new_weights)
    

In [6]:
def get_actor(): 
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)
    
    inputs = layers.Input(shape=(num_states))
    out = layers.Dense(512, activation="relu")(inputs)
#     out = layers.BatchNormalization()(out)
    out = layers.Dense(512, activation="relu")(out)
#     out = layers.BatchNormalization()(out)
    outputs = layers.Dense(1, activation="sigmoid", 
                          kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic(): 
    state_input = layers.Input(shape=(num_states))
    state_out = layers.Dense(16, activation="relu")(state_input)
#     state_out = layers.BatchNormalization()(state_out)
    state_out = layers.Dense(32, activation="relu")(state_out)
#     state_out = layers.BatchNormalization()(state_out)
    
    action_input = layers.Input(shape=(1))
    action_out = layers.Dense(32, activation="relu")(action_input)
#     action_out = layers.BatchNormalization()(action_out)
    
    concat = layers.Concatenate()([state_out, action_out]) 
    
    out = layers.Dense(512, activation="relu")(concat)
#     out = layers.BatchNormalization()(out)
    out = layers.Dense(512, activation="relu")(out)
#     out = layers.BatchNormalization()(out)
    outputs = layers.Dense(1)(out)
    
    model = tf.keras.Model([state_input, action_input], outputs)
    return model 
    

In [8]:
def policy(state, noise_object): 
    j_min = state[0][2].numpy()
    j_max = state[0][3].numpy()
    sampled_action = tf.squeeze(actor_model(state)) 
    noise = noise_object()
    sampled_action = sampled_action.numpy() + noise 
    legal_action = sampled_action * j_max 
    legal_action = np.clip(legal_action, j_min, j_max)
#     print(j_min, j_max, legal_action, noise)
    return legal_action 
    

In [9]:
def policy_epsilon_greedy(state, eps): 
    j_min = state[0][-2].numpy()
    j_max = state[0][-1].numpy()

    if random.random() < eps: 
        a = random.randint(0, 9)
        return np.linspace(j_min, j_max, 10)[a]
    else: 
        sampled_action = tf.squeeze(actor_model(state)).numpy()  
        legal_action = sampled_action * j_max 
        legal_action = np.clip(legal_action, j_min, j_max)
        return legal_action

In [10]:
std_dev = 0.2 
ou_noise = OUActionNoise(mean=0, std_deviation=0.2)

critic_lr = 0.0005 
actor_lr = 0.00025 
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 100
gamma = 0.95 
tau = 0.001 

MAX_EPSILON = 1 
MIN_EPSILON = 0.01 
DECAY_RATE = 0.00002
BATCH_SIZE = 32 
DELAY_TRAINING = 3000 

In [11]:
def initialization(): 
    actor_model = get_actor() 
    critic_model = get_critic() 

    target_actor = get_actor() 
    target_critic = get_critic() 
    target_actor.set_weights(actor_model.get_weights())
    target_critic.set_weights(critic_model.get_weights())
    
    buffer = Buffer(500000, BATCH_SIZE)
    return actor_model, critic_model, target_actor, target_critic, buffer

In [None]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root): 
    actor_model.save_weights("./{}/actor_model_checkpoint".format(root))
    critic_model.save_weights("./{}/critic_model_checkpoint".format(root))
    target_actor.save_weights("./{}/target_actor_checkpoint".format(root))
    target_critic.save_weights("./{}/target_critic_checkpoint".format(root))
    print("model is saved..")

In [None]:
print(env.version)

num_trials = 10
results_dict = {} 
for trial in range(num_trials): 
    print()
    print("Trial {}".format(trial))
    
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()
    
    eps = MAX_EPSILON 
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    for ep in range(total_episodes): 
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * steps)

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption) 

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
        )
    
    root = "DDPG12_1_trial{}".format(trial+1)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)
    
    results_dict[trial + 1] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs 
    }

1
maximum steps, simulation is done ... 
elapsed_time: 30.328
Episode: 1 Exploration P: 1.0000 Total reward: -979.0321351634449 SOC: 0.7952 Cumulative_SOC_deviation: 91.8397 Fuel Consumption: 60.6348
maximum steps, simulation is done ... 
elapsed_time: 28.623
Episode: 2 Exploration P: 1.0000 Total reward: -914.4053961903279 SOC: 0.7825 Cumulative_SOC_deviation: 85.4789 Fuel Consumption: 59.6163


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



maximum steps, simulation is done ... 
elapsed_time: 126.069
Episode: 28 Exploration P: 0.4689 Total reward: -485.2877935495941 SOC: 0.6582 Cumulative_SOC_deviation: 43.5699 Fuel Consumption: 49.5888
maximum steps, simulation is done ... 
elapsed_time: 126.470
Episode: 29 Exploration P: 0.4565 Total reward: -523.9127705216332 SOC: 0.6589 Cumulative_SOC_deviation: 47.4011 Fuel Consumption: 49.9013
maximum steps, simulation is done ... 
elapsed_time: 128.266
Episode: 30 Exploration P: 0.4444 Total reward: -499.5151826707928 SOC: 0.6665 Cumulative_SOC_deviation: 44.9243 Fuel Consumption: 50.2725
maximum steps, simulation is done ... 
elapsed_time: 129.531
Episode: 31 Exploration P: 0.4326 Total reward: -452.21855308875223 SOC: 0.6543 Cumulative_SOC_deviation: 40.2969 Fuel Consumption: 49.2499
maximum steps, simulation is done ... 
elapsed_time: 127.603
Episode: 32 Exploration P: 0.4212 Total reward: -466.3266543511347 SOC: 0.6450 Cumulative_SOC_deviation: 41.7775 Fuel Consumption: 48.5513

maximum steps, simulation is done ... 
elapsed_time: 129.526
Episode: 69 Exploration P: 0.1589 Total reward: -348.1890317521266 SOC: 0.6234 Cumulative_SOC_deviation: 30.1682 Fuel Consumption: 46.5074
maximum steps, simulation is done ... 
elapsed_time: 128.867
Episode: 70 Exploration P: 0.1548 Total reward: -491.35685708900513 SOC: 0.6335 Cumulative_SOC_deviation: 44.3410 Fuel Consumption: 47.9473
maximum steps, simulation is done ... 
elapsed_time: 128.461
Episode: 71 Exploration P: 0.1509 Total reward: -469.2418593955377 SOC: 0.6179 Cumulative_SOC_deviation: 42.2466 Fuel Consumption: 46.7757
maximum steps, simulation is done ... 
elapsed_time: 128.936
Episode: 72 Exploration P: 0.1471 Total reward: -666.2758986156788 SOC: 0.6198 Cumulative_SOC_deviation: 61.8618 Fuel Consumption: 47.6582
maximum steps, simulation is done ... 
elapsed_time: 130.002
Episode: 73 Exploration P: 0.1434 Total reward: -436.97036770578393 SOC: 0.6217 Cumulative_SOC_deviation: 39.0131 Fuel Consumption: 46.839



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

maximum steps, simulation is done ... 
elapsed_time: 110.000
Episode: 3 Exploration P: 0.9217 Total reward: -817.8467266499229 SOC: 0.7678 Cumulative_SOC_deviation: 75.9327 Fuel Consumption: 58.5199
maximum steps, simulation is done ... 
elapsed_time: 127.102
Episode: 4 Exploration P: 0.8970 Total reward: -772.4927750342309 SOC: 0.7217 Cumulative_SOC_deviation: 71.7682 Fuel Consumption: 54.8104


Available condition is not avail... SOC: 1
elapsed_time: 70.700
Episode: 37 Exploration P: 0.3709 Total reward: -2743.6067742781 SOC: 1.0000 Cumulative_SOC_deviation: 265.1854 Fuel Consumption: 91.7523
Available condition is not avail... SOC: 1
elapsed_time: 70.777
Episode: 38 Exploration P: 0.3616 Total reward: -2837.373626448052 SOC: 1.0000 Cumulative_SOC_deviation: 274.4777 Fuel Consumption: 92.5965
Available condition is not avail... SOC: 1
elapsed_time: 70.795
Episode: 39 Exploration P: 0.3525 Total reward: -2806.155764509319 SOC: 1.0000 Cumulative_SOC_deviation: 271.2955 Fuel Consumption: 93.2009
Available condition is not avail... SOC: 1
elapsed_time: 71.580
Episode: 40 Exploration P: 0.3437 Total reward: -2931.51928339482 SOC: 1.0000 Cumulative_SOC_deviation: 283.7472 Fuel Consumption: 94.0472
Available condition is not avail... SOC: 1
elapsed_time: 70.562
Episode: 41 Exploration P: 0.3351 Total reward: -2901.098587600245 SOC: 1.0000 Cumulative_SOC_deviation: 280.7035 Fuel Cons

Available condition is not avail... SOC: 1
elapsed_time: 71.154
Episode: 77 Exploration P: 0.1370 Total reward: -3318.6702929883472 SOC: 1.0000 Cumulative_SOC_deviation: 321.4349 Fuel Consumption: 104.3212
Available condition is not avail... SOC: 1
elapsed_time: 71.179
Episode: 78 Exploration P: 0.1338 Total reward: -3357.9575691720474 SOC: 1.0000 Cumulative_SOC_deviation: 325.2569 Fuel Consumption: 105.3889
Available condition is not avail... SOC: 1
elapsed_time: 71.356
Episode: 79 Exploration P: 0.1306 Total reward: -3363.316204643639 SOC: 1.0000 Cumulative_SOC_deviation: 325.8054 Fuel Consumption: 105.2627
Available condition is not avail... SOC: 1
elapsed_time: 71.431
Episode: 80 Exploration P: 0.1275 Total reward: -3357.819631489848 SOC: 1.0000 Cumulative_SOC_deviation: 325.2414 Fuel Consumption: 105.4060
Available condition is not avail... SOC: 1
elapsed_time: 71.530
Episode: 81 Exploration P: 0.1244 Total reward: -3345.543430953888 SOC: 1.0000 Cumulative_SOC_deviation: 324.0600 

maximum steps, simulation is done ... 
elapsed_time: 74.412
Episode: 4 Exploration P: 0.8970 Total reward: -813.3642270784641 SOC: 0.7504 Cumulative_SOC_deviation: 75.6308 Fuel Consumption: 57.0564
maximum steps, simulation is done ... 
elapsed_time: 74.457
Episode: 5 Exploration P: 0.8730 Total reward: -838.8139053097177 SOC: 0.7580 Cumulative_SOC_deviation: 78.1206 Fuel Consumption: 57.6076
maximum steps, simulation is done ... 
elapsed_time: 74.108
Episode: 6 Exploration P: 0.8496 Total reward: -843.6422412321546 SOC: 0.7575 Cumulative_SOC_deviation: 78.6162 Fuel Consumption: 57.4805
maximum steps, simulation is done ... 
elapsed_time: 74.137
Episode: 7 Exploration P: 0.8269 Total reward: -754.8415974977516 SOC: 0.6977 Cumulative_SOC_deviation: 70.1747 Fuel Consumption: 53.0950
maximum steps, simulation is done ... 
elapsed_time: 74.032
Episode: 8 Exploration P: 0.8048 Total reward: -740.8451256584459 SOC: 0.6861 Cumulative_SOC_deviation: 68.8745 Fuel Consumption: 52.1004
maximum st

maximum steps, simulation is done ... 
elapsed_time: 75.335
Episode: 45 Exploration P: 0.2997 Total reward: -398.32975112103605 SOC: 0.6477 Cumulative_SOC_deviation: 34.9690 Fuel Consumption: 48.6393
maximum steps, simulation is done ... 
elapsed_time: 74.825
Episode: 46 Exploration P: 0.2918 Total reward: -383.5888850440485 SOC: 0.6371 Cumulative_SOC_deviation: 33.5868 Fuel Consumption: 47.7212
maximum steps, simulation is done ... 
elapsed_time: 75.316
Episode: 47 Exploration P: 0.2842 Total reward: -320.09331381685786 SOC: 0.6239 Cumulative_SOC_deviation: 27.3501 Fuel Consumption: 46.5921
maximum steps, simulation is done ... 
elapsed_time: 75.267
Episode: 48 Exploration P: 0.2768 Total reward: -336.7717316508716 SOC: 0.6228 Cumulative_SOC_deviation: 29.0093 Fuel Consumption: 46.6785
maximum steps, simulation is done ... 
elapsed_time: 75.912
Episode: 49 Exploration P: 0.2696 Total reward: -341.4670928891673 SOC: 0.6200 Cumulative_SOC_deviation: 29.5214 Fuel Consumption: 46.2530
max

maximum steps, simulation is done ... 
elapsed_time: 69.401
Episode: 87 Exploration P: 0.1014 Total reward: -310.611437223501 SOC: 0.6227 Cumulative_SOC_deviation: 26.4587 Fuel Consumption: 46.0242
maximum steps, simulation is done ... 
elapsed_time: 69.361
Episode: 88 Exploration P: 0.0989 Total reward: -343.5626649079317 SOC: 0.6234 Cumulative_SOC_deviation: 29.7397 Fuel Consumption: 46.1659
maximum steps, simulation is done ... 
elapsed_time: 64.099
Episode: 89 Exploration P: 0.0965 Total reward: -316.98266259999673 SOC: 0.6207 Cumulative_SOC_deviation: 27.1034 Fuel Consumption: 45.9486
maximum steps, simulation is done ... 
elapsed_time: 60.550
Episode: 90 Exploration P: 0.0942 Total reward: -306.63299795613403 SOC: 0.6210 Cumulative_SOC_deviation: 26.0668 Fuel Consumption: 45.9653
maximum steps, simulation is done ... 
elapsed_time: 60.862
Episode: 91 Exploration P: 0.0919 Total reward: -290.7364183500701 SOC: 0.6198 Cumulative_SOC_deviation: 24.4906 Fuel Consumption: 45.8305
maxi

maximum steps, simulation is done ... 
elapsed_time: 59.867
Episode: 14 Exploration P: 0.6840 Total reward: -1464.6459554764506 SOC: 0.5476 Cumulative_SOC_deviation: 142.3142 Fuel Consumption: 41.5044
maximum steps, simulation is done ... 
elapsed_time: 59.489
Episode: 15 Exploration P: 0.6658 Total reward: -1352.42034985699 SOC: 0.5629 Cumulative_SOC_deviation: 130.9650 Fuel Consumption: 42.7700
maximum steps, simulation is done ... 
elapsed_time: 59.714
Episode: 16 Exploration P: 0.6480 Total reward: -1574.840834794049 SOC: 0.5322 Cumulative_SOC_deviation: 153.4310 Fuel Consumption: 40.5308
maximum steps, simulation is done ... 
elapsed_time: 59.911
Episode: 17 Exploration P: 0.6307 Total reward: -1669.9764513837115 SOC: 0.5128 Cumulative_SOC_deviation: 163.1010 Fuel Consumption: 38.9666
maximum steps, simulation is done ... 
elapsed_time: 59.440
Episode: 18 Exploration P: 0.6139 Total reward: -1617.6716329070493 SOC: 0.5251 Cumulative_SOC_deviation: 157.7759 Fuel Consumption: 39.912

maximum steps, simulation is done ... 
elapsed_time: 61.075
Episode: 56 Exploration P: 0.2227 Total reward: -281.6497509211117 SOC: 0.6143 Cumulative_SOC_deviation: 23.5983 Fuel Consumption: 45.6672
maximum steps, simulation is done ... 
elapsed_time: 60.881
Episode: 57 Exploration P: 0.2170 Total reward: -335.3202227099576 SOC: 0.6222 Cumulative_SOC_deviation: 28.8837 Fuel Consumption: 46.4829
maximum steps, simulation is done ... 
elapsed_time: 61.046
Episode: 58 Exploration P: 0.2114 Total reward: -309.4469355837288 SOC: 0.6208 Cumulative_SOC_deviation: 26.2963 Fuel Consumption: 46.4844
maximum steps, simulation is done ... 
elapsed_time: 61.559
Episode: 59 Exploration P: 0.2059 Total reward: -293.891384851802 SOC: 0.6139 Cumulative_SOC_deviation: 24.8217 Fuel Consumption: 45.6744
maximum steps, simulation is done ... 
elapsed_time: 61.430
Episode: 60 Exploration P: 0.2006 Total reward: -306.6467734141336 SOC: 0.6249 Cumulative_SOC_deviation: 26.0221 Fuel Consumption: 46.4256
maximu

maximum steps, simulation is done ... 
elapsed_time: 61.125
Episode: 98 Exploration P: 0.0771 Total reward: -246.88150417251518 SOC: 0.6145 Cumulative_SOC_deviation: 20.0801 Fuel Consumption: 46.0804
maximum steps, simulation is done ... 
elapsed_time: 60.861
Episode: 99 Exploration P: 0.0753 Total reward: -251.4710411790755 SOC: 0.6159 Cumulative_SOC_deviation: 20.5201 Fuel Consumption: 46.2701
maximum steps, simulation is done ... 
elapsed_time: 60.808
Episode: 100 Exploration P: 0.0735 Total reward: -271.67562194573816 SOC: 0.6169 Cumulative_SOC_deviation: 22.5289 Fuel Consumption: 46.3862
maximum steps, simulation is done ... 
elapsed_time: 14.386
Episode: 1 Exploration P: 1.0000 Total reward: -950.0448366537948 SOC: 0.7828 Cumulative_SOC_deviation: 89.0361 Fuel Consumption: 59.6837
maximum steps, simulation is done ... 
elapsed_time: 14.290
Episode: 2 Exploration P: 1.0000 Total reward: -877.8126745820556 SOC: 0.7703 Cumulative_SOC_deviation: 81.9221 Fuel Consumption: 58.5913


To

maximum steps, simulation is done ... 
elapsed_time: 59.524
Episode: 25 Exploration P: 0.5083 Total reward: -2294.8475657336 SOC: 0.4254 Cumulative_SOC_deviation: 226.2302 Fuel Consumption: 32.5453
maximum steps, simulation is done ... 
elapsed_time: 59.880
Episode: 26 Exploration P: 0.4948 Total reward: -2418.717611891156 SOC: 0.4069 Cumulative_SOC_deviation: 238.7526 Fuel Consumption: 31.1919
maximum steps, simulation is done ... 
elapsed_time: 59.708
Episode: 27 Exploration P: 0.4817 Total reward: -2381.186225652184 SOC: 0.4077 Cumulative_SOC_deviation: 234.9870 Fuel Consumption: 31.3160
maximum steps, simulation is done ... 
elapsed_time: 59.630
Episode: 28 Exploration P: 0.4689 Total reward: -2555.6801650918474 SOC: 0.3811 Cumulative_SOC_deviation: 252.6400 Fuel Consumption: 29.2800
maximum steps, simulation is done ... 
elapsed_time: 59.781
Episode: 29 Exploration P: 0.4565 Total reward: -2663.2710254166122 SOC: 0.3630 Cumulative_SOC_deviation: 263.5325 Fuel Consumption: 27.9459


maximum steps, simulation is done ... 
elapsed_time: 60.120
Episode: 66 Exploration P: 0.1721 Total reward: -401.4570597969743 SOC: 0.6475 Cumulative_SOC_deviation: 35.3393 Fuel Consumption: 48.0637
maximum steps, simulation is done ... 
elapsed_time: 60.879
Episode: 67 Exploration P: 0.1677 Total reward: -437.338968418891 SOC: 0.6543 Cumulative_SOC_deviation: 38.8748 Fuel Consumption: 48.5907
maximum steps, simulation is done ... 
elapsed_time: 60.601
Episode: 68 Exploration P: 0.1634 Total reward: -374.9362231086689 SOC: 0.6440 Cumulative_SOC_deviation: 32.7204 Fuel Consumption: 47.7324
maximum steps, simulation is done ... 
elapsed_time: 60.274
Episode: 69 Exploration P: 0.1593 Total reward: -401.6350314184216 SOC: 0.6431 Cumulative_SOC_deviation: 35.3751 Fuel Consumption: 47.8837
maximum steps, simulation is done ... 
elapsed_time: 60.344
Episode: 70 Exploration P: 0.1552 Total reward: -369.765864921134 SOC: 0.6386 Cumulative_SOC_deviation: 32.2220 Fuel Consumption: 47.5459
maximum



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

maximum steps, simulation is done ... 
elapsed_time: 52.614
Episode: 3 Exploration P: 0.9217 Total reward: -949.3475983852189 SOC: 0.7923 Cumulative_SOC_deviation: 88.8785 Fuel Consumption: 60.5622
maximum steps, simulation is done ... 
elapsed_time: 60.364
Episode: 4 Exploration P: 0.8970 Total reward: -836.0830244244004 SOC: 0.7478 Cumulative_SOC_deviation: 77.9036 Fuel Consumption: 57.0468
ma

maximum steps, simulation is done ... 
elapsed_time: 59.486
Episode: 37 Exploration P: 0.3684 Total reward: -2989.232395122365 SOC: 0.3249 Cumulative_SOC_deviation: 296.3754 Fuel Consumption: 25.4788
maximum steps, simulation is done ... 
elapsed_time: 62.137
Episode: 38 Exploration P: 0.3587 Total reward: -2918.3753117596925 SOC: 0.3224 Cumulative_SOC_deviation: 289.3235 Fuel Consumption: 25.1407
maximum steps, simulation is done ... 
elapsed_time: 58.790
Episode: 39 Exploration P: 0.3493 Total reward: -3123.5783401105455 SOC: 0.2855 Cumulative_SOC_deviation: 310.0987 Fuel Consumption: 22.5912
maximum steps, simulation is done ... 
elapsed_time: 57.079
Episode: 40 Exploration P: 0.3401 Total reward: -3012.84132749004 SOC: 0.3160 Cumulative_SOC_deviation: 298.8148 Fuel Consumption: 24.6935
maximum steps, simulation is done ... 
elapsed_time: 56.734
Episode: 41 Exploration P: 0.3311 Total reward: -3285.685086563321 SOC: 0.2707 Cumulative_SOC_deviation: 326.4038 Fuel Consumption: 21.6475

In [None]:
with open("DDPG6_1.pkl", "wb") as f: 
    pickle.dump(results_dict, f, pickle.HIGHEST_PROTOCOL)