In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 
import scipy.io as sio

from vehicle_model_variant_2 import Environment 
from cell_model import CellModel 
from driver_MDP import Driver_MDP 

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Paths to the MATLAB data files describing the reference driving cycle,
# the battery pack and the electric motor.
# NOTE(review): `drving_cycle` (sic) is only referenced by the commented-out
# line below in the visible part of the notebook.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# Shared cell model handed to every Environment built by initialization_env().
cell_model = CellModel()
# env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)
# Driving-cycle generator used by test_agent() via driver.get_cycle().
# NOTE(review): the meaning of the 0.02 argument is not visible here -- confirm
# against driver_MDP.
driver = Driver_MDP(0.02)

# Length of the state vector: sizes the replay buffer rows and the network
# inputs. The policies read the last two entries as the action bounds.
num_states = 4

In [3]:
class OUActionNoise:
    """Temporally correlated exploration noise (discretised Ornstein-Uhlenbeck process).

    Each call advances the internal state by one step of size ``dt``: the
    state is pulled towards ``mean`` with strength ``theta`` and perturbed by
    a Gaussian sample scaled by ``std_deviation * sqrt(dt)``.

    Args:
        mean: Value the process reverts to.
        std_deviation: Scale of the random perturbation.
        theta: Mean-reversion rate.
        dt: Step size of the discretisation.
        x_initial: Starting state; ``None`` means start from 0.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.theta, self.mean, self.std_dev = theta, mean, std_deviation
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Rewind the process to ``x_initial`` (or 0 when none was given)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process by one step and return the new state."""
        drift = self.theta * (self.mean - self.x_prev) * self.dt
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        self.x_prev = self.x_prev + drift + diffusion
        return self.x_prev

In [4]:
class Buffer:
    """Circular replay buffer that also performs one DDPG update per ``learn()`` call.

    Transitions are stored in pre-allocated NumPy arrays; once
    ``buffer_capacity`` records have been written, the oldest ones are
    overwritten. The row width of the state arrays comes from the
    module-level ``num_states``.

    Args:
        buffer_capacity: Maximum number of transitions kept.
        batch_size: Number of transitions sampled per ``learn()`` call.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of record() calls so far (not capped at capacity).
        self.buffer_counter = 0

        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))

    def record(self, obs_tuple):
        """Store one transition ``(state, action, reward, next_state)``.

        The write position wraps around, so the oldest record is replaced
        once the buffer is full.
        """
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    @staticmethod
    def _as_float32(array):
        """Convert a NumPy batch to a float32 tensor."""
        return tf.cast(tf.convert_to_tensor(array), dtype=tf.float32)

    def learn(self):
        """Sample a batch (with replacement) and update critic and actor once.

        Uses the module-level ``actor_model``, ``critic_model``,
        ``target_actor``, ``target_critic``, their optimizers and ``gamma``.
        Does nothing when no transition has been recorded yet.

        NOTE(review): no terminal flag is stored, so the critic target
        bootstraps from ``next_state`` for every transition, including the
        last one of an episode -- confirm this is intended.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            # np.random.choice cannot sample from an empty range.
            return
        batch_indices = np.random.choice(record_range, self.batch_size)

        # The buffers are float64 NumPy arrays; cast every batch to float32
        # up front (as was already done for the rewards) so the networks are
        # not fed float64 tensors, which appears to be what triggers the
        # Keras dtype auto-cast warning seen in the training output.
        state_batch = self._as_float32(self.state_buffer[batch_indices])
        action_batch = self._as_float32(self.action_buffer[batch_indices])
        reward_batch = self._as_float32(self.reward_buffer[batch_indices])
        next_state_batch = self._as_float32(self.next_state_buffer[batch_indices])

        # Critic step: regress Q(s, a) onto r + gamma * Q_target(s', pi_target(s')).
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor step: maximise the critic's value of the actor's own actions
        # (minimise its negative mean).
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Soft-update both target networks towards their online counterparts.

    Every target weight becomes ``(1 - tau) * target + tau * online``. The
    critic pair is updated first, then the actor pair; all four networks are
    module-level globals.

    Args:
        tau: Interpolation factor; larger values track the online network faster.
    """

    def _blend_into(target_net, online_net):
        # Mix the weights pairwise, keeping the order of the online network.
        current_target = target_net.weights
        mixed = [
            current_target[idx] * (1 - tau) + tau * online_weight
            for idx, online_weight in enumerate(online_net.weights)
        ]
        target_net.set_weights(mixed)

    _blend_into(target_critic, critic_model)
    _blend_into(target_actor, actor_model)
    

In [6]:
def get_actor():
    """Build the actor (policy) network.

    Architecture: ``num_states`` inputs -> Dense(512, relu) -> Dense(512, relu)
    -> Dense(1, sigmoid). The output layer is initialised with small uniform
    weights in [-0.003, 0.003].

    Returns:
        An uncompiled ``tf.keras.Model`` mapping a batch of states to one
        value in (0, 1) per state.
    """
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` must be a tuple: `(num_states)` is just the bare int, which
    # newer Keras versions reject. The trailing comma makes it a 1-tuple.
    inputs = layers.Input(shape=(num_states,))

    out = layers.Dense(512, activation="relu")(inputs)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic():
    """Build the critic (state-action value) network.

    The state passes through Dense(16, relu) -> Dense(32, relu) and the
    action through Dense(32, relu); the two branches are concatenated and
    fed to Dense(512, relu) -> Dense(512, relu) -> Dense(1) with a linear
    output.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[state, action]`` and
        returning one value per sample.
    """
    # `shape` must be a tuple: `(num_states)` / `(1)` are bare ints, which
    # newer Keras versions reject. The trailing commas make them 1-tuples.
    state_input = layers.Input(shape=(num_states,))
    state_out = layers.Dense(16, activation="relu")(state_input)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    model = tf.keras.Model([state_input, action_input], outputs)
    return model
    

In [8]:
def policy(state, noise_object):
    """Return the actor's action with additive exploration noise.

    The actor output plus one noise sample is scaled by the upper action
    bound and clipped to the bounds stored at indices 2 and 3 of the state.

    Args:
        state: Batched state tensor with a single row.
        noise_object: Zero-argument callable returning a noise sample.

    Returns:
        The clipped action.
    """
    lower_bound = state[0][2].numpy()
    upper_bound = state[0][3].numpy()

    actor_output = tf.squeeze(actor_model(state))
    perturbation = noise_object()
    noisy_output = actor_output.numpy() + perturbation

    return np.clip(noisy_output * upper_bound, lower_bound, upper_bound)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Pick an action epsilon-greedily within the bounds stored in the state.

    With probability ``eps`` a random point is drawn from 10 evenly spaced
    values between the lower and upper bound (the last two state entries).
    Otherwise the global actor's output is scaled by the upper bound and
    clipped to the bounds.

    Args:
        state: Batched state tensor with a single row.
        eps: Exploration probability.

    Returns:
        The selected action.
    """
    lower_bound, upper_bound = state[0][-2].numpy(), state[0][-1].numpy()

    exploit = not (random.random() < eps)
    if exploit:
        actor_output = tf.squeeze(actor_model(state)).numpy()
        return np.clip(actor_output * upper_bound, lower_bound, upper_bound)

    # Explore: uniform choice among 10 grid points spanning the bounds.
    grid_index = random.randint(0, 9)
    candidates = np.linspace(lower_bound, upper_bound, 10)
    return candidates[grid_index]

In [10]:
# Exploration noise for policy(); the standard deviation is defined once and
# reused instead of repeating the literal.
std_dev = 0.2
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# Learning rates and optimizers for the critic and actor updates in Buffer.learn().
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 150  # episodes per trial
gamma = 0.95          # discount factor in the critic target
tau = 0.001           # soft-update factor passed to update_target()

# Epsilon-greedy schedule: epsilon decays exponentially from MAX_EPSILON
# towards MIN_EPSILON once more than DELAY_TRAINING steps have elapsed.
MAX_EPSILON = 1.0
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
BATCH_SIZE = 32        # transitions sampled per learning step
DELAY_TRAINING = 3000  # environment steps collected before learning starts

In [11]:
def initialization(weights_root=None):
    """Create fresh actor/critic networks, their targets and a replay buffer.

    The target networks start as copies of the online networks. When
    ``weights_root`` is given, all four networks are then overwritten with
    the ``.h5`` weight files found in that directory.

    Args:
        weights_root: Directory (relative to the working directory) holding
            previously saved weights, or ``None`` to start from scratch.

    Returns:
        Tuple ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    actor_model, critic_model = get_actor(), get_critic()
    target_actor, target_critic = get_actor(), get_critic()

    # Start the targets as exact copies of the online networks.
    target_actor.set_weights(actor_model.get_weights())
    target_critic.set_weights(critic_model.get_weights())

    if weights_root is not None:
        print("model is loaded on {}".format(weights_root))
        checkpoints = (
            (actor_model, "./{}/actor_model.h5"),
            (critic_model, "./{}/critic_model.h5"),
            (target_actor, "./{}/target_actor.h5"),
            (target_critic, "./{}/target_critic.h5"),
        )
        for network, path_template in checkpoints:
            network.load_weights(path_template.format(weights_root))

    replay_buffer = Buffer(500000, BATCH_SIZE)
    return actor_model, critic_model, target_actor, target_critic, replay_buffer

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Save the weights of all four networks as ``.h5`` files under ``root``.

    Args:
        actor_model: Online actor network.
        critic_model: Online critic network.
        target_actor: Target actor network.
        target_critic: Target critic network.
        root: Output directory (relative to the working directory); created
            if it does not exist, existing files are overwritten.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(root, exist_ok=True)

    actor_model.save_weights("./{}/actor_model.h5".format(root))
    critic_model.save_weights("./{}/critic_model.h5".format(root))
    target_actor.save_weights("./{}/target_actor.h5".format(root))
    target_critic.save_weights("./{}/target_critic.h5".format(root))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor, consider_degradation):
    """Build an ``Environment`` for one driving cycle.

    The shared ``cell_model``, ``battery_path`` and ``motor_path`` come from
    module-level globals; the remaining arguments are forwarded unchanged.

    Args:
        driving_path: Driving cycle handed to the environment.
        reward_factor: Forwarded to ``Environment``.
        consider_degradation: Forwarded to ``Environment``.

    Returns:
        The newly constructed environment.
    """
    return Environment(
        cell_model,
        driving_path,
        battery_path,
        motor_path,
        reward_factor,
        consider_degradation,
    )

In [14]:
def test_agent(actor_model, reward_factor, consider_degradation):
    """Run one greedy (no exploration) episode on a generated driving cycle.

    A cycle is drawn from the global ``driver``, an environment is built for
    it, and the given actor is followed until the environment reports
    ``done``. A summary is printed and the cycle and action trace are plotted.

    Args:
        actor_model: Actor network to evaluate. It is used directly, so a
            model other than the global one can be tested.
        reward_factor: Forwarded to the environment.
        consider_degradation: Forwarded to the environment.

    Returns:
        The environment's ``history`` object for the episode.
    """
    test_cycle = driver.get_cycle()
    env = initialization_env(test_cycle, reward_factor, consider_degradation)

    total_reward = 0
    state = env.reset()
    while True:
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)

        # Greedy action from the actor that was passed in: scale its output
        # by the upper bound and clip to the bounds held in the last two
        # state entries (same rule as the greedy branch of
        # policy_epsilon_greedy, which would read the global actor instead).
        j_min = tf_state[0][-2].numpy()
        j_max = tf_state[0][-1].numpy()
        actor_output = tf.squeeze(actor_model(tf_state)).numpy()
        action = np.clip(actor_output * j_max, j_min, j_max)

        next_state, reward, done = env.step(action)

        state = next_state
        total_reward += reward

        if done:
            break

    # Summed absolute deviation of the SOC trace from 0.6
    # (presumably the reference SOC -- confirm against the environment).
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
    degradation_total = np.sum(np.array(env.history["degradation"]))
    print("******************* Test is start *****************")
    print('Total reward: {}'.format(total_reward),
          "SOC: {:.4f}".format(env.SOC),
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
          "Degradation total: {:.4f}".format(degradation_total)
          )

    print("******************* Test is done *****************")
    print("")
    plt.subplot(1, 2, 1)
    plt.plot(test_cycle)
    plt.subplot(1, 2, 2)
    plt.plot(env.history["Action"])
    plt.show()
    return env.history
    

In [15]:
# print(env.version)

num_trials = 3
results_dict = {}
# glob.glob returns matches in arbitrary, OS-dependent order; sort first so
# the slice below always selects the same driving cycle.
driving_cycle_paths = sorted(glob.glob("../data/driving_cycles/city/*.mat"))[:1]
if not driving_cycle_paths:
    # Fail early with a clear message instead of a ZeroDivisionError in the
    # `ep % len(driving_cycle_paths)` expression below.
    raise FileNotFoundError(
        "no driving cycle .mat files found in ../data/driving_cycles/city"
    )
reward_factor = 10
consider_degradation = False

# Each trial trains a freshly initialised agent for `total_episodes` episodes.
for trial in range(num_trials):
    print("")
    print("Trial {}".format(trial))
    print("")

    actor_model, critic_model, target_actor, target_critic, buffer = initialization()

    eps = MAX_EPSILON
    steps = 0  # environment steps taken in this trial, across episodes

    episode_rewards = []
    episode_train_history = []
#     episode_test_history = []
#     episode_num_test = []
    for ep in range(total_episodes):
        # Cycle through the available driving cycles round-robin.
        i = ep % len(driving_cycle_paths)
        driving_cycle_path = driving_cycle_paths[i]
        print(driving_cycle_path)
        drv_cycle = sio.loadmat(driving_cycle_path)
        driving_cycle = drv_cycle["sch_cycle"][:, 1]

        env = initialization_env(driving_cycle, reward_factor, consider_degradation)

        start = time.time()
        state = env.reset()
        episodic_reward = 0

        while True:
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            if done:
                # The final transition is stored with an all-zero next state.
                next_state = [0] * num_states

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward

            # Learning (and epsilon decay) only starts once enough
            # transitions have been collected.
            if steps > DELAY_TRAINING:
                buffer.learn()
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps
                                                                        -DELAY_TRAINING))

            steps += 1

            if done:
                break

            state = next_state

        elapsed_time = time.time() - start
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward)
        episode_train_history.append(env.history)

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
        degradation_total = np.sum(np.array(env.history["degradation"]))
        print(
            'Episode: {}'.format(ep + 1),
            "Exploration P: {:.4f}".format(eps),
            'Total reward: {}'.format(episodic_reward),
            "SOC: {:.4f}".format(env.SOC),
            "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
            "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
            "Total degradation: {:.4f}".format(degradation_total),
        )
        print("")

#         if (ep + 1) % 20 == 0:
#             history = test_agent(actor_model, reward_factor, consider_degradation)
#             episode_test_history.append(history)
#             episode_num_test.append(ep + 1)

    # NOTE(review): every trial saves to the same directory, so only the
    # weights of the last trial survive -- confirm this is intended.
    root = "DDPG_power_in_kW"
    save_weights(actor_model, critic_model, target_actor, target_critic, root)

    results_dict[trial + 1] = {
        "rewards": episode_rewards,
        "train_history": episode_train_history,
    }


Trial 0

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 10.552
Episode: 1 Exploration P: 1.0000 Total reward: -5003.754764816008 SOC: 0.9992 Cumulative_SOC_deviation: 481.0900 Fuel Consumption: 192.8543 Total degradation: 528.4870

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 11.446
Episode: 2 Exploration P: 1.0000 Total reward: -5088.876122756413 SOC: 1.0000 Cumulative_SOC_deviation: 490.0155 Fuel Consumption: 188.7208 Total degradation: 528.5079

../data/driving_cycles/city\01_FTP72_fuds.mat


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('f

maximum steps, simulation is done ... 
elapsed_time: 69.336
Episode: 21 Exploration P: 0.6006 Total reward: -2547.8470687388785 SOC: 0.9732 Cumulative_SOC_deviation: 239.5616 Fuel Consumption: 152.2308 Total degradation: 448.8738

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.519
Episode: 22 Exploration P: 0.5846 Total reward: -2922.2419034930517 SOC: 0.9864 Cumulative_SOC_deviation: 276.9135 Fuel Consumption: 153.1069 Total degradation: 444.2411

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.622
Episode: 23 Exploration P: 0.5690 Total reward: -2398.6229885267185 SOC: 0.9809 Cumulative_SOC_deviation: 224.5207 Fuel Consumption: 153.4162 Total degradation: 428.0279

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.598
Episode: 24 Exploration P: 0.5539 Total reward: -2142.3378554989563 SOC: 0.9740 Cumulative_SOC_deviation: 198

  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (


maximum steps, simulation is done ... 
elapsed_time: 70.059
Episode: 42 Exploration P: 0.3418 Total reward: -4827.445893780506 SOC: 0.0691 Cumulative_SOC_deviation: 473.7795 Fuel Consumption: 89.6514 Total degradation: 287.7052

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.812
Episode: 43 Exploration P: 0.3328 Total reward: -4458.872493051437 SOC: 0.0780 Cumulative_SOC_deviation: 436.9036 Fuel Consumption: 89.8369 Total degradation: 294.8253

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.037
Episode: 44 Exploration P: 0.3240 Total reward: -4132.5950004642245 SOC: 0.1126 Cumulative_SOC_deviation: 403.9738 Fuel Consumption: 92.8571 Total degradation: 298.9975

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.063
Episode: 45 Exploration P: 0.3155 Total reward: -545.1743857779264 SOC: 0.6315 Cumulative_SOC_deviation: 41.9946 F

maximum steps, simulation is done ... 
elapsed_time: 70.351
Episode: 72 Exploration P: 0.1556 Total reward: -217.94723800583145 SOC: 0.6054 Cumulative_SOC_deviation: 9.9946 Fuel Consumption: 118.0017 Total degradation: 479.9789

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.268
Episode: 73 Exploration P: 0.1516 Total reward: -201.61324539872265 SOC: 0.5951 Cumulative_SOC_deviation: 8.5529 Fuel Consumption: 116.0843 Total degradation: 482.8438

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.259
Episode: 74 Exploration P: 0.1478 Total reward: -226.42128271001278 SOC: 0.5922 Cumulative_SOC_deviation: 11.1474 Fuel Consumption: 114.9471 Total degradation: 479.5508

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.450
Episode: 75 Exploration P: 0.1441 Total reward: -239.19718654997803 SOC: 0.5958 Cumulative_SOC_deviation: 12.3878 

maximum steps, simulation is done ... 
elapsed_time: 70.418
Episode: 102 Exploration P: 0.0739 Total reward: -250.77952816138554 SOC: 0.5902 Cumulative_SOC_deviation: 13.8290 Fuel Consumption: 112.4895 Total degradation: 481.9944

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.245
Episode: 103 Exploration P: 0.0721 Total reward: -259.38020072926054 SOC: 0.5878 Cumulative_SOC_deviation: 14.8729 Fuel Consumption: 110.6509 Total degradation: 481.1382

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.392
Episode: 104 Exploration P: 0.0705 Total reward: -289.61531870391684 SOC: 0.5901 Cumulative_SOC_deviation: 17.7733 Fuel Consumption: 111.8824 Total degradation: 474.4759

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.245
Episode: 105 Exploration P: 0.0688 Total reward: -269.09980901101454 SOC: 0.5838 Cumulative_SOC_deviation: 15

maximum steps, simulation is done ... 
elapsed_time: 71.498
Episode: 132 Exploration P: 0.0380 Total reward: -274.8381816944382 SOC: 0.5929 Cumulative_SOC_deviation: 16.2720 Fuel Consumption: 112.1183 Total degradation: 478.1870

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.513
Episode: 133 Exploration P: 0.0373 Total reward: -282.79867025947425 SOC: 0.5806 Cumulative_SOC_deviation: 17.1750 Fuel Consumption: 111.0485 Total degradation: 479.0192

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.362
Episode: 134 Exploration P: 0.0365 Total reward: -298.6425846454519 SOC: 0.5874 Cumulative_SOC_deviation: 18.6602 Fuel Consumption: 112.0401 Total degradation: 476.1017

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.465
Episode: 135 Exploration P: 0.0358 Total reward: -302.4036883424253 SOC: 0.5782 Cumulative_SOC_deviation: 19.17

maximum steps, simulation is done ... 
elapsed_time: 59.672
Episode: 3 Exploration P: 0.9781 Total reward: -4967.778712317973 SOC: 1.0000 Cumulative_SOC_deviation: 477.9285 Fuel Consumption: 188.4940 Total degradation: 533.5519

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.832
Episode: 4 Exploration P: 0.9519 Total reward: -4988.1539874016535 SOC: 0.9999 Cumulative_SOC_deviation: 480.1392 Fuel Consumption: 186.7623 Total degradation: 527.3394

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.997
Episode: 5 Exploration P: 0.9264 Total reward: -4916.097333907009 SOC: 0.9999 Cumulative_SOC_deviation: 473.2963 Fuel Consumption: 183.1339 Total degradation: 525.7617

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.233
Episode: 6 Exploration P: 0.9016 Total reward: -4874.028872424927 SOC: 1.0000 Cumulative_SOC_deviation: 469.3513 F

maximum steps, simulation is done ... 
elapsed_time: 71.365
Episode: 33 Exploration P: 0.4348 Total reward: -1948.4572618832356 SOC: 0.4763 Cumulative_SOC_deviation: 183.1727 Fuel Consumption: 116.7303 Total degradation: 364.8294

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.187
Episode: 34 Exploration P: 0.4233 Total reward: -1982.2390305867789 SOC: 0.4702 Cumulative_SOC_deviation: 186.5437 Fuel Consumption: 116.8024 Total degradation: 362.3572

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.477
Episode: 35 Exploration P: 0.4121 Total reward: -3140.8568764756287 SOC: 0.2821 Cumulative_SOC_deviation: 303.8187 Fuel Consumption: 102.6703 Total degradation: 335.2369

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.687
Episode: 36 Exploration P: 0.4012 Total reward: -2859.9500291786962 SOC: 0.3192 Cumulative_SOC_deviation: 275

maximum steps, simulation is done ... 
elapsed_time: 71.395
Episode: 63 Exploration P: 0.1964 Total reward: -418.65897308751204 SOC: 0.5929 Cumulative_SOC_deviation: 30.1724 Fuel Consumption: 116.9350 Total degradation: 519.0268

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.752
Episode: 64 Exploration P: 0.1913 Total reward: -318.62121904464584 SOC: 0.5792 Cumulative_SOC_deviation: 20.3995 Fuel Consumption: 114.6257 Total degradation: 529.4780

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.282
Episode: 65 Exploration P: 0.1864 Total reward: -424.33960075619007 SOC: 0.5866 Cumulative_SOC_deviation: 30.9056 Fuel Consumption: 115.2836 Total degradation: 534.0474

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.284
Episode: 66 Exploration P: 0.1816 Total reward: -458.1985210001171 SOC: 0.5761 Cumulative_SOC_deviation: 34.4624

maximum steps, simulation is done ... 
elapsed_time: 72.715
Episode: 93 Exploration P: 0.0918 Total reward: -362.93210077457354 SOC: 0.5924 Cumulative_SOC_deviation: 24.8835 Fuel Consumption: 114.0971 Total degradation: 527.7407

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 72.412
Episode: 94 Exploration P: 0.0896 Total reward: -343.25531749698 SOC: 0.5916 Cumulative_SOC_deviation: 22.9792 Fuel Consumption: 113.4638 Total degradation: 513.1846

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 72.718
Episode: 95 Exploration P: 0.0874 Total reward: -354.26696579514726 SOC: 0.5948 Cumulative_SOC_deviation: 24.1944 Fuel Consumption: 112.3227 Total degradation: 529.7995

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 72.494
Episode: 96 Exploration P: 0.0853 Total reward: -348.0507220894496 SOC: 0.5937 Cumulative_SOC_deviation: 23.4594 Fu

maximum steps, simulation is done ... 
elapsed_time: 71.199
Episode: 123 Exploration P: 0.0459 Total reward: -419.12823291224566 SOC: 0.5874 Cumulative_SOC_deviation: 30.3803 Fuel Consumption: 115.3248 Total degradation: 494.0371

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.760
Episode: 124 Exploration P: 0.0449 Total reward: -388.82706672806415 SOC: 0.5894 Cumulative_SOC_deviation: 27.5952 Fuel Consumption: 112.8750 Total degradation: 498.1716

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.615
Episode: 125 Exploration P: 0.0440 Total reward: -408.41764260716786 SOC: 0.5928 Cumulative_SOC_deviation: 29.4290 Fuel Consumption: 114.1278 Total degradation: 499.4178

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.793
Episode: 126 Exploration P: 0.0430 Total reward: -386.7997677352033 SOC: 0.5915 Cumulative_SOC_deviation: 27.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float

maximum steps, simulation is done ... 
elapsed_time: 70.833
Episode: 23 Exploration P: 0.5690 Total reward: -2052.6512854368098 SOC: 0.9548 Cumulative_SOC_deviation: 190.0585 Fuel Consumption: 152.0658 Total degradation: 433.9185

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.182
Episode: 24 Exploration P: 0.5539 Total reward: -1332.8868310582768 SOC: 0.8694 Cumulative_SOC_deviation: 118.7202 Fuel Consumption: 145.6852 Total degradation: 413.4766

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.502
Episode: 25 Exploration P: 0.5391 Total reward: -1575.966756377515 SOC: 0.9108 Cumulative_SOC_deviation: 142.6509 Fuel Consumption: 149.4579 Total degradation: 411.8050

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.735
Episode: 26 Exploration P: 0.5248 Total reward: -913.2558597364696 SOC: 0.7810 Cumulative_SOC_deviation: 77.28

maximum steps, simulation is done ... 
elapsed_time: 70.225
Episode: 53 Exploration P: 0.2553 Total reward: -369.8854662718664 SOC: 0.5807 Cumulative_SOC_deviation: 24.9172 Fuel Consumption: 120.7133 Total degradation: 468.9219

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.426
Episode: 54 Exploration P: 0.2486 Total reward: -411.8759675894467 SOC: 0.5776 Cumulative_SOC_deviation: 29.1944 Fuel Consumption: 119.9323 Total degradation: 474.6711

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.299
Episode: 55 Exploration P: 0.2422 Total reward: -510.3376567672649 SOC: 0.5741 Cumulative_SOC_deviation: 39.0900 Fuel Consumption: 119.4372 Total degradation: 490.4473

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.639
Episode: 56 Exploration P: 0.2359 Total reward: -405.96304856376526 SOC: 0.5774 Cumulative_SOC_deviation: 28.6984 F

maximum steps, simulation is done ... 
elapsed_time: 71.147
Episode: 83 Exploration P: 0.1176 Total reward: -295.1670401206003 SOC: 0.5932 Cumulative_SOC_deviation: 18.4099 Fuel Consumption: 111.0682 Total degradation: 505.6757

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.261
Episode: 84 Exploration P: 0.1147 Total reward: -263.16258625485665 SOC: 0.5930 Cumulative_SOC_deviation: 15.1503 Fuel Consumption: 111.6597 Total degradation: 515.2579

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.893
Episode: 85 Exploration P: 0.1119 Total reward: -324.64456964366366 SOC: 0.5897 Cumulative_SOC_deviation: 21.3756 Fuel Consumption: 110.8881 Total degradation: 510.6688

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.383
Episode: 86 Exploration P: 0.1091 Total reward: -295.58655658858305 SOC: 0.5946 Cumulative_SOC_deviation: 18.4083

maximum steps, simulation is done ... 
elapsed_time: 63.359
Episode: 113 Exploration P: 0.0572 Total reward: -346.91519054791377 SOC: 0.5783 Cumulative_SOC_deviation: 23.2251 Fuel Consumption: 114.6643 Total degradation: 459.5326

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 63.588
Episode: 114 Exploration P: 0.0559 Total reward: -264.4241277934196 SOC: 0.5863 Cumulative_SOC_deviation: 14.8700 Fuel Consumption: 115.7246 Total degradation: 462.0340

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 63.343
Episode: 115 Exploration P: 0.0547 Total reward: -283.21530569404996 SOC: 0.5773 Cumulative_SOC_deviation: 17.0016 Fuel Consumption: 113.1994 Total degradation: 474.8992

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 63.291
Episode: 116 Exploration P: 0.0535 Total reward: -398.23166813068633 SOC: 0.5818 Cumulative_SOC_deviation: 28.

maximum steps, simulation is done ... 
elapsed_time: 63.421
Episode: 143 Exploration P: 0.0307 Total reward: -299.5460507134884 SOC: 0.5945 Cumulative_SOC_deviation: 18.2528 Fuel Consumption: 117.0181 Total degradation: 491.9711

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 63.373
Episode: 144 Exploration P: 0.0302 Total reward: -294.48802342132007 SOC: 0.5952 Cumulative_SOC_deviation: 17.7988 Fuel Consumption: 116.4996 Total degradation: 491.9566

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 63.484
Episode: 145 Exploration P: 0.0296 Total reward: -284.3214113877719 SOC: 0.5960 Cumulative_SOC_deviation: 16.7326 Fuel Consumption: 116.9955 Total degradation: 497.7747

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 59.247
Episode: 146 Exploration P: 0.0291 Total reward: -301.3337501413594 SOC: 0.5949 Cumulative_SOC_deviation: 18.51

In [16]:
# Persist the per-trial rewards and training histories for later analysis.
with open("DDPG_powers_in_kW.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [17]:
# results_dict