In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 
import scipy.io as sio

from vehicle_model_variant_4 import Environment 
from cell_model import CellModel 
from driver_MDP import Driver_MDP 

# Hide every CUDA device from this process by setting the visibility list to '-1'.
# NOTE(review): this runs after `import tensorflow`; presumably it still takes effect
# because GPU discovery happens lazily - confirm, or move it above the import.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Paths into the simulation database, relative to the notebook's working directory.
# NOTE(review): `drving_cycle` (sic) is only referenced by the commented-out line below
# in the visible part of this notebook.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# Shared objects: cell_model, battery_path and motor_path are handed to Environment
# by initialization_env(); driver supplies cycles to test_agent() via get_cycle().
cell_model = CellModel()
# env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)
driver = Driver_MDP(0.02)  # meaning of the 0.02 argument is not visible here - TODO confirm

# Length of the state vector; sizes the replay-buffer state arrays and the network inputs.
num_states = 4

In [3]:
class OUActionNoise:
    """Temporally correlated noise generated by a discretised Ornstein-Uhlenbeck process.

    Each call moves the previous sample towards ``mean`` at rate ``theta`` and adds
    Gaussian noise scaled by ``std_deviation * sqrt(dt)``.

    Args:
        mean: Value the process reverts to.
        std_deviation: Scale of the random perturbation.
        theta: Mean-reversion rate.
        dt: Time step of the discretisation.
        x_initial: Optional starting value; the process starts at 0 when omitted.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.mean = mean
        self.std_dev = std_deviation
        self.theta = theta
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Restart the process from ``x_initial`` (or from 0 if none was supplied)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process by one step and return the new sample."""
        pull_to_mean = self.theta * (self.mean - self.x_prev) * self.dt
        random_kick = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        sample = self.x_prev + pull_to_mean + random_kick
        self.x_prev = sample
        return sample

In [4]:
class Buffer:
    """Fixed-size ring buffer of (state, action, reward, next_state) transitions.

    Also hosts the DDPG update step, which reads the module-level networks
    (``actor_model``, ``critic_model``, ``target_actor``, ``target_critic``),
    their optimizers and ``gamma``.

    Args:
        buffer_capacity: Maximum number of stored transitions; older entries are
            overwritten once the capacity is exceeded.
        batch_size: Number of transitions sampled per call to ``learn``.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of record() calls so far (may exceed the capacity).
        self.buffer_counter = 0

        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))

    def record(self, obs_tuple):
        """Store one transition given as ``(state, action, reward, next_state)``."""
        # The modulo makes the write position wrap around, overwriting the oldest slot.
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    def learn(self):
        """Sample a batch (with replacement) and apply one critic and one actor update.

        Returns without doing anything when no transition has been recorded yet.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            # Nothing to sample from; np.random.choice would raise on an empty range.
            return
        batch_indices = np.random.choice(record_range, self.batch_size)

        def as_float32(array):
            # The buffers are float64 NumPy arrays; casting here avoids the implicit
            # float64 -> float32 autocast (and its warning) inside the Keras layers.
            return tf.cast(tf.convert_to_tensor(array), dtype=tf.float32)

        state_batch = as_float32(self.state_buffer[batch_indices])
        action_batch = as_float32(self.action_buffer[batch_indices])
        reward_batch = as_float32(self.reward_buffer[batch_indices])
        next_state_batch = as_float32(self.next_state_buffer[batch_indices])

        # NOTE(review): the networks are called without a `training` argument, so their
        # BatchNormalization layers presumably run in inference mode here - confirm.
        # NOTE(review): no terminal flag is stored, so the target always bootstraps
        # from next_state, including for the last transition of an episode.
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor step: maximise the critic's value of the actor's own actions.
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Move the target networks a fraction ``tau`` towards the online networks.

    Every target weight becomes ``target * (1 - tau) + tau * online``. The critic
    pair is updated first, then the actor pair. Operates on the module-level
    ``target_critic``/``critic_model`` and ``target_actor``/``actor_model``.
    """

    def _blend_into(target_model, online_model):
        current_targets = target_model.weights
        blended = [
            current_targets[position] * (1 - tau) + tau * online_weight
            for position, online_weight in enumerate(online_model.weights)
        ]
        target_model.set_weights(blended)

    _blend_into(target_critic, critic_model)
    _blend_into(target_actor, actor_model)
    

In [6]:
def get_actor():
    """Build the actor network.

    The model maps a state vector of length ``num_states`` to a single sigmoid
    output, i.e. a value in (0, 1); the policy functions scale it by ``j_max``.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    # Small uniform initial weights for the output layer.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` must be a tuple: the previous `(num_states)` was just the bare int,
    # which only some Keras versions tolerate.
    inputs = layers.Input(shape=(num_states,))
    inputs_batchnorm = layers.BatchNormalization()(inputs)

    # Batch normalisation is applied to the raw input only, not between hidden layers.
    out = layers.Dense(512, activation="relu")(inputs_batchnorm)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                          kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic():
    """Build the critic network.

    The model takes ``[state, action]`` (state of length ``num_states``, scalar
    action), processes each through its own dense branch, concatenates the two
    and outputs a single unbounded value.

    Returns:
        An uncompiled ``tf.keras.Model`` with two inputs and one output.
    """
    # `shape` must be a tuple: the previous `(num_states)` / `(1)` were bare ints,
    # which only some Keras versions tolerate.
    state_input = layers.Input(shape=(num_states,))
    state_input_batchnorm = layers.BatchNormalization()(state_input)

    # State branch; batch normalisation is applied to the raw state input only.
    state_out = layers.Dense(16, activation="relu")(state_input_batchnorm)
    state_out = layers.Dense(32, activation="relu")(state_out)

    # Action branch.
    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    model = tf.keras.Model([state_input, action_input], outputs)
    return model
    

In [8]:
def policy(state, noise_object):
    """Return the actor's action plus exploration noise, clipped to the legal range.

    Args:
        state: Batched state tensor; ``state[0][2]`` and ``state[0][3]`` are read
            as the lower and upper action bounds.
        noise_object: Callable returning one noise sample per call.

    Returns:
        ``(actor_output + noise) * upper_bound`` clipped to ``[lower, upper]``.
    """
    lower_bound = state[0][2].numpy()
    upper_bound = state[0][3].numpy()
    # The actor is evaluated before the noise source is sampled.
    noisy_fraction = tf.squeeze(actor_model(state)).numpy() + noise_object()
    return np.clip(noisy_fraction * upper_bound, lower_bound, upper_bound)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Choose an action epsilon-greedily within the bounds carried by the state.

    Args:
        state: Batched state tensor; its last two entries are read as the lower
            and upper action bounds.
        eps: Probability of exploring. A value <= 0 always exploits.

    Returns:
        When exploring, one of 10 evenly spaced values between the bounds picked
        uniformly at random; otherwise ``actor_output * upper_bound`` clipped to
        the bounds.
    """
    lower_bound = state[0][-2].numpy()
    upper_bound = state[0][-1].numpy()

    exploring = random.random() < eps
    if not exploring:
        fraction = tf.squeeze(actor_model(state)).numpy()
        return np.clip(fraction * upper_bound, lower_bound, upper_bound)

    choice = random.randint(0, 9)
    candidates = np.linspace(lower_bound, upper_bound, 10)
    return candidates[choice]

In [10]:
# Exploration noise; the scale is taken from std_dev so the two cannot drift apart.
std_dev = 0.2
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# Optimizers used by Buffer.learn() for the critic and actor updates.
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 150  # episodes per trial
gamma = 0.95  # discount factor in the critic target
tau = 0.001  # soft-update rate passed to update_target()

# Epsilon-greedy schedule: eps decays exponentially from MAX_EPSILON towards
# MIN_EPSILON, counted from the step at which learning starts.
MAX_EPSILON = 1.0
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
BATCH_SIZE = 32  # transitions sampled per learn() call
DELAY_TRAINING = 3000  # environment steps collected before learning starts

In [11]:
def initialization(weights_root=None):
    """Create the four DDPG networks and an empty replay buffer.

    The target networks start as exact copies of their online counterparts. When
    ``weights_root`` is given, all four networks are then overwritten with the
    ``.h5`` weight files stored in that directory.

    Args:
        weights_root: Optional directory (relative to the working directory)
            containing previously saved weights.

    Returns:
        ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    online_actor = get_actor()
    online_critic = get_critic()
    frozen_actor = get_actor()
    frozen_critic = get_critic()
    for frozen, online in ((frozen_actor, online_actor), (frozen_critic, online_critic)):
        frozen.set_weights(online.get_weights())

    if weights_root is not None:
        print("model is loaded on {}".format(weights_root))
        checkpoints = (
            (online_actor, "./{}/actor_model.h5"),
            (online_critic, "./{}/critic_model.h5"),
            (frozen_actor, "./{}/target_actor.h5"),
            (frozen_critic, "./{}/target_critic.h5"),
        )
        for network, path_template in checkpoints:
            network.load_weights(path_template.format(weights_root))

    replay = Buffer(500000, BATCH_SIZE)
    return online_actor, online_critic, frozen_actor, frozen_critic, replay

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Save the weights of all four networks as ``.h5`` files under ``root``.

    Args:
        actor_model, critic_model, target_actor, target_critic: Objects exposing
            ``save_weights(path)``.
        root: Output directory; created (including parents) if it does not exist.
            Existing files with the same names are overwritten.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(root, exist_ok=True)

    actor_model.save_weights("./{}/actor_model.h5".format(root))
    critic_model.save_weights("./{}/critic_model.h5".format(root))
    target_actor.save_weights("./{}/target_actor.h5".format(root))
    target_critic.save_weights("./{}/target_critic.h5".format(root))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor, consider_degradation):
    """Create an Environment for one driving cycle.

    The cell model and the battery/motor file paths come from module-level
    variables; the three arguments are forwarded to ``Environment`` unchanged.
    """
    return Environment(
        cell_model,
        driving_path,
        battery_path,
        motor_path,
        reward_factor,
        consider_degradation,
    )

In [14]:
def test_agent(actor_model, reward_factor, consider_degradation):
    """Run one greedy (no exploration) episode on a cycle from ``driver`` and report it.

    The action at every step is computed from the ``actor_model`` argument.
    Previously the argument was ignored, because action selection went through
    ``policy_epsilon_greedy``, which reads the module-level ``actor_model``.

    Args:
        actor_model: Actor network used to choose actions.
        reward_factor: Forwarded to the environment constructor.
        consider_degradation: Forwarded to the environment constructor.

    Returns:
        ``env.history`` of the finished episode.
    """
    test_cycle = driver.get_cycle()
    env = initialization_env(test_cycle, reward_factor, consider_degradation)

    total_reward = 0
    state = env.reset()
    done = False
    while not done:
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
        # Same greedy rule as policy_epsilon_greedy's exploit branch: the last two
        # state entries are the action bounds and the actor output scales j_max.
        j_min = tf_state[0][-2].numpy()
        j_max = tf_state[0][-1].numpy()
        greedy_fraction = tf.squeeze(actor_model(tf_state)).numpy()
        action = np.clip(greedy_fraction * j_max, j_min, j_max)

        state, reward, done = env.step(action)
        total_reward += reward

    # Summary statistics; 0.6 is the SOC value the deviation is measured against.
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
    degradation_total = np.sum(np.array(env.history["degradation"]))
    print("******************* Test is start *****************")
    print('Total reward: {}'.format(total_reward),
         "SOC: {:.4f}".format(env.SOC),
         "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
         "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
         "Degradation total: {:.4f}".format(degradation_total)
         )

    print("******************* Test is done *****************")
    print("")
    # Left panel: the driving cycle; right panel: the actions taken along it.
    plt.subplot(1, 2, 1)
    plt.plot(test_cycle)
    plt.subplot(1, 2, 2)
    plt.plot(env.history["Action"])
    plt.show()
    return env.history
    

In [15]:
# print(env.version)

# Experiment driver: runs `num_trials` independent trainings, each starting from freshly
# initialised networks, and collects per-episode rewards and environment histories.
num_trials = 3
results_dict = {} 
# Only the first matching city cycle is used (note the [:1] slice).
# NOTE(review): if the glob matches nothing, `ep % len(driving_cycle_paths)` below
# raises ZeroDivisionError.
driving_cycle_paths = glob.glob("../data/driving_cycles/city/*.mat")[:1]
reward_factor = 10 
consider_degradation = False 

for trial in range(num_trials): 
    print("")
    print("Trial {}".format(trial))
    print("")
    
    # Rebinds the module-level network names that Buffer.learn(), update_target() and
    # the policy functions read as globals.
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()
    
    # Exploration rate and step counter are per trial; `steps` keeps counting across episodes.
    eps = MAX_EPSILON 
    steps = 0
    
    episode_rewards = [] 
    episode_train_history = [] 
#     episode_test_history = [] 
#     episode_num_test = [] 
    for ep in range(total_episodes):
        # Cycle through the available driving cycles and build a new environment per episode.
        i = ep % len(driving_cycle_paths)
        driving_cycle_path =driving_cycle_paths[i]
        print(driving_cycle_path)
        drv_cycle = sio.loadmat(driving_cycle_path)
        # Column 1 of "sch_cycle" is passed to the environment as the driving cycle.
        driving_cycle = drv_cycle["sch_cycle"][:, 1]

        env = initialization_env(driving_cycle, reward_factor, consider_degradation)
        
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            # The terminal next state is replaced by zeros before being stored.
            # NOTE(review): the buffer stores no done flag, so Buffer.learn() still
            # bootstraps from this all-zero state - confirm this is intended.
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            # Learning (and epsilon decay) only start after DELAY_TRAINING warm-up steps;
            # from then on there is one gradient update and one soft target update per step.
            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps
                                                                        -DELAY_TRAINING))

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward)
        episode_train_history.append(env.history)

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        # Episode summary; 0.6 is the SOC value the deviation is measured against.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        degradation_total = np.sum(np.array(env.history["degradation"]))
        print(
            'Episode: {}'.format(ep + 1),
            "Exploration P: {:.4f}".format(eps),
            'Total reward: {}'.format(episodic_reward), 
            "SOC: {:.4f}".format(env.SOC), 
            "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
            "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
            "Total degradation: {:.4f}".format(degradation_total), 
        )
        print("")
        
#         if (ep + 1) % 20 == 0: 
#             history = test_agent(actor_model, reward_factor, consider_degradation)
#             episode_test_history.append(history) 
#             episode_num_test.append(ep + 1)
            
    # NOTE(review): the same directory is used for every trial, so each trial overwrites
    # the weight files written by the previous one; only the last trial's weights remain.
    root = "DDPG_batchnorm"
    save_weights(actor_model, critic_model, target_actor, target_critic, root)
    
    # Results are keyed by the 1-based trial number.
    results_dict[trial + 1] = {
        "rewards": episode_rewards, 
        "train_history": episode_train_history, 
    }


Trial 0

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 11.135
Episode: 1 Exploration P: 1.0000 Total reward: -5015.261242603985 SOC: 1.0000 Cumulative_SOC_deviation: 482.4438 Fuel Consumption: 190.8236 Total degradation: 523.0843

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 17.558
Episode: 2 Exploration P: 1.0000 Total reward: -5021.933693427642 SOC: 1.0000 Cumulative_SOC_deviation: 482.9492 Fuel Consumption: 192.4420 Total degradation: 527.6469

../data/driving_cycles/city\01_FTP72_fuds.mat


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('f

maximum steps, simulation is done ... 
elapsed_time: 86.940
Episode: 20 Exploration P: 0.6170 Total reward: -3269.5976570940416 SOC: 0.9936 Cumulative_SOC_deviation: 311.6388 Fuel Consumption: 153.2100 Total degradation: 446.3288

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 86.993
Episode: 21 Exploration P: 0.6006 Total reward: -2964.2902475338833 SOC: 0.9891 Cumulative_SOC_deviation: 281.0926 Fuel Consumption: 153.3646 Total degradation: 453.0420

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.467
Episode: 22 Exploration P: 0.5846 Total reward: -1976.5181355956154 SOC: 0.9521 Cumulative_SOC_deviation: 182.4648 Fuel Consumption: 151.8700 Total degradation: 440.5736

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 87.623
Episode: 23 Exploration P: 0.5690 Total reward: -2153.8664162797645 SOC: 0.9360 Cumulative_SOC_deviation: 200

maximum steps, simulation is done ... 
elapsed_time: 88.644
Episode: 50 Exploration P: 0.2763 Total reward: -209.50430065226578 SOC: 0.5918 Cumulative_SOC_deviation: 9.2620 Fuel Consumption: 116.8839 Total degradation: 480.0178

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.104
Episode: 51 Exploration P: 0.2691 Total reward: -213.0255616068982 SOC: 0.6068 Cumulative_SOC_deviation: 9.5685 Fuel Consumption: 117.3408 Total degradation: 472.1192

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.011
Episode: 52 Exploration P: 0.2621 Total reward: -231.62941020312402 SOC: 0.6032 Cumulative_SOC_deviation: 11.4144 Fuel Consumption: 117.4854 Total degradation: 473.8121

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.479
Episode: 53 Exploration P: 0.2553 Total reward: -238.81942145545818 SOC: 0.5920 Cumulative_SOC_deviation: 12.2393 F

maximum steps, simulation is done ... 
elapsed_time: 88.100
Episode: 80 Exploration P: 0.1269 Total reward: -336.38327914263004 SOC: 0.5891 Cumulative_SOC_deviation: 22.3144 Fuel Consumption: 113.2388 Total degradation: 485.3158

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.197
Episode: 81 Exploration P: 0.1237 Total reward: -310.8640166109737 SOC: 0.5988 Cumulative_SOC_deviation: 19.7299 Fuel Consumption: 113.5649 Total degradation: 493.6762

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.095
Episode: 82 Exploration P: 0.1206 Total reward: -248.91625354199056 SOC: 0.5840 Cumulative_SOC_deviation: 13.7114 Fuel Consumption: 111.8022 Total degradation: 524.4058

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.492
Episode: 83 Exploration P: 0.1176 Total reward: -242.86128572546573 SOC: 0.5927 Cumulative_SOC_deviation: 13.1315

maximum steps, simulation is done ... 
elapsed_time: 89.106
Episode: 110 Exploration P: 0.0613 Total reward: -288.8510248003032 SOC: 0.5871 Cumulative_SOC_deviation: 17.9449 Fuel Consumption: 109.4017 Total degradation: 558.4089

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.553
Episode: 111 Exploration P: 0.0599 Total reward: -322.24295352595084 SOC: 0.5931 Cumulative_SOC_deviation: 21.2593 Fuel Consumption: 109.6502 Total degradation: 558.8471

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 90.268
Episode: 112 Exploration P: 0.0585 Total reward: -267.8616506018937 SOC: 0.5888 Cumulative_SOC_deviation: 15.9323 Fuel Consumption: 108.5383 Total degradation: 552.6249

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 90.119
Episode: 113 Exploration P: 0.0572 Total reward: -331.5029167031002 SOC: 0.5918 Cumulative_SOC_deviation: 22.26

maximum steps, simulation is done ... 
elapsed_time: 88.928
Episode: 140 Exploration P: 0.0325 Total reward: -261.414807752226 SOC: 0.5951 Cumulative_SOC_deviation: 15.3442 Fuel Consumption: 107.9730 Total degradation: 557.9932

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.846
Episode: 141 Exploration P: 0.0319 Total reward: -280.55621479611983 SOC: 0.5908 Cumulative_SOC_deviation: 17.2891 Fuel Consumption: 107.6648 Total degradation: 559.6524

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.590
Episode: 142 Exploration P: 0.0313 Total reward: -244.06305172862335 SOC: 0.5929 Cumulative_SOC_deviation: 13.6114 Fuel Consumption: 107.9494 Total degradation: 555.9176

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.406
Episode: 143 Exploration P: 0.0307 Total reward: -274.2222263713489 SOC: 0.5894 Cumulative_SOC_deviation: 16.71

maximum steps, simulation is done ... 
elapsed_time: 86.921
Episode: 10 Exploration P: 0.8088 Total reward: -4608.187658267325 SOC: 0.9994 Cumulative_SOC_deviation: 444.0011 Fuel Consumption: 168.1771 Total degradation: 511.0208

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 87.306
Episode: 11 Exploration P: 0.7872 Total reward: -4537.801916220189 SOC: 0.9986 Cumulative_SOC_deviation: 437.2686 Fuel Consumption: 165.1157 Total degradation: 506.5040

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 87.724
Episode: 12 Exploration P: 0.7661 Total reward: -4326.489536739359 SOC: 0.9997 Cumulative_SOC_deviation: 416.4951 Fuel Consumption: 161.5388 Total degradation: 493.9991

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.052
Episode: 13 Exploration P: 0.7456 Total reward: -4055.3474630158607 SOC: 0.9980 Cumulative_SOC_deviation: 389.65

maximum steps, simulation is done ... 
elapsed_time: 89.732
Episode: 40 Exploration P: 0.3605 Total reward: -302.3474909865296 SOC: 0.6142 Cumulative_SOC_deviation: 17.9312 Fuel Consumption: 123.0356 Total degradation: 415.8120

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.546
Episode: 41 Exploration P: 0.3510 Total reward: -215.89774971869045 SOC: 0.6168 Cumulative_SOC_deviation: 9.5092 Fuel Consumption: 120.8054 Total degradation: 442.2808

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.518
Episode: 42 Exploration P: 0.3418 Total reward: -188.77694247590384 SOC: 0.5987 Cumulative_SOC_deviation: 6.8353 Fuel Consumption: 120.4240 Total degradation: 447.7670

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.910
Episode: 43 Exploration P: 0.3328 Total reward: -209.90025931207862 SOC: 0.6103 Cumulative_SOC_deviation: 8.8783 Fu

maximum steps, simulation is done ... 
elapsed_time: 88.874
Episode: 70 Exploration P: 0.1638 Total reward: -266.81759624983096 SOC: 0.5942 Cumulative_SOC_deviation: 15.3932 Fuel Consumption: 112.8853 Total degradation: 548.1100

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.128
Episode: 71 Exploration P: 0.1596 Total reward: -285.28639264670585 SOC: 0.5907 Cumulative_SOC_deviation: 17.3467 Fuel Consumption: 111.8194 Total degradation: 563.4506

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.091
Episode: 72 Exploration P: 0.1556 Total reward: -299.5012419216405 SOC: 0.5947 Cumulative_SOC_deviation: 18.7141 Fuel Consumption: 112.3600 Total degradation: 566.7645

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.355
Episode: 73 Exploration P: 0.1516 Total reward: -311.10350913616463 SOC: 0.5870 Cumulative_SOC_deviation: 20.0069

maximum steps, simulation is done ... 
elapsed_time: 90.688
Episode: 100 Exploration P: 0.0775 Total reward: -191.19625696424518 SOC: 0.5958 Cumulative_SOC_deviation: 8.1044 Fuel Consumption: 110.1526 Total degradation: 506.5127

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 90.663
Episode: 101 Exploration P: 0.0756 Total reward: -201.43014376574644 SOC: 0.5945 Cumulative_SOC_deviation: 9.1751 Fuel Consumption: 109.6790 Total degradation: 540.1585

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 90.645
Episode: 102 Exploration P: 0.0739 Total reward: -204.12178802070392 SOC: 0.5969 Cumulative_SOC_deviation: 9.5427 Fuel Consumption: 108.6944 Total degradation: 537.6620

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.466
Episode: 103 Exploration P: 0.0721 Total reward: -208.03057054254674 SOC: 0.5950 Cumulative_SOC_deviation: 9.891

maximum steps, simulation is done ... 
elapsed_time: 89.432
Episode: 130 Exploration P: 0.0396 Total reward: -213.57830551852987 SOC: 0.5952 Cumulative_SOC_deviation: 10.5296 Fuel Consumption: 108.2821 Total degradation: 523.0987

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.281
Episode: 131 Exploration P: 0.0388 Total reward: -226.17368422158225 SOC: 0.5934 Cumulative_SOC_deviation: 11.8159 Fuel Consumption: 108.0144 Total degradation: 548.4588

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.527
Episode: 132 Exploration P: 0.0380 Total reward: -271.0094539011281 SOC: 0.5954 Cumulative_SOC_deviation: 16.2666 Fuel Consumption: 108.3438 Total degradation: 565.8962

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.625
Episode: 133 Exploration P: 0.0373 Total reward: -283.0011157032841 SOC: 0.5919 Cumulative_SOC_deviation: 17.4



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

maximum steps, simulation is done ... 
elapsed_time: 73.001
Episode: 3 Exploration P: 0.9781 Total reward: -5090.904004040222 SOC: 1.0000 Cumulative_SOC_deviation: 490.2523 Fuel Consumption: 188.3806 Total degradation: 530.5403

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 86.661
Episode: 4 Exploration P: 0.9519 Total reward: -4925.5023112507715 SOC: 1.0000 Cumulative_SOC_deviation: 474.1173 Fuel Consumption: 184.3296 Total degradation: 524.4175

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 86.700
Episode: 5 Exploration P: 0.9264 Total reward: -4978.37155673891 SOC: 0.9999 Cumulative_

maximum steps, simulation is done ... 
elapsed_time: 78.803
Episode: 31 Exploration P: 0.4587 Total reward: -1036.571205952761 SOC: 0.5968 Cumulative_SOC_deviation: 91.0935 Fuel Consumption: 125.6363 Total degradation: 372.7235

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 78.784
Episode: 32 Exploration P: 0.4466 Total reward: -1765.4368836619833 SOC: 0.5529 Cumulative_SOC_deviation: 164.3048 Fuel Consumption: 122.3893 Total degradation: 382.3005

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 78.618
Episode: 33 Exploration P: 0.4348 Total reward: -1877.328462603113 SOC: 0.4038 Cumulative_SOC_deviation: 176.6711 Fuel Consumption: 110.6177 Total degradation: 357.3504

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 78.569
Episode: 34 Exploration P: 0.4233 Total reward: -2777.10040751764 SOC: 0.3949 Cumulative_SOC_deviation: 266.6184

  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (


maximum steps, simulation is done ... 
elapsed_time: 78.880
Episode: 40 Exploration P: 0.3605 Total reward: -4502.370204211517 SOC: 0.0847 Cumulative_SOC_deviation: 441.2089 Fuel Consumption: 90.2813 Total degradation: 307.3146

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 78.692
Episode: 41 Exploration P: 0.3510 Total reward: -347.08678448110726 SOC: 0.6141 Cumulative_SOC_deviation: 22.1915 Fuel Consumption: 125.1713 Total degradation: 431.4747

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 78.726
Episode: 42 Exploration P: 0.3418 Total reward: -262.52027882429053 SOC: 0.6300 Cumulative_SOC_deviation: 13.8512 Fuel Consumption: 124.0087 Total degradation: 432.7574

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 78.933
Episode: 43 Exploration P: 0.3328 Total reward: -251.6226941231706 SOC: 0.6022 Cumulative_SOC_deviation: 13.1039 

maximum steps, simulation is done ... 
elapsed_time: 61.010
Episode: 70 Exploration P: 0.1638 Total reward: -303.6747483752749 SOC: 0.5947 Cumulative_SOC_deviation: 19.1768 Fuel Consumption: 111.9071 Total degradation: 559.2804

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 61.111
Episode: 71 Exploration P: 0.1596 Total reward: -233.88535755574046 SOC: 0.5867 Cumulative_SOC_deviation: 12.2504 Fuel Consumption: 111.3813 Total degradation: 552.6365

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 60.924
Episode: 72 Exploration P: 0.1556 Total reward: -275.8361780764633 SOC: 0.5838 Cumulative_SOC_deviation: 16.4995 Fuel Consumption: 110.8416 Total degradation: 567.1782

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 60.947
Episode: 73 Exploration P: 0.1516 Total reward: -267.8136076847627 SOC: 0.5914 Cumulative_SOC_deviation: 15.6680 F

maximum steps, simulation is done ... 
elapsed_time: 60.964
Episode: 100 Exploration P: 0.0775 Total reward: -207.85963694978554 SOC: 0.5969 Cumulative_SOC_deviation: 9.8551 Fuel Consumption: 109.3091 Total degradation: 521.4973

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 61.026
Episode: 101 Exploration P: 0.0756 Total reward: -191.57991714630927 SOC: 0.5936 Cumulative_SOC_deviation: 8.2944 Fuel Consumption: 108.6361 Total degradation: 531.8607

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 61.110
Episode: 102 Exploration P: 0.0739 Total reward: -179.72562379622673 SOC: 0.5961 Cumulative_SOC_deviation: 7.0741 Fuel Consumption: 108.9844 Total degradation: 544.6899

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 61.091
Episode: 103 Exploration P: 0.0721 Total reward: -179.6366445481951 SOC: 0.5965 Cumulative_SOC_deviation: 7.0039

maximum steps, simulation is done ... 
elapsed_time: 61.405
Episode: 130 Exploration P: 0.0396 Total reward: -302.3382377219462 SOC: 0.5844 Cumulative_SOC_deviation: 19.4519 Fuel Consumption: 107.8195 Total degradation: 465.8197

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 61.492
Episode: 131 Exploration P: 0.0388 Total reward: -301.6272201853014 SOC: 0.5863 Cumulative_SOC_deviation: 19.4090 Fuel Consumption: 107.5373 Total degradation: 466.2286

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 61.643
Episode: 132 Exploration P: 0.0380 Total reward: -296.1862437374914 SOC: 0.5924 Cumulative_SOC_deviation: 18.7683 Fuel Consumption: 108.5030 Total degradation: 482.0762

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 61.382
Episode: 133 Exploration P: 0.0373 Total reward: -262.7703936037639 SOC: 0.5935 Cumulative_SOC_deviation: 15.392

In [16]:
# Persist the per-trial rewards and training histories with the newest pickle protocol.
with open("DDPG_batchnorm.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [17]:
# results_dict