In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 
import scipy.io as sio

from vehicle_model_variant_3 import Environment 
from cell_model import CellModel 
from driver_MDP import Driver_MDP 

# Hide every CUDA device from this process so that TensorFlow runs on the CPU.
# NOTE(review): this assignment executes after `import tensorflow`; it presumably
# still takes effect because no GPU has been initialised yet -- confirm, or move
# it above the TensorFlow import.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Paths to the simulation database files (relative to the notebook directory).
# NOTE(review): `drving_cycle` (sic) is only referenced by the commented-out
# Environment construction below; the training cell loads its own cycles.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
# Battery and motor data files; both are passed to Environment(...) by initialization_env().
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# Shared cell model instance handed to every Environment that is created.
cell_model = CellModel()
# env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)
# Driving-cycle generator used by test_agent() via driver.get_cycle().
# NOTE(review): the meaning of the 0.02 argument is defined in driver_MDP -- confirm there.
driver = Driver_MDP(0.02)

# Length of the state vector: sizes the network inputs and the replay-buffer rows.
num_states = 4

In [3]:
class OUActionNoise:
    """Temporally correlated exploration noise (discretised Ornstein-Uhlenbeck process).

    Every call advances the internal value by a mean-reverting drift term plus a
    Gaussian perturbation and returns the new value.

    Args:
        mean: value the process is pulled towards.
        std_deviation: scale of the Gaussian perturbation.
        theta: strength of the pull towards ``mean``.
        dt: step size of the discretisation.
        x_initial: starting value; ``None`` means start from 0.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.theta, self.mean, self.std_dev = theta, mean, std_deviation
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Put the process back at ``x_initial`` (or 0 when none was supplied)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process by one step and return the new sample."""
        # Deterministic pull towards the mean.
        drift = self.theta * (self.mean - self.x_prev) * self.dt
        # Random kick, scaled by sqrt(dt).
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        self.x_prev = self.x_prev + drift + diffusion
        return self.x_prev

In [4]:
class Buffer:
    """Circular replay buffer that also performs one DDPG actor/critic update.

    Besides storing ``(state, action, reward, next_state)`` transitions, the
    buffer keeps running statistics (mean / population std) of the first state
    component, which the code calls "power", and uses them to standardise that
    component of every sampled batch.

    ``learn`` reads the module-level globals ``actor_model``, ``critic_model``,
    ``target_actor``, ``target_critic``, ``actor_optimizer``,
    ``critic_optimizer`` and ``gamma``; they must exist before it is called.

    NOTE(review): only sampled batches are standardised here. The policy
    functions in this notebook pass the state to the actor as-is, so unless the
    environment already standardises power, the actor sees differently scaled
    inputs when acting and when training -- confirm.

    Args:
        buffer_capacity: maximum number of stored transitions; older ones are
            overwritten once the buffer is full.
        batch_size: number of transitions sampled per ``learn`` call.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        # Running statistics of the power component (state[0]).
        self.power_mean = 0
        self.power_std = 0
        self.sum = 0             # running sum of all recorded power values
        self.sum_deviation = 0   # running sum of squared deviations from the mean
        self.N = 0               # number of transitions ever recorded

        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        self.buffer_counter = 0  # total number of record() calls; write slot is counter % capacity

        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))

    def record(self, obs_tuple):
        """Store one transition and update the running power statistics.

        Args:
            obs_tuple: indexable as ``(state, action, reward, next_state)``;
                ``state[0]`` is taken as the power value.
        """
        self.N += 1
        index = self.buffer_counter % self.buffer_capacity
        power = obs_tuple[0][0]

        # Welford's update: the squared-deviation sum must combine the deviation
        # from the OLD mean with the deviation from the NEW mean. Squaring the
        # deviation from the running mean alone (as before) does not yield the
        # variance of the recorded samples.
        delta_old = power - self.power_mean
        self.sum += power
        self.power_mean = self.sum / self.N
        self.sum_deviation += delta_old * (power - self.power_mean)
        # Rounding can leave the accumulator a hair below zero; clamp before sqrt.
        self.power_std = np.sqrt(max(self.sum_deviation, 0.0) / self.N)

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    def learn(self):
        """Sample a batch and apply one critic step followed by one actor step.

        Does nothing when no transition has been recorded yet.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            # np.random.choice(0, ...) would raise; there is nothing to learn from.
            return
        batch_indices = np.random.choice(record_range, self.batch_size)

        # A zero spread (single sample or constant power) would divide by zero
        # and feed NaN/inf into the networks; fall back to centring only.
        power_scale = self.power_std if self.power_std > 0 else 1.0

        # Fancy indexing returns copies, so standardising column 0 in place
        # leaves the raw transitions in the buffer untouched.
        state_batch = self.state_buffer[batch_indices]
        state_batch[:, 0] = (state_batch[:, 0] - self.power_mean) / power_scale

        next_state_batch = self.next_state_buffer[batch_indices]
        next_state_batch[:, 0] = (next_state_batch[:, 0] - self.power_mean) / power_scale

        # The buffers are float64 while the reward was already cast to float32;
        # cast every batch the same way instead of relying on layer autocasting
        # (which emits a warning for float64 inputs).
        state_batch = tf.convert_to_tensor(state_batch, dtype=tf.float32)
        action_batch = tf.convert_to_tensor(self.action_buffer[batch_indices], dtype=tf.float32)
        reward_batch = tf.convert_to_tensor(self.reward_buffer[batch_indices], dtype=tf.float32)
        next_state_batch = tf.convert_to_tensor(next_state_batch, dtype=tf.float32)

        # Critic step: regress Q(s, a) onto the bootstrapped target.
        # NOTE(review): no terminal flag is stored, so the target always
        # bootstraps through next_state -- including the all-zero next_state the
        # training loop records at the end of an episode. Confirm this is intended.
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor step: maximise the critic's value of the actor's own actions.
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Soft-update both target networks towards their online counterparts.

    Each target weight becomes ``(1 - tau) * target + tau * online``. The critic
    pair is updated first, then the actor pair. Operates on the module-level
    ``critic_model`` / ``target_critic`` and ``actor_model`` / ``target_actor``.

    Args:
        tau: interpolation factor; 0 leaves the targets unchanged, 1 copies the
            online weights.
    """
    network_pairs = (
        (critic_model, target_critic),
        (actor_model, target_actor),
    )
    for online_net, target_net in network_pairs:
        blended = [
            target_w * (1 - tau) + tau * online_w
            for target_w, online_w in zip(target_net.weights, online_net.weights)
        ]
        target_net.set_weights(blended)
    

In [6]:
def get_actor():
    """Build the actor network: state vector -> one action value in (0, 1).

    Architecture: two 512-unit ReLU layers followed by a single sigmoid unit.
    The output layer's kernel is initialised uniformly in [-0.003, 0.003], so
    the untrained actor outputs values close to 0.5.

    Returns:
        An uncompiled ``tf.keras.Model`` taking inputs of shape
        ``(batch, num_states)`` and returning shape ``(batch, 1)``.
    """
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `(num_states)` is just the int `num_states`, not a tuple. Pass a real
    # 1-tuple so the shape is unambiguous (newer Keras releases reject a bare int).
    inputs = layers.Input(shape=(num_states,))

    out = layers.Dense(512, activation="relu")(inputs)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic():
    """Build the critic network: (state, action) -> scalar Q-value estimate.

    The state passes through 16- and 32-unit ReLU layers, the action through a
    32-unit ReLU layer; both branches are concatenated and fed through two
    512-unit ReLU layers and a linear single-unit output.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[state, action]`` with shapes
        ``(batch, num_states)`` and ``(batch, 1)`` and returning ``(batch, 1)``.
    """
    # `(num_states)` and `(1)` are plain ints, not tuples. Pass real 1-tuples so
    # the shapes are unambiguous (newer Keras releases reject a bare int).
    state_input = layers.Input(shape=(num_states,))
    state_out = layers.Dense(16, activation="relu")(state_input)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    model = tf.keras.Model([state_input, action_input], outputs)
    return model
    

In [8]:
def policy(state, noise_object):
    """Return a noisy action from the global actor, limited to the legal range.

    The actor output plus one noise sample is scaled by the upper bound and then
    clipped to ``[j_min, j_max]``.

    Args:
        state: batched state tensor; ``state[0][2]`` and ``state[0][3]`` are read
            as the lower / upper action bounds.
        noise_object: zero-argument callable returning one noise sample.

    Returns:
        The clipped action.
    """
    # NOTE(review): the bounds are read at fixed positions 2 and 3 here, whereas
    # policy_epsilon_greedy reads positions -2 and -1; identical only while
    # num_states == 4.
    features = state[0]
    lower, upper = features[2].numpy(), features[3].numpy()
    perturbed = tf.squeeze(actor_model(state)).numpy() + noise_object()
    return np.clip(perturbed * upper, lower, upper)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Pick an action epsilon-greedily within the legal range of ``state``.

    With probability ``eps`` one of 10 evenly spaced values between the bounds
    is chosen uniformly at random; otherwise the global actor's output is scaled
    by the upper bound and clipped to the bounds.

    Args:
        state: batched state tensor; the last two entries of ``state[0]`` are
            read as the lower / upper action bounds.
        eps: exploration probability.

    Returns:
        The selected action.
    """
    features = state[0]
    lower = features[-2].numpy()
    upper = features[-1].numpy()

    explore = random.random() < eps
    if not explore:
        # Greedy branch: follow the actor.
        actor_output = tf.squeeze(actor_model(state)).numpy()
        return np.clip(actor_output * upper, lower, upper)

    # Exploration branch: uniform pick from a 10-point grid over the legal range.
    grid_index = random.randint(0, 9)
    return np.linspace(lower, upper, 10)[grid_index]

In [10]:
# --- Exploration noise -------------------------------------------------------
# Scale of the OU noise. `ou_noise` is only consumed by policy(); the visible
# training loop explores with policy_epsilon_greedy instead.
std_dev = 0.2
# Use the constant rather than repeating the literal, so the two cannot drift apart.
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# --- Optimisers --------------------------------------------------------------
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

# --- DDPG settings -----------------------------------------------------------
total_episodes = 150   # episodes per trial
gamma = 0.95           # discount factor used in the critic target
tau = 0.001            # soft-update factor passed to update_target()

# --- Epsilon-greedy schedule and replay settings ------------------------------
MAX_EPSILON = 1.0        # exploration probability before training starts
MIN_EPSILON = 0.01       # asymptotic exploration probability
DECAY_RATE = 0.00002     # exponential decay rate per environment step
BATCH_SIZE = 32          # transitions sampled per learn() call
DELAY_TRAINING = 3000    # environment steps collected before learning begins

In [11]:
def initialization(weights_root=None):
    """Create fresh online/target networks and a replay buffer.

    The target networks start as exact copies of the online networks. When
    ``weights_root`` is given, all four networks are then loaded from the
    ``.h5`` files in ``./<weights_root>/``.

    Args:
        weights_root: directory (relative to the working directory) holding
            previously saved weights, or ``None`` to start from scratch.

    Returns:
        ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    actor_model, critic_model = get_actor(), get_critic()
    target_actor, target_critic = get_actor(), get_critic()

    # Start the targets in sync with the online networks.
    for online_net, target_net in ((actor_model, target_actor),
                                   (critic_model, target_critic)):
        target_net.set_weights(online_net.get_weights())

    if weights_root is not None:
        print("model is loaded on {}".format(weights_root))
        checkpoints = (
            (actor_model, "./{}/actor_model.h5"),
            (critic_model, "./{}/critic_model.h5"),
            (target_actor, "./{}/target_actor.h5"),
            (target_critic, "./{}/target_critic.h5"),
        )
        for network, path_template in checkpoints:
            network.load_weights(path_template.format(weights_root))

    replay = Buffer(500000, BATCH_SIZE)
    return actor_model, critic_model, target_actor, target_critic, replay

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Save the weights of all four networks as ``.h5`` files under ``./<root>/``.

    Args:
        actor_model, critic_model, target_actor, target_critic: objects exposing
            ``save_weights(path)``.
        root: output directory, relative to the working directory; created
            (including parents) when missing.
    """
    # exist_ok avoids the check-then-create race of `if not exists: makedirs`.
    os.makedirs(root, exist_ok=True)

    actor_model.save_weights("./{}/actor_model.h5".format(root))
    critic_model.save_weights("./{}/critic_model.h5".format(root))
    target_actor.save_weights("./{}/target_actor.h5".format(root))
    target_critic.save_weights("./{}/target_critic.h5".format(root))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor, consider_degradation):
    """Construct an ``Environment`` for one driving cycle.

    The shared module-level ``cell_model``, ``battery_path`` and ``motor_path``
    are forwarded together with the arguments.

    Args:
        driving_path: driving-cycle argument forwarded to ``Environment`` (the
            callers in this notebook pass the cycle data itself).
        reward_factor: forwarded to ``Environment``.
        consider_degradation: forwarded to ``Environment``.

    Returns:
        The newly created environment.
    """
    return Environment(
        cell_model,
        driving_path,
        battery_path,
        motor_path,
        reward_factor,
        consider_degradation,
    )

In [14]:
def test_agent(actor_model, reward_factor, consider_degradation):
    """Run one greedy evaluation episode on a cycle drawn from ``driver``.

    Prints a summary, plots the test cycle next to the chosen actions and
    returns the environment's history.

    Args:
        actor_model: actor network to evaluate.
        reward_factor: forwarded to the environment.
        consider_degradation: forwarded to the environment.

    Returns:
        ``env.history`` of the evaluation episode.
    """
    test_cycle = driver.get_cycle()
    env = initialization_env(test_cycle, reward_factor, consider_degradation)

    total_reward = 0
    state = env.reset()
    while True:
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
        # Compute the greedy action from the actor that was passed in. The
        # previous call to policy_epsilon_greedy(tf_state, -1) always used the
        # module-level actor_model and silently ignored this function's argument.
        j_min = tf_state[0][-2].numpy()
        j_max = tf_state[0][-1].numpy()
        actor_output = tf.squeeze(actor_model(tf_state)).numpy()
        action = np.clip(actor_output * j_max, j_min, j_max)
        next_state, reward, done = env.step(action)

        state = next_state
        total_reward += reward

        if done:
            break

    # Summed absolute distance of the SOC trace from 0.6.
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
    degradation_total = np.sum(np.array(env.history["degradation"]))
    print("******************* Test is start *****************")
    print('Total reward: {}'.format(total_reward),
          "SOC: {:.4f}".format(env.SOC),
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
          "Degradation total: {:.4f}".format(degradation_total)
          )

    print("******************* Test is done *****************")
    print("")
    # Left panel: the driving cycle; right panel: the actions taken on it.
    plt.subplot(1, 2, 1)
    plt.plot(test_cycle)
    plt.subplot(1, 2, 2)
    plt.plot(env.history["Action"])
    plt.show()
    return env.history
    

In [15]:
# print(env.version)

# Train `num_trials` independent DDPG agents and collect per-episode results.
num_trials = 3
results_dict = {}
# glob returns matches in arbitrary, platform-dependent order; sort first so
# that `[:1]` selects the same cycle on every machine.
driving_cycle_paths = sorted(glob.glob("../data/driving_cycles/city/*.mat"))[:1]
if not driving_cycle_paths:
    # Fail early with a clear message instead of a ZeroDivisionError in `ep % len(...)`.
    raise FileNotFoundError(
        "no driving cycle .mat files found under ../data/driving_cycles/city/"
    )
reward_factor = 10
consider_degradation = False

for trial in range(num_trials):
    print("")
    print("Trial {}".format(trial))
    print("")

    # Fresh networks and an empty replay buffer for every trial. These names are
    # module-level globals that Buffer.learn, update_target and the policy
    # functions read directly.
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()

    eps = MAX_EPSILON
    steps = 0  # environment steps taken in this trial (across episodes)

    episode_rewards = []
    episode_train_history = []
#     episode_test_history = []
#     episode_num_test = []
    for ep in range(total_episodes):
        # Cycle through the available driving cycles round-robin.
        i = ep % len(driving_cycle_paths)
        driving_cycle_path = driving_cycle_paths[i]
        print(driving_cycle_path)
        drv_cycle = sio.loadmat(driving_cycle_path)
        driving_cycle = drv_cycle["sch_cycle"][:, 1]

        env = initialization_env(driving_cycle, reward_factor, consider_degradation)

        start = time.time()
        state = env.reset()
        episodic_reward = 0

        while True:
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
            next_state, reward, done = env.step(action)
            if done:
                # The terminal transition is stored with an all-zero next state.
                next_state = [0] * num_states

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward

            # Learning and epsilon decay only start once DELAY_TRAINING steps
            # have been collected; until then eps stays at MAX_EPSILON.
            if steps > DELAY_TRAINING:
                buffer.learn()
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(
                    -DECAY_RATE * (steps - DELAY_TRAINING)
                )

            steps += 1

            if done:
                break

            state = next_state

        elapsed_time = time.time() - start
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward)
        episode_train_history.append(env.history)

        # Summed absolute distance of the SOC trace from 0.6.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
        degradation_total = np.sum(np.array(env.history["degradation"]))
        print(
            'Episode: {}'.format(ep + 1),
            "Exploration P: {:.4f}".format(eps),
            'Total reward: {}'.format(episodic_reward),
            "SOC: {:.4f}".format(env.SOC),
            "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
            "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
            "Total degradation: {:.4f}".format(degradation_total),
        )
        print("")

#         if (ep + 1) % 20 == 0:
#             history = test_agent(actor_model, reward_factor, consider_degradation)
#             episode_test_history.append(history)
#             episode_num_test.append(ep + 1)

    # NOTE(review): every trial saves into the same directory, so only the last
    # trial's weights survive -- confirm this is intended.
    root = "DDPG_norm"
    save_weights(actor_model, critic_model, target_actor, target_critic, root)

    results_dict[trial + 1] = {
        "rewards": episode_rewards,
        "train_history": episode_train_history,
    }


Trial 0

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 10.615
Episode: 1 Exploration P: 1.0000 Total reward: -4995.677966933952 SOC: 1.0000 Cumulative_SOC_deviation: 480.5298 Fuel Consumption: 190.3804 Total degradation: 521.3697

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 12.175
Episode: 2 Exploration P: 1.0000 Total reward: -5024.9056231614095 SOC: 1.0000 Cumulative_SOC_deviation: 483.5948 Fuel Consumption: 188.9579 Total degradation: 522.6208

../data/driving_cycles/city\01_FTP72_fuds.mat


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('

maximum steps, simulation is done ... 
elapsed_time: 69.096
Episode: 21 Exploration P: 0.6006 Total reward: -2163.7566914179256 SOC: 0.9739 Cumulative_SOC_deviation: 201.0660 Fuel Consumption: 153.0966 Total degradation: 441.3581

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.325
Episode: 22 Exploration P: 0.5846 Total reward: -2792.243108709583 SOC: 0.9716 Cumulative_SOC_deviation: 264.0806 Fuel Consumption: 151.4370 Total degradation: 436.3786

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.099
Episode: 23 Exploration P: 0.5690 Total reward: -2223.08157184056 SOC: 0.9823 Cumulative_SOC_deviation: 206.9387 Fuel Consumption: 153.6945 Total degradation: 432.6049

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.084
Episode: 24 Exploration P: 0.5539 Total reward: -2008.9503683709017 SOC: 0.9217 Cumulative_SOC_deviation: 186.04

  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (


maximum steps, simulation is done ... 
elapsed_time: 69.496
Episode: 42 Exploration P: 0.3418 Total reward: -4881.210418360023 SOC: 0.0865 Cumulative_SOC_deviation: 478.9693 Fuel Consumption: 91.5171 Total degradation: 299.3871

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.692
Episode: 43 Exploration P: 0.3328 Total reward: -1690.9292391253916 SOC: 0.6362 Cumulative_SOC_deviation: 156.1687 Fuel Consumption: 129.2422 Total degradation: 374.5967

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.625
Episode: 44 Exploration P: 0.3240 Total reward: -725.1404234548373 SOC: 0.6572 Cumulative_SOC_deviation: 59.8501 Fuel Consumption: 126.6391 Total degradation: 385.0096

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.479
Episode: 45 Exploration P: 0.3155 Total reward: -834.7837234335184 SOC: 0.6448 Cumulative_SOC_deviation: 70.6768 

maximum steps, simulation is done ... 
elapsed_time: 70.146
Episode: 72 Exploration P: 0.1556 Total reward: -355.08772847107826 SOC: 0.6252 Cumulative_SOC_deviation: 23.3080 Fuel Consumption: 122.0074 Total degradation: 362.4226

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.865
Episode: 73 Exploration P: 0.1516 Total reward: -327.0508682638469 SOC: 0.5951 Cumulative_SOC_deviation: 20.7896 Fuel Consumption: 119.1549 Total degradation: 363.1606

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.749
Episode: 74 Exploration P: 0.1478 Total reward: -330.58583258758324 SOC: 0.6190 Cumulative_SOC_deviation: 20.9928 Fuel Consumption: 120.6578 Total degradation: 349.8465

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.970
Episode: 75 Exploration P: 0.1441 Total reward: -364.05675233457515 SOC: 0.6017 Cumulative_SOC_deviation: 24.3303

maximum steps, simulation is done ... 
elapsed_time: 69.939
Episode: 102 Exploration P: 0.0739 Total reward: -202.65540654984235 SOC: 0.6038 Cumulative_SOC_deviation: 8.6929 Fuel Consumption: 115.7264 Total degradation: 380.0731

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.994
Episode: 103 Exploration P: 0.0721 Total reward: -213.10175146409034 SOC: 0.5904 Cumulative_SOC_deviation: 9.5674 Fuel Consumption: 117.4278 Total degradation: 387.6229

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.937
Episode: 104 Exploration P: 0.0705 Total reward: -259.0877065257911 SOC: 0.5916 Cumulative_SOC_deviation: 14.0145 Fuel Consumption: 118.9425 Total degradation: 385.5404

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.756
Episode: 105 Exploration P: 0.0688 Total reward: -241.04380370506567 SOC: 0.5932 Cumulative_SOC_deviation: 12.30

maximum steps, simulation is done ... 
elapsed_time: 71.654
Episode: 132 Exploration P: 0.0380 Total reward: -199.89798580416326 SOC: 0.5999 Cumulative_SOC_deviation: 8.3634 Fuel Consumption: 116.2637 Total degradation: 437.4700

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.865
Episode: 133 Exploration P: 0.0373 Total reward: -217.6978361547631 SOC: 0.6061 Cumulative_SOC_deviation: 10.2737 Fuel Consumption: 114.9606 Total degradation: 415.4170

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.074
Episode: 134 Exploration P: 0.0365 Total reward: -178.2885061539072 SOC: 0.5987 Cumulative_SOC_deviation: 6.5508 Fuel Consumption: 112.7801 Total degradation: 442.4607

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.948
Episode: 135 Exploration P: 0.0358 Total reward: -186.31760931005564 SOC: 0.6024 Cumulative_SOC_deviation: 7.3709

maximum steps, simulation is done ... 
elapsed_time: 56.433
Episode: 3 Exploration P: 0.9781 Total reward: -4993.869104793851 SOC: 0.9996 Cumulative_SOC_deviation: 480.5478 Fuel Consumption: 188.3910 Total degradation: 533.4770

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 67.985
Episode: 4 Exploration P: 0.9519 Total reward: -5002.829899696057 SOC: 0.9989 Cumulative_SOC_deviation: 482.0253 Fuel Consumption: 182.5773 Total degradation: 530.6970

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 68.672
Episode: 5 Exploration P: 0.9264 Total reward: -4937.640269172037 SOC: 1.0000 Cumulative_SOC_deviation: 475.5434 Fuel Consumption: 182.2062 Total degradation: 526.9866

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 68.922
Episode: 6 Exploration P: 0.9016 Total reward: -4880.538740343076 SOC: 0.9997 Cumulative_SOC_deviation: 470.4888 Fu

maximum steps, simulation is done ... 
elapsed_time: 69.309
Episode: 33 Exploration P: 0.4348 Total reward: -1534.8205073110532 SOC: 0.5103 Cumulative_SOC_deviation: 141.5111 Fuel Consumption: 119.7093 Total degradation: 366.9058

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.699
Episode: 34 Exploration P: 0.4233 Total reward: -1417.4872303583588 SOC: 0.4977 Cumulative_SOC_deviation: 129.9994 Fuel Consumption: 117.4931 Total degradation: 361.0488

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.872
Episode: 35 Exploration P: 0.4121 Total reward: -1938.622025687497 SOC: 0.4886 Cumulative_SOC_deviation: 182.1016 Fuel Consumption: 117.6064 Total degradation: 356.0533

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.940
Episode: 36 Exploration P: 0.4012 Total reward: -3202.148789904089 SOC: 0.2864 Cumulative_SOC_deviation: 309.9

maximum steps, simulation is done ... 
elapsed_time: 70.261
Episode: 63 Exploration P: 0.1970 Total reward: -921.5929698290115 SOC: 0.6861 Cumulative_SOC_deviation: 78.6566 Fuel Consumption: 135.0272 Total degradation: 268.0019

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.206
Episode: 64 Exploration P: 0.1920 Total reward: -1489.158446014896 SOC: 0.5953 Cumulative_SOC_deviation: 136.2807 Fuel Consumption: 126.3516 Total degradation: 251.2384

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.062
Episode: 65 Exploration P: 0.1870 Total reward: -451.89163287424776 SOC: 0.5819 Cumulative_SOC_deviation: 32.6827 Fuel Consumption: 125.0644 Total degradation: 331.1669

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.433
Episode: 66 Exploration P: 0.1822 Total reward: -563.6485152398118 SOC: 0.5984 Cumulative_SOC_deviation: 43.9530 

maximum steps, simulation is done ... 
elapsed_time: 70.859
Episode: 93 Exploration P: 0.0921 Total reward: -336.9317549554393 SOC: 0.6155 Cumulative_SOC_deviation: 20.8070 Fuel Consumption: 128.8613 Total degradation: 222.0648

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.965
Episode: 94 Exploration P: 0.0898 Total reward: -435.7285609100981 SOC: 0.6072 Cumulative_SOC_deviation: 30.6998 Fuel Consumption: 128.7302 Total degradation: 226.2302

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.194
Episode: 95 Exploration P: 0.0877 Total reward: -517.8463366975485 SOC: 0.6043 Cumulative_SOC_deviation: 38.9012 Fuel Consumption: 128.8348 Total degradation: 188.3142

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.268
Episode: 96 Exploration P: 0.0856 Total reward: -345.6147755789331 SOC: 0.6067 Cumulative_SOC_deviation: 21.7029 Fu

maximum steps, simulation is done ... 
elapsed_time: 70.736
Episode: 123 Exploration P: 0.0460 Total reward: -479.0464736235104 SOC: 0.5940 Cumulative_SOC_deviation: 35.3109 Fuel Consumption: 125.9375 Total degradation: 367.6647

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.768
Episode: 124 Exploration P: 0.0450 Total reward: -295.07443074808697 SOC: 0.6176 Cumulative_SOC_deviation: 16.8001 Fuel Consumption: 127.0739 Total degradation: 326.4492

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.331
Episode: 125 Exploration P: 0.0441 Total reward: -411.71285159285566 SOC: 0.6298 Cumulative_SOC_deviation: 27.9575 Fuel Consumption: 132.1374 Total degradation: 186.5167

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.599
Episode: 126 Exploration P: 0.0432 Total reward: -828.6158056171172 SOC: 0.6157 Cumulative_SOC_deviation: 69.8



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float

maximum steps, simulation is done ... 
elapsed_time: 70.019
Episode: 23 Exploration P: 0.5690 Total reward: -3056.6202449386046 SOC: 0.9894 Cumulative_SOC_deviation: 290.3565 Fuel Consumption: 153.0554 Total degradation: 450.5447

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.044
Episode: 24 Exploration P: 0.5539 Total reward: -2710.1532883575674 SOC: 0.9559 Cumulative_SOC_deviation: 255.8376 Fuel Consumption: 151.7772 Total degradation: 438.0234

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.236
Episode: 25 Exploration P: 0.5391 Total reward: -1416.970137547761 SOC: 0.8794 Cumulative_SOC_deviation: 126.9873 Fuel Consumption: 147.0974 Total degradation: 416.8346

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.580
Episode: 26 Exploration P: 0.5248 Total reward: -1525.6085146561063 SOC: 0.8819 Cumulative_SOC_deviation: 137.

maximum steps, simulation is done ... 
elapsed_time: 71.017
Episode: 53 Exploration P: 0.2557 Total reward: -654.6386635963067 SOC: 0.6045 Cumulative_SOC_deviation: 53.0090 Fuel Consumption: 124.5489 Total degradation: 330.5324

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.373
Episode: 54 Exploration P: 0.2490 Total reward: -370.57898590179656 SOC: 0.6132 Cumulative_SOC_deviation: 24.7262 Fuel Consumption: 123.3175 Total degradation: 362.5309

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.616
Episode: 55 Exploration P: 0.2425 Total reward: -360.0441877114428 SOC: 0.6333 Cumulative_SOC_deviation: 23.5056 Fuel Consumption: 124.9878 Total degradation: 349.5720

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.619
Episode: 56 Exploration P: 0.2363 Total reward: -438.0051409775512 SOC: 0.6063 Cumulative_SOC_deviation: 31.3181 F

maximum steps, simulation is done ... 
elapsed_time: 70.598
Episode: 83 Exploration P: 0.1178 Total reward: -332.2273942222375 SOC: 0.6011 Cumulative_SOC_deviation: 21.1849 Fuel Consumption: 120.3787 Total degradation: 398.8430

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.721
Episode: 84 Exploration P: 0.1149 Total reward: -220.6028889072184 SOC: 0.5907 Cumulative_SOC_deviation: 10.6089 Fuel Consumption: 114.5139 Total degradation: 379.7323

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.305
Episode: 85 Exploration P: 0.1120 Total reward: -255.02581951882615 SOC: 0.5924 Cumulative_SOC_deviation: 13.7129 Fuel Consumption: 117.8965 Total degradation: 365.9740

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.468
Episode: 86 Exploration P: 0.1093 Total reward: -195.81010419562406 SOC: 0.5978 Cumulative_SOC_deviation: 8.1264 F

maximum steps, simulation is done ... 
elapsed_time: 63.083
Episode: 113 Exploration P: 0.0573 Total reward: -205.36340037252023 SOC: 0.5908 Cumulative_SOC_deviation: 8.5076 Fuel Consumption: 120.2870 Total degradation: 381.7904

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 63.089
Episode: 114 Exploration P: 0.0560 Total reward: -248.66443911283187 SOC: 0.5899 Cumulative_SOC_deviation: 12.8579 Fuel Consumption: 120.0858 Total degradation: 385.1215

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 63.332
Episode: 115 Exploration P: 0.0548 Total reward: -259.92635341196143 SOC: 0.5871 Cumulative_SOC_deviation: 13.9035 Fuel Consumption: 120.8913 Total degradation: 366.3805

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 63.203
Episode: 116 Exploration P: 0.0536 Total reward: -208.2903597039736 SOC: 0.5909 Cumulative_SOC_deviation: 8.67

maximum steps, simulation is done ... 
elapsed_time: 63.277
Episode: 143 Exploration P: 0.0308 Total reward: -336.00806741128815 SOC: 0.6175 Cumulative_SOC_deviation: 21.5704 Fuel Consumption: 120.3043 Total degradation: 271.8942

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 63.144
Episode: 144 Exploration P: 0.0302 Total reward: -353.03698969416024 SOC: 0.5929 Cumulative_SOC_deviation: 23.5518 Fuel Consumption: 117.5195 Total degradation: 314.3784

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 63.021
Episode: 145 Exploration P: 0.0296 Total reward: -169.06771007688698 SOC: 0.6107 Cumulative_SOC_deviation: 5.5394 Fuel Consumption: 113.6736 Total degradation: 340.5490

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 62.977
Episode: 146 Exploration P: 0.0291 Total reward: -297.22448193678474 SOC: 0.5895 Cumulative_SOC_deviation: 18.

In [16]:
# Persist the per-trial rewards and training histories for later analysis.
with open("DDPG_norm.pkl", mode="wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [17]:
# results_dict