In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 
import scipy.io as sio

from vehicle_model_variant_1 import Environment 
from cell_model import CellModel 
from driver_MDP import Driver_MDP 

# Hide every CUDA device from this process so TensorFlow falls back to the CPU.
# NOTE(review): this runs after `import tensorflow`; it is normally still honoured
# because devices are initialised lazily, but setting it before the import is the
# safer order -- confirm on the target TensorFlow version.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Locations of the MATLAB (.mat) data files for the drive cycle, battery and motor.
# NOTE(review): `drving_cycle` (sic) is only referenced by the commented-out line
# below in the cells shown here; the training loop loads its own cycle files.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# Single CellModel instance handed to every Environment built by initialization_env().
cell_model = CellModel()
# env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)
# Supplies the cycle used by test_agent() via driver.get_cycle().
# The meaning of the 0.02 argument is defined by Driver_MDP -- TODO confirm.
driver = Driver_MDP(0.02)

# Length of the state vector: sizes the replay-buffer rows and the network input layers.
num_states = 4

In [3]:
class OUActionNoise:
    """Temporally correlated noise from a discretised Ornstein-Uhlenbeck process.

    Every call advances the process by one step of size ``dt``:
    ``x <- x + theta * (mean - x) * dt + std_deviation * sqrt(dt) * N(0, 1)``
    and returns the new ``x``.

    Args:
        mean: value the process is pulled towards.
        std_deviation: scale of the Gaussian perturbation.
        theta: strength of the pull towards ``mean``.
        dt: step size of the discretisation.
        x_initial: starting value; 0 is used when ``None``.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.mean = mean
        self.std_dev = std_deviation
        self.theta = theta
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Restart the process from ``x_initial`` (or from 0 if none was given)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process one step and return its new value."""
        # Deterministic pull back towards the mean.
        drift = self.theta * (self.mean - self.x_prev) * self.dt
        # Random kick; exactly one standard-normal draw per call.
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        self.x_prev = self.x_prev + drift + diffusion
        return self.x_prev

In [4]:
class Buffer:
    """Circular experience-replay buffer that also performs the DDPG update step.

    Transitions ``(state, action, reward, next_state)`` are stored in
    pre-allocated numpy arrays; once ``buffer_capacity`` records have been
    written, the oldest entries are overwritten.

    ``learn()`` relies on module-level names being defined before it is called:
    ``actor_model``, ``critic_model``, ``target_actor``, ``target_critic``,
    ``actor_optimizer``, ``critic_optimizer`` and ``gamma``.

    Args:
        buffer_capacity: maximum number of transitions kept.
        batch_size: number of transitions sampled per ``learn()`` call.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of record() calls so far (not capped at the capacity).
        self.buffer_counter = 0

        # One row per transition; the state width comes from the module-level `num_states`.
        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))

    def record(self, obs_tuple):
        """Store one transition ``(state, action, reward, next_state)``.

        The write position wraps around, so the oldest record is replaced
        once the buffer is full.
        """
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    @staticmethod
    def _to_float32_tensor(array):
        """Convert a numpy slice of the buffer into a float32 tensor."""
        return tf.cast(tf.convert_to_tensor(array), dtype=tf.float32)

    def learn(self):
        """Sample a mini-batch (with replacement) and update critic, then actor."""
        # Only sample from the slots that have actually been written.
        record_range = min(self.buffer_counter, self.buffer_capacity)
        batch_indices = np.random.choice(record_range, self.batch_size)

        # The buffers are float64 numpy arrays while the networks run in float32.
        # Cast every batch explicitly (as was already done for the rewards) instead of
        # relying on Keras' implicit input autocasting, which emits a warning per model.
        state_batch = self._to_float32_tensor(self.state_buffer[batch_indices])
        action_batch = self._to_float32_tensor(self.action_buffer[batch_indices])
        reward_batch = self._to_float32_tensor(self.reward_buffer[batch_indices])
        next_state_batch = self._to_float32_tensor(self.next_state_buffer[batch_indices])

        # Critic step: regress Q(s, a) onto the one-step target built from the target networks.
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor step: maximise the critic's value of the actor's own actions
        # (implemented as minimising its negative mean).
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Soft-update both target networks towards their online counterparts.

    For every weight: ``target <- target * (1 - tau) + tau * online``.
    Operates on the module-level ``critic_model``/``target_critic`` and
    ``actor_model``/``target_actor``; the critic pair is updated first.

    Args:
        tau: blending factor; larger values move the targets faster.
    """

    def _blend_into(target_net, online_net):
        # Index into the target's weights so a mismatch in length still fails loudly.
        target_vars = target_net.weights
        blended = [
            target_vars[idx] * (1 - tau) + tau * online_var
            for idx, online_var in enumerate(online_net.weights)
        ]
        target_net.set_weights(blended)

    _blend_into(target_critic, critic_model)
    _blend_into(target_actor, actor_model)
    

In [6]:
def get_actor():
    """Build the actor network mapping a state vector to one action in (0, 1).

    Architecture: ``num_states`` inputs -> Dense(512, relu) -> Dense(512, relu)
    -> Dense(1, sigmoid). The callers scale the sigmoid output by the upper
    action bound taken from the state.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    # Very small initial weights on the output layer, so the first actions
    # start close to the sigmoid's midpoint instead of saturating.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` must be a tuple: `(num_states)` is a plain int, which newer Keras
    # releases reject. The trailing comma makes it a 1-tuple.
    inputs = layers.Input(shape=(num_states,))

    out = layers.Dense(512, activation="relu")(inputs)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(out)
    return tf.keras.Model(inputs, outputs)

In [7]:
def get_critic():
    """Build the critic network estimating Q(state, action).

    The state passes through Dense(16, relu) -> Dense(32, relu), the scalar
    action through Dense(32, relu); both branches are concatenated and fed to
    Dense(512, relu) -> Dense(512, relu) -> Dense(1) with a linear output.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[state, action]`` as inputs.
    """
    # `shape` must be a tuple: `(num_states)` / `(1)` are plain ints, which newer
    # Keras releases reject. The trailing commas make them 1-tuples.
    state_input = layers.Input(shape=(num_states,))
    state_out = layers.Dense(16, activation="relu")(state_input)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    # Merge the two branches before the shared trunk.
    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    return tf.keras.Model([state_input, action_input], outputs)
    

In [8]:
def policy(state, noise_object):
    """Pick an action from the module-level actor with additive exploration noise.

    The action bounds are read from the state itself: ``state[0][2]`` is the
    lower bound and ``state[0][3]`` the upper bound.

    Args:
        state: batched state tensor (batch of one); elements must expose ``.numpy()``.
        noise_object: zero-argument callable returning the noise added to the
            actor output.

    Returns:
        The noisy actor output scaled by the upper bound and clipped to the bounds.
    """
    lower = state[0][2].numpy()
    upper = state[0][3].numpy()

    actor_output = tf.squeeze(actor_model(state))
    perturbation = noise_object()
    noisy_output = actor_output.numpy() + perturbation

    return np.clip(noisy_output * upper, lower, upper)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Epsilon-greedy action selection within the bounds carried by the state.

    The last two state entries are the lower and upper action bounds. With
    probability ``eps`` one of 10 evenly spaced actions between the bounds is
    drawn uniformly; otherwise the module-level actor's output is scaled by the
    upper bound and clipped to the bounds.

    Args:
        state: batched state tensor (batch of one); elements must expose ``.numpy()``.
        eps: exploration probability; a negative value always acts greedily.

    Returns:
        The chosen action.
    """
    lower = state[0][-2].numpy()
    upper = state[0][-1].numpy()

    explore = random.random() < eps
    if not explore:
        actor_output = tf.squeeze(actor_model(state)).numpy()
        return np.clip(actor_output * upper, lower, upper)

    grid_index = random.randint(0, 9)
    return np.linspace(lower, upper, 10)[grid_index]

In [10]:
# Exploration noise for policy(); the epsilon-greedy training loop does not use it.
std_dev = 0.2
# Use the constant above rather than repeating the literal, so the two cannot drift apart.
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# Learning rates and Adam optimizers consumed by Buffer.learn().
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 150  # episodes per trial
gamma = 0.95          # discount applied to the target critic's value in Buffer.learn()
tau = 0.001           # blending factor passed to update_target()

# Epsilon schedule: once training has started, eps decays exponentially from
# MAX_EPSILON towards MIN_EPSILON as a function of the environment step count.
MAX_EPSILON = 1.0
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
BATCH_SIZE = 32        # mini-batch size given to the replay buffer
DELAY_TRAINING = 3000  # environment steps collected before learning (and eps decay) begins

In [11]:
def initialization(weights_root=None):
    """Create fresh actor/critic networks, their targets and a replay buffer.

    The target networks start as exact copies of the online networks. When
    ``weights_root`` is given, all four networks are then overwritten with the
    ``.h5`` weight files found under ``./<weights_root>/``.

    Args:
        weights_root: directory (relative to the working directory) holding
            previously saved weights, or ``None`` to start from scratch.

    Returns:
        ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    actor_model = get_actor()
    critic_model = get_critic()
    target_actor = get_actor()
    target_critic = get_critic()

    # Synchronise each target with its online network.
    for target_net, online_net in (
        (target_actor, actor_model),
        (target_critic, critic_model),
    ):
        target_net.set_weights(online_net.get_weights())

    if weights_root is not None:
        print("model is loaded on {}".format(weights_root))
        checkpoints = (
            (actor_model, "./{}/actor_model.h5"),
            (critic_model, "./{}/critic_model.h5"),
            (target_actor, "./{}/target_actor.h5"),
            (target_critic, "./{}/target_critic.h5"),
        )
        for network, path_template in checkpoints:
            network.load_weights(path_template.format(weights_root))

    replay_buffer = Buffer(500000, BATCH_SIZE)
    return actor_model, critic_model, target_actor, target_critic, replay_buffer

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Write the weights of all four networks to ``./<root>/`` as ``.h5`` files.

    The directory is created first if it does not exist yet.

    Args:
        actor_model, critic_model, target_actor, target_critic: objects exposing
            ``save_weights(path)``.
        root: output directory, relative to the working directory.
    """
    directory_missing = not os.path.exists(root)
    if directory_missing:
        os.makedirs(root)

    targets = (
        (actor_model, "./{}/actor_model.h5"),
        (critic_model, "./{}/critic_model.h5"),
        (target_actor, "./{}/target_actor.h5"),
        (target_critic, "./{}/target_critic.h5"),
    )
    for network, path_template in targets:
        network.save_weights(path_template.format(root))

    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor, consider_degradation):
    """Construct an Environment for one driving cycle.

    The module-level ``cell_model``, ``battery_path`` and ``motor_path`` are
    shared by every environment; only the cycle and the two reward settings
    vary per call.

    Args:
        driving_path: driving cycle handed to ``Environment`` as its second argument.
        reward_factor: forwarded to ``Environment`` unchanged.
        consider_degradation: forwarded to ``Environment`` unchanged.

    Returns:
        The newly created ``Environment``.
    """
    return Environment(
        cell_model,
        driving_path,
        battery_path,
        motor_path,
        reward_factor,
        consider_degradation,
    )

In [14]:
def _greedy_action(model, tf_state):
    """Return `model`'s action for a batched state, scaled and clipped to the state's bounds."""
    # The last two state entries carry the lower and upper action bounds.
    j_min = tf_state[0][-2].numpy()
    j_max = tf_state[0][-1].numpy()
    raw_output = tf.squeeze(model(tf_state)).numpy()
    return np.clip(raw_output * j_max, j_min, j_max)


def test_agent(actor_model, reward_factor, consider_degradation):
    """Run one greedy (no exploration) episode on a cycle from ``driver`` and report it.

    Previously the ``actor_model`` argument was ignored: actions came from
    ``policy_epsilon_greedy``, which reads the module-level ``actor_model``.
    The action is now computed from the model that was actually passed in,
    using the same scale-and-clip rule.

    Args:
        actor_model: actor network to evaluate.
        reward_factor: forwarded to the environment.
        consider_degradation: forwarded to the environment.

    Returns:
        ``env.history`` of the evaluation episode.
    """
    test_cycle = driver.get_cycle()
    env = initialization_env(test_cycle, reward_factor, consider_degradation)

    total_reward = 0
    state = env.reset()
    while True:
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
        action = _greedy_action(actor_model, tf_state)
        next_state, reward, done = env.step(action)

        state = next_state
        total_reward += reward

        if done:
            break

    # Summed absolute deviation of the recorded SOC trace from 0.6.
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
    degradation_total = np.sum(np.array(env.history["degradation"]))
    print("******************* Test is start *****************")
    print('Total reward: {}'.format(total_reward),
         "SOC: {:.4f}".format(env.SOC),
         "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
         "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
         "Degradation total: {:.4f}".format(degradation_total)
         )

    print("******************* Test is done *****************")
    print("")
    # Left: the driving cycle that was followed; right: the actions recorded by the environment.
    plt.subplot(1, 2, 1)
    plt.plot(test_cycle)
    plt.subplot(1, 2, 2)
    plt.plot(env.history["Action"])
    plt.show()
    return env.history
    

In [15]:
# Train `num_trials` independent DDPG agents and collect their per-episode results.
#
# The names bound by `initialization()` below (actor_model, critic_model,
# target_actor, target_critic) are module-level on purpose: Buffer.learn(),
# update_target() and policy_epsilon_greedy() read them as globals.

num_trials = 3
results_dict = {}
# glob returns files in arbitrary, filesystem-dependent order; sort so that `[:1]`
# always selects the same cycle on every machine and run.
driving_cycle_paths = sorted(glob.glob("../data/driving_cycles/city/*.mat"))[:1]
if not driving_cycle_paths:
    # Without this check the loop below would fail with ZeroDivisionError on `ep % 0`.
    raise FileNotFoundError(
        "no driving cycle (*.mat) found in ../data/driving_cycles/city/"
    )
reward_factor = 10
consider_degradation = False

for trial in range(num_trials):
    print("")
    print("Trial {}".format(trial))
    print("")

    actor_model, critic_model, target_actor, target_critic, buffer = initialization()

    eps = MAX_EPSILON
    steps = 0  # environment steps taken in this trial, across episodes

    episode_rewards = []
    episode_train_history = []
#     episode_test_history = []
#     episode_num_test = []
    for ep in range(total_episodes):
        # Cycle through the available driving cycles round-robin.
        i = ep % len(driving_cycle_paths)
        driving_cycle_path = driving_cycle_paths[i]
        print(driving_cycle_path)
        drv_cycle = sio.loadmat(driving_cycle_path)
        driving_cycle = drv_cycle["sch_cycle"][:, 1]

        env = initialization_env(driving_cycle, reward_factor, consider_degradation)

        start = time.time()
        state = env.reset()
        episodic_reward = 0

        while True:
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
            next_state, reward, done = env.step(action)
            if done:
                # Terminal transitions are stored with an all-zero next state.
                next_state = [0] * num_states

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward

            # Pure data collection until DELAY_TRAINING steps have passed; afterwards
            # learn on every step and decay epsilon exponentially.
            if steps > DELAY_TRAINING:
                buffer.learn()
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps
                                                                        -DELAY_TRAINING))

            steps += 1

            if done:
                break

            state = next_state

        elapsed_time = time.time() - start
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward)
        episode_train_history.append(env.history)

        # Summed absolute deviation of the recorded SOC trace from 0.6.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
        degradation_total = np.sum(np.array(env.history["degradation"]))
        print(
            'Episode: {}'.format(ep + 1),
            "Exploration P: {:.4f}".format(eps),
            'Total reward: {}'.format(episodic_reward),
            "SOC: {:.4f}".format(env.SOC),
            "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
            "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
            "Total degradation: {:.4f}".format(degradation_total),
        )
        print("")

        # Optional periodic evaluation (disabled):
#         if (ep + 1) % 20 == 0:
#             history = test_agent(actor_model, reward_factor, consider_degradation)
#             episode_test_history.append(history)
#             episode_num_test.append(ep + 1)

    # NOTE(review): every trial saves into the same directory, so only the weights of
    # the last trial survive -- confirm whether per-trial directories are wanted.
    root = "DDPG_power_in_watt"
    save_weights(actor_model, critic_model, target_actor, target_critic, root)

    results_dict[trial + 1] = {
        "rewards": episode_rewards,
        "train_history": episode_train_history,
    }


Trial 0

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 10.468
Episode: 1 Exploration P: 1.0000 Total reward: -5067.916888587105 SOC: 1.0000 Cumulative_SOC_deviation: 487.7763 Fuel Consumption: 190.1536 Total degradation: 525.1671

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 11.569
Episode: 2 Exploration P: 1.0000 Total reward: -5059.98666599966 SOC: 1.0000 Cumulative_SOC_deviation: 486.9307 Fuel Consumption: 190.6793 Total degradation: 524.6837

../data/driving_cycles/city\01_FTP72_fuds.mat


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('fl

maximum steps, simulation is done ... 
elapsed_time: 71.032
Episode: 21 Exploration P: 0.6006 Total reward: -2062.779756647829 SOC: 0.9379 Cumulative_SOC_deviation: 191.2683 Fuel Consumption: 150.0970 Total degradation: 433.0162

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.234
Episode: 22 Exploration P: 0.5846 Total reward: -2196.499897971665 SOC: 0.9994 Cumulative_SOC_deviation: 204.1208 Fuel Consumption: 155.2922 Total degradation: 442.2156

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.162
Episode: 23 Exploration P: 0.5690 Total reward: -2357.197174872022 SOC: 0.9702 Cumulative_SOC_deviation: 220.4503 Fuel Consumption: 152.6946 Total degradation: 434.2669

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.118
Episode: 24 Exploration P: 0.5539 Total reward: -1541.329236485415 SOC: 0.8823 Cumulative_SOC_deviation: 139.428

  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (


maximum steps, simulation is done ... 
elapsed_time: 71.388
Episode: 42 Exploration P: 0.3418 Total reward: -5011.606459728395 SOC: 0.0719 Cumulative_SOC_deviation: 492.1337 Fuel Consumption: 90.2698 Total degradation: 298.5485

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.529
Episode: 43 Exploration P: 0.3328 Total reward: -4727.564037651842 SOC: 0.0803 Cumulative_SOC_deviation: 463.7542 Fuel Consumption: 90.0224 Total degradation: 297.2946

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.369
Episode: 44 Exploration P: 0.3240 Total reward: -4792.016635270668 SOC: 0.0891 Cumulative_SOC_deviation: 470.0304 Fuel Consumption: 91.7129 Total degradation: 297.2881

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.891
Episode: 45 Exploration P: 0.3155 Total reward: -5140.543413871872 SOC: 0.0252 Cumulative_SOC_deviation: 505.2861 F

  2 * r_dis)) * (v_dis - (v_dis ** 2 - 4 * r_dis * p_bat) ** (0.5)) * (p_bat >= 0)


battery power is 7243.959106094168(+) but condition is not avail
elapsed_time: 46.044
Episode: 52 Exploration P: 0.2717 Total reward: [-4044.89626625] SOC: -0.0004 Cumulative_SOC_deviation: 300.3366 Fuel Consumption: 47.5351 Total degradation: 156.8471

../data/driving_cycles/city\01_FTP72_fuds.mat
battery power is 4457.059888197523(+) but condition is not avail
elapsed_time: 56.251
Episode: 53 Exploration P: 0.2661 Total reward: [-4456.97976818] SOC: -0.0006 Cumulative_SOC_deviation: 339.9952 Fuel Consumption: 63.0346 Total degradation: 195.6711

../data/driving_cycles/city\01_FTP72_fuds.mat
battery power is 5809.342646515329(+) but condition is not avail
elapsed_time: 47.074
Episode: 54 Exploration P: 0.2616 Total reward: [-4002.09387868] SOC: -0.0004 Cumulative_SOC_deviation: 295.9392 Fuel Consumption: 48.7063 Total degradation: 169.8154

../data/driving_cycles/city\01_FTP72_fuds.mat
battery power is 12069.58413199924(+) but condition is not avail
elapsed_time: 55.651
Episode: 55 Ex

battery power is 11680.531306837445(+) but condition is not avail
elapsed_time: 31.655
Episode: 80 Exploration P: 0.1757 Total reward: [-2908.29379754] SOC: -0.0020 Cumulative_SOC_deviation: 189.2159 Fuel Consumption: 22.1559 Total degradation: 71.0589

../data/driving_cycles/city\01_FTP72_fuds.mat
battery power is 5529.075879811097(+) but condition is not avail
elapsed_time: 36.897
Episode: 81 Exploration P: 0.1734 Total reward: [-3385.97273196] SOC: -0.0014 Cumulative_SOC_deviation: 236.1248 Fuel Consumption: 30.7399 Total degradation: 98.0072

../data/driving_cycles/city\01_FTP72_fuds.mat
battery power is 6053.485987488464(+) but condition is not avail
elapsed_time: 30.754
Episode: 82 Exploration P: 0.1715 Total reward: [-2903.34407619] SOC: -0.0005 Cumulative_SOC_deviation: 188.8212 Fuel Consumption: 21.1374 Total degradation: 68.9282

../data/driving_cycles/city\01_FTP72_fuds.mat
battery power is 13874.9270521893(+) but condition is not avail
elapsed_time: 40.585
Episode: 83 Explo

maximum steps, simulation is done ... 
elapsed_time: 72.218
Episode: 108 Exploration P: 0.1182 Total reward: -5465.322130738123 SOC: 1.0000 Cumulative_SOC_deviation: 513.9888 Fuel Consumption: 325.4346 Total degradation: 260.3551

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.994
Episode: 109 Exploration P: 0.1152 Total reward: -5494.432989455268 SOC: 1.0000 Cumulative_SOC_deviation: 516.7936 Fuel Consumption: 326.4973 Total degradation: 287.5456

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 72.279
Episode: 110 Exploration P: 0.1124 Total reward: -5487.048453348325 SOC: 1.0000 Cumulative_SOC_deviation: 515.9700 Fuel Consumption: 327.3480 Total degradation: 285.4411

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 72.129
Episode: 111 Exploration P: 0.1096 Total reward: -5479.9202153414735 SOC: 1.0000 Cumulative_SOC_deviation: 51

maximum steps, simulation is done ... 
elapsed_time: 73.018
Episode: 138 Exploration P: 0.0575 Total reward: -5526.300663555755 SOC: 0.9968 Cumulative_SOC_deviation: 522.7274 Fuel Consumption: 299.0265 Total degradation: 174.6638

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 72.027
Episode: 139 Exploration P: 0.0562 Total reward: -5533.43503881876 SOC: 0.9963 Cumulative_SOC_deviation: 523.3843 Fuel Consumption: 299.5918 Total degradation: 170.4788

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 72.475
Episode: 140 Exploration P: 0.0549 Total reward: -5394.261171686734 SOC: 0.9634 Cumulative_SOC_deviation: 512.6691 Fuel Consumption: 267.5705 Total degradation: 165.7408

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.977
Episode: 141 Exploration P: 0.0537 Total reward: -2686.7721059032074 SOC: 0.9969 Cumulative_SOC_deviation: 251

maximum steps, simulation is done ... 
elapsed_time: 70.271
Episode: 8 Exploration P: 0.8539 Total reward: -5156.5486756855225 SOC: 1.0000 Cumulative_SOC_deviation: 494.3120 Fuel Consumption: 213.4289 Total degradation: 525.4677

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.324
Episode: 9 Exploration P: 0.8311 Total reward: -5208.078290312036 SOC: 1.0000 Cumulative_SOC_deviation: 499.4866 Fuel Consumption: 213.2124 Total degradation: 507.8811

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.258
Episode: 10 Exploration P: 0.8088 Total reward: -5211.651852356589 SOC: 1.0000 Cumulative_SOC_deviation: 499.5419 Fuel Consumption: 216.2327 Total degradation: 515.4241

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.603
Episode: 11 Exploration P: 0.7872 Total reward: -5209.9975102470535 SOC: 0.9994 Cumulative_SOC_deviation: 498.967

maximum steps, simulation is done ... 
elapsed_time: 70.835
Episode: 38 Exploration P: 0.3803 Total reward: -2583.6986681906737 SOC: 0.3024 Cumulative_SOC_deviation: 247.9668 Fuel Consumption: 104.0309 Total degradation: 329.4064

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.873
Episode: 39 Exploration P: 0.3702 Total reward: -3157.1167283424898 SOC: 0.3145 Cumulative_SOC_deviation: 305.1900 Fuel Consumption: 105.2163 Total degradation: 328.9903

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.009
Episode: 40 Exploration P: 0.3605 Total reward: -3780.6279903505824 SOC: 0.2433 Cumulative_SOC_deviation: 367.9092 Fuel Consumption: 101.5364 Total degradation: 323.5907

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.614
Episode: 41 Exploration P: 0.3510 Total reward: -4302.043501138327 SOC: 0.1097 Cumulative_SOC_deviation: 420.

battery power is 12510.864123392124(+) but condition is not avail
elapsed_time: 42.102
Episode: 67 Exploration P: 0.2106 Total reward: [-3720.59264066] SOC: -0.0006 Cumulative_SOC_deviation: 268.6444 Fuel Consumption: 40.1555 Total degradation: 129.5435

../data/driving_cycles/city\01_FTP72_fuds.mat
battery power is 9609.887489930785(+) but condition is not avail
elapsed_time: 41.515
Episode: 68 Exploration P: 0.2074 Total reward: [-3761.64686906] SOC: -0.0006 Cumulative_SOC_deviation: 272.8722 Fuel Consumption: 38.9316 Total degradation: 128.2640

../data/driving_cycles/city\01_FTP72_fuds.mat
battery power is 12510.864123392124(+) but condition is not avail
elapsed_time: 41.629
Episode: 69 Exploration P: 0.2042 Total reward: [-3893.3123703] SOC: -0.0006 Cumulative_SOC_deviation: 285.8597 Fuel Consumption: 40.7224 Total degradation: 129.5311

../data/driving_cycles/city\01_FTP72_fuds.mat
battery power is 7846.62201545296(+) but condition is not avail
elapsed_time: 39.875
Episode: 70 Ex

battery power is 11451.3848233688(+) but condition is not avail
elapsed_time: 30.416
Episode: 95 Exploration P: 0.1465 Total reward: [-2848.43075262] SOC: -0.0005 Cumulative_SOC_deviation: 183.4629 Fuel Consumption: 19.8079 Total degradation: 65.1469

../data/driving_cycles/city\01_FTP72_fuds.mat
battery power is 6046.045638449542(+) but condition is not avail
elapsed_time: 29.995
Episode: 96 Exploration P: 0.1449 Total reward: [-2869.89357542] SOC: -0.0003 Cumulative_SOC_deviation: 185.5861 Fuel Consumption: 20.0364 Total degradation: 62.2198

../data/driving_cycles/city\01_FTP72_fuds.mat
battery power is 11832.532999028495(+) but condition is not avail
elapsed_time: 36.680
Episode: 97 Exploration P: 0.1431 Total reward: [-3424.88530261] SOC: -0.0013 Cumulative_SOC_deviation: 240.1821 Fuel Consumption: 29.0778 Total degradation: 91.3225

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.851
Episode: 98 Exploration P: 0.1395 Total rew

maximum steps, simulation is done ... 
elapsed_time: 71.294
Episode: 125 Exploration P: 0.0717 Total reward: -5599.749649217644 SOC: 1.0000 Cumulative_SOC_deviation: 526.1760 Fuel Consumption: 337.9898 Total degradation: 160.7401

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.258
Episode: 126 Exploration P: 0.0700 Total reward: -5593.653688941475 SOC: 1.0000 Cumulative_SOC_deviation: 525.3468 Fuel Consumption: 340.1854 Total degradation: 162.3972

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.216
Episode: 127 Exploration P: 0.0684 Total reward: -5589.816455009218 SOC: 1.0000 Cumulative_SOC_deviation: 525.0621 Fuel Consumption: 339.1959 Total degradation: 156.1246

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.344
Episode: 128 Exploration P: 0.0668 Total reward: -5587.974440608229 SOC: 1.0000 Cumulative_SOC_deviation: 525



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

maximum steps, simulation is done ... 
elapsed_time: 59.066
Episode: 3 Exploration P: 0.9781 Tota

maximum steps, simulation is done ... 
elapsed_time: 70.002
Episode: 25 Exploration P: 0.5391 Total reward: -5406.824740660192 SOC: 1.0000 Cumulative_SOC_deviation: 514.4557 Fuel Consumption: 262.2678 Total degradation: 425.3813

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.875
Episode: 26 Exploration P: 0.5248 Total reward: -5438.476503990978 SOC: 1.0000 Cumulative_SOC_deviation: 518.0961 Fuel Consumption: 257.5159 Total degradation: 436.7024

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.177
Episode: 27 Exploration P: 0.5108 Total reward: -5415.805054700609 SOC: 0.9994 Cumulative_SOC_deviation: 514.7105 Fuel Consumption: 268.7000 Total degradation: 424.9050

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.112
Episode: 28 Exploration P: 0.4973 Total reward: -5399.367965688627 SOC: 1.0000 Cumulative_SOC_deviation: 513.417

maximum steps, simulation is done ... 
elapsed_time: 71.498
Episode: 55 Exploration P: 0.2422 Total reward: -5526.341854401922 SOC: 1.0000 Cumulative_SOC_deviation: 521.4699 Fuel Consumption: 311.6428 Total degradation: 280.4479

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 72.027
Episode: 56 Exploration P: 0.2359 Total reward: -5536.133965994757 SOC: 1.0000 Cumulative_SOC_deviation: 522.3697 Fuel Consumption: 312.4365 Total degradation: 279.2093

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.707
Episode: 57 Exploration P: 0.2298 Total reward: -5547.402792379548 SOC: 1.0000 Cumulative_SOC_deviation: 523.2688 Fuel Consumption: 314.7146 Total degradation: 262.4572

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.913
Episode: 58 Exploration P: 0.2238 Total reward: -5533.344984403862 SOC: 1.0000 Cumulative_SOC_deviation: 522.263

maximum steps, simulation is done ... 
elapsed_time: 72.274
Episode: 85 Exploration P: 0.1119 Total reward: -5584.967423926867 SOC: 1.0000 Cumulative_SOC_deviation: 525.3193 Fuel Consumption: 331.7742 Total degradation: 189.6262

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 72.079
Episode: 86 Exploration P: 0.1091 Total reward: -5585.975050264836 SOC: 1.0000 Cumulative_SOC_deviation: 525.2407 Fuel Consumption: 333.5677 Total degradation: 178.7308

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.881
Episode: 87 Exploration P: 0.1064 Total reward: -5590.888891144332 SOC: 1.0000 Cumulative_SOC_deviation: 525.6837 Fuel Consumption: 334.0522 Total degradation: 186.2782

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.412
Episode: 88 Exploration P: 0.1038 Total reward: -5582.0032860479705 SOC: 1.0000 Cumulative_SOC_deviation: 525.06

maximum steps, simulation is done ... 
elapsed_time: 71.229
Episode: 115 Exploration P: 0.0547 Total reward: -5598.855280542 SOC: 1.0000 Cumulative_SOC_deviation: 525.8824 Fuel Consumption: 340.0308 Total degradation: 148.5867

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.692
Episode: 116 Exploration P: 0.0535 Total reward: -5595.760395109859 SOC: 1.0000 Cumulative_SOC_deviation: 525.6729 Fuel Consumption: 339.0309 Total degradation: 155.2827

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 72.083
Episode: 117 Exploration P: 0.0523 Total reward: -5604.162572450647 SOC: 1.0000 Cumulative_SOC_deviation: 526.1833 Fuel Consumption: 342.3295 Total degradation: 137.6937

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 72.708
Episode: 118 Exploration P: 0.0512 Total reward: -5600.35550754455 SOC: 1.0000 Cumulative_SOC_deviation: 525.929

maximum steps, simulation is done ... 
elapsed_time: 71.402
Episode: 145 Exploration P: 0.0296 Total reward: -5609.825204064043 SOC: 1.0000 Cumulative_SOC_deviation: 526.5465 Fuel Consumption: 344.3601 Total degradation: 130.5816

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.984
Episode: 146 Exploration P: 0.0291 Total reward: -5606.690181514841 SOC: 1.0000 Cumulative_SOC_deviation: 526.2402 Fuel Consumption: 344.2880 Total degradation: 129.3285

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.093
Episode: 147 Exploration P: 0.0286 Total reward: -5605.571115750144 SOC: 1.0000 Cumulative_SOC_deviation: 526.0273 Fuel Consumption: 345.2982 Total degradation: 123.0510

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 71.498
Episode: 148 Exploration P: 0.0281 Total reward: -5604.115513274331 SOC: 1.0000 Cumulative_SOC_deviation: 525

In [16]:
# Persist the per-trial rewards and training histories gathered by the training loop.
with open("DDPG_powers_in_watt.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [17]:
# results_dict