In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 

from vehicle_model_DDPG3 import Environment 
from cell_model import CellModel 

# Set CUDA_VISIBLE_DEVICES to -1 for this process, presumably to hide all GPUs and
# force CPU execution.
# NOTE(review): this runs after `import tensorflow`; confirm it still takes effect
# in the target TensorFlow version (it is conventionally set before the import).
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Paths to the default drive cycle, battery and motor data files (.mat), all given
# relative to the notebook's working directory.
# NOTE(review): `drving_cycle` looks like a typo for `driving_cycle`; the name is
# left unchanged here because other cells may reference it.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
cell_model = CellModel()
# Module-level environment instance. The final argument (10) sits in the position
# that `initialization_env` names `reward_factor`.
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)

# Length of the state vector: used for the network input shape and the replay
# buffer row width. The policy functions read the last two entries as the
# action bounds (j_min, j_max).
num_states = 4

In [3]:
class OUActionNoise:
    """Temporally correlated exploration noise (Ornstein-Uhlenbeck style process).

    Every call advances the process by one step:

        x_next = x + theta * (mean - x) * dt + std_dev * sqrt(dt) * N(0, 1)

    Args:
        mean: value the process is pulled towards.
        std_deviation: scale of the random term.
        theta: strength of the pull towards ``mean``.
        dt: step size.
        x_initial: starting value; when ``None`` the process starts at 0.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.mean = mean
        self.std_dev = std_deviation
        self.theta = theta
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Put the process back at its starting value."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance one step and return the new noise value."""
        pull_to_mean = self.theta * (self.mean - self.x_prev) * self.dt
        random_kick = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        x_next = self.x_prev + pull_to_mean + random_kick
        # Remember the sample so the next call is correlated with this one.
        self.x_prev = x_next
        return x_next

In [4]:
class Buffer:
    """Circular replay buffer that also performs one DDPG gradient step.

    Transitions are stored as ``(state, action, reward, next_state)`` rows in
    pre-allocated numpy arrays; once ``buffer_capacity`` rows have been written
    the oldest rows are overwritten.

    ``learn`` relies on these module-level names being defined before it is
    called: ``actor_model``, ``critic_model``, ``target_actor``,
    ``target_critic``, ``actor_optimizer``, ``critic_optimizer`` and ``gamma``.
    The row width of the state arrays comes from the module-level ``num_states``.

    Args:
        buffer_capacity: maximum number of transitions kept.
        batch_size: number of transitions sampled per ``learn`` call.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of record() calls so far (not capped at the capacity).
        self.buffer_counter = 0

        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))

    def record(self, obs_tuple):
        """Store one transition ``(state, action, reward, next_state)``."""
        # Wrap around so the oldest entry is replaced once the buffer is full.
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    def learn(self):
        """Sample a mini-batch and update the critic, then the actor.

        Does nothing when no transition has been recorded yet; sampling from an
        empty range would otherwise fail inside ``np.random.choice``.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            return
        # Indices are drawn with replacement (np.random.choice default).
        batch_indices = np.random.choice(record_range, self.batch_size)

        state_batch = tf.convert_to_tensor(self.state_buffer[batch_indices])
        action_batch = tf.convert_to_tensor(self.action_buffer[batch_indices])
        reward_batch = tf.convert_to_tensor(self.reward_buffer[batch_indices])
        reward_batch = tf.cast(reward_batch, dtype=tf.float32)
        next_state_batch = tf.convert_to_tensor(self.next_state_buffer[batch_indices])

        # NOTE(review): the models are called without `training=True`, so their
        # BatchNormalization layers presumably run in inference mode here --
        # confirm this is intended.
        # Critic update: regress Q(s, a) towards r + gamma * Q'(s', mu'(s')).
        # No terminal mask is applied; every sampled transition bootstraps.
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor update: maximise the critic's value of the actor's own actions
        # (implemented as minimising the negated mean).
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def _soft_update(target_model, source_model, tau):
    """Move each weight of ``target_model`` a fraction ``tau`` towards ``source_model``."""
    for target_var, source_var in zip(target_model.weights, source_model.weights):
        # In-place assignment avoids rebuilding a full weight list per call.
        target_var.assign(target_var * (1 - tau) + tau * source_var)


def update_target(tau):
    """Soft-update both target networks from their online counterparts.

    For every weight: ``target = (1 - tau) * target + tau * online``.
    Operates on the module-level ``target_critic``/``critic_model`` and
    ``target_actor``/``actor_model`` pairs.

    Args:
        tau: interpolation factor; 0 leaves the targets untouched, 1 copies
            the online weights.
    """
    _soft_update(target_critic, critic_model, tau)
    _soft_update(target_actor, actor_model, tau)
    

In [6]:
def get_actor():
    """Build the actor network.

    Architecture: input of width ``num_states`` -> BatchNormalization ->
    Dense(512, relu) -> Dense(512, relu) -> Dense(1, sigmoid).

    Returns:
        A ``tf.keras.Model`` mapping a batch of states to one value per state
        in (0, 1); the policy functions scale this by the upper action bound.
    """
    # Small uniform init for the last layer's kernel.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` must be a tuple: `(num_states)` is just an int, which newer Keras
    # versions reject.
    inputs = layers.Input(shape=(num_states,))
    normalized_inputs = layers.BatchNormalization()(inputs)

    hidden = layers.Dense(512, activation="relu")(normalized_inputs)
    hidden = layers.Dense(512, activation="relu")(hidden)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(hidden)
    return tf.keras.Model(inputs, outputs)

In [7]:
def get_critic():
    """Build the critic (Q-value) network.

    Architecture:
        state  -> BatchNormalization -> Dense(16, relu) -> Dense(32, relu)
        action -> Dense(32, relu)
        concat(state branch, action branch) -> Dense(512, relu)
            -> Dense(512, relu) -> Dense(1)  (linear output)

    Returns:
        A ``tf.keras.Model`` taking ``[state_batch, action_batch]`` and
        returning one scalar per row.
    """
    # `shape` must be a tuple: `(num_states)` / `(1)` are plain ints, which
    # newer Keras versions reject.
    state_input = layers.Input(shape=(num_states,))
    state_normalized = layers.BatchNormalization()(state_input)
    state_out = layers.Dense(16, activation="relu")(state_normalized)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    # Both branches are merged before the shared fully connected head.
    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    return tf.keras.Model([state_input, action_input], outputs)
    

In [8]:
def policy(state, noise_object):
    """Pick an action from the actor and perturb it with additive noise.

    Args:
        state: batched state tensor with a single row; entries 2 and 3 of that
            row are read as the lower / upper action bound.
        noise_object: zero-argument callable returning the noise to add to the
            actor output.

    Returns:
        ``(actor_output + noise) * j_max`` clipped to ``[j_min, j_max]``.
    """
    row = state[0]
    j_min = row[2].numpy()
    j_max = row[3].numpy()

    actor_output = tf.squeeze(actor_model(state)).numpy()
    perturbed = actor_output + noise_object()

    # Scale by the upper bound, then keep the result inside the legal range.
    return np.clip(perturbed * j_max, j_min, j_max)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Epsilon-greedy action selection over the legal action range.

    Args:
        state: batched state with a single row; the last two entries of that
            row are read as the lower / upper action bound.
        eps: probability of taking a random action.

    Returns:
        With probability ``eps``: one of 10 evenly spaced values between
        ``j_min`` and ``j_max`` (inclusive), picked uniformly at random.
        Otherwise: the actor output scaled by ``j_max`` and clipped to
        ``[j_min, j_max]``.
    """
    row = state[0]
    j_min = row[-2].numpy()
    j_max = row[-1].numpy()

    exploring = random.random() < eps
    if not exploring:
        # Greedy branch: follow the actor network.
        actor_output = tf.squeeze(actor_model(state)).numpy()
        return np.clip(actor_output * j_max, j_min, j_max)

    # Exploration branch: uniform choice among 10 discretised actions.
    level = random.randint(0, 9)
    candidates = np.linspace(j_min, j_max, 10)
    return candidates[level]

In [10]:
# Ornstein-Uhlenbeck exploration noise. `std_dev` is now actually passed to the
# noise object instead of repeating the literal 0.2.
# NOTE(review): the training loop in this notebook explores with
# `policy_epsilon_greedy`, so `ou_noise` is only needed if `policy` is used.
std_dev = 0.2
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# Adam learning rates for the critic and the actor.
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 500  # episodes per trial
gamma = 0.95          # discount factor used in the critic target
tau = 0.001           # soft-update factor passed to update_target

# Epsilon-greedy schedule: eps decays exponentially from MAX_EPSILON towards
# MIN_EPSILON at DECAY_RATE per step, counted from DELAY_TRAINING onwards.
MAX_EPSILON = 1
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
BATCH_SIZE = 32         # mini-batch size handed to the replay buffer
DELAY_TRAINING = 10000  # environment steps collected before learning starts

In [11]:
def initialization():
    """Create fresh networks and an empty replay buffer for one trial.

    The target networks start as exact copies of their online counterparts.

    Returns:
        Tuple ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    online_actor = get_actor()
    online_critic = get_critic()

    actor_copy = get_actor()
    critic_copy = get_critic()
    # Synchronise each target with its freshly initialised online network.
    for copy_net, online_net in ((actor_copy, online_actor), (critic_copy, online_critic)):
        copy_net.set_weights(online_net.get_weights())

    replay = Buffer(500000, BATCH_SIZE)
    return online_actor, online_critic, actor_copy, critic_copy, replay

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Write the weights of all four networks under the directory ``./<root>/``.

    Args:
        actor_model, critic_model, target_actor, target_critic: objects
            exposing ``save_weights(path)``.
        root: directory name (relative to the working directory) that the
            checkpoint paths are built from.
    """
    checkpoints = (
        (actor_model, "./{}/actor_model_checkpoint"),
        (critic_model, "./{}/critic_model_checkpoint"),
        (target_actor, "./{}/target_actor_checkpoint"),
        (target_critic, "./{}/target_critic_checkpoint"),
    )
    for network, path_template in checkpoints:
        network.save_weights(path_template.format(root))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor):
    """Build an ``Environment`` for the given drive cycle.

    The cell model, battery file and motor file come from the module-level
    ``cell_model``, ``battery_path`` and ``motor_path``.

    Args:
        driving_path: path of the drive-cycle file to simulate.
        reward_factor: forwarded unchanged as the last ``Environment`` argument.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [14]:
print(env.version)

num_trials = 3
results_dict = {}
# glob returns matches in arbitrary order, so sort before taking the first three;
# otherwise the selected training cycles depend on the directory listing order.
driving_cycle_paths = sorted(glob.glob("training/*.mat"))[:3]
if not driving_cycle_paths:
    # Fail early with a clear message instead of a ValueError from
    # np.random.choice on the first episode.
    raise FileNotFoundError("no driving cycle files match 'training/*.mat'")

# Each trial trains a brand-new agent; per-episode metrics are collected in
# results_dict under the 1-based trial number.
for trial in range(num_trials):
    print("")
    print("Trial {}".format(trial))
    print("")

    # These names are rebound at module level because Buffer.learn and
    # update_target look the networks up as globals.
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()

    eps = MAX_EPSILON
    steps = 0  # environment steps taken in this trial, across episodes

    episode_rewards = []
    episode_SOCs = []
    episode_FCs = []
    for ep in range(total_episodes):
        # A random training cycle is drawn and a new environment built per episode.
        driving_cycle_path = np.random.choice(driving_cycle_paths)
        print(driving_cycle_path)
        env = initialization_env(driving_cycle_path, 10)

        start = time.time()
        state = env.reset()
        episodic_reward = 0

        while True:
            # Add a batch dimension before querying the actor.
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
            next_state, reward, done = env.step(action)
            if done:
                # The final transition is stored with an all-zero next state.
                # NOTE(review): Buffer.learn applies no terminal mask, so the
                # critic target still bootstraps from this zero state.
                next_state = [0] * num_states

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward

            # Learning and epsilon decay only start once more than
            # DELAY_TRAINING steps have been collected.
            if steps > DELAY_TRAINING:
                buffer.learn()
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(
                    -DECAY_RATE * (steps - DELAY_TRAINING)
                )

            steps += 1

            if done:
                break

            state = next_state

        elapsed_time = time.time() - start
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward)
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption)

        # Sum of |SOC - 0.6| over the episode's SOC history (0.6 is presumably
        # the SOC reference level -- confirm against the Environment).
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward),
              "SOC: {:.4f}".format(env.SOC),
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
        )
        print("")

    root = "DDPG3_trial{}".format(trial+1)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)

    results_dict[trial + 1] = {
        "rewards": episode_rewards,
        "SOCs": episode_SOCs,
        "FCs": episode_FCs
    }

1

Trial 0

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 21.323
Episode: 1 Exploration P: 1.0000 Total reward: -1059.6634428306377 SOC: 0.8135 Cumulative_SOC_deviation: 99.9229 Fuel Consumption: 60.4347

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 18.345
Episode: 2 Exploration P: 1.0000 Total reward: -2742.5826924716216 SOC: 1.0000 Cumulative_SOC_deviation: 269.4851 Fuel Consumption: 47.7319

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 15.557
Episode: 3 Exploration P: 1.0000 Total reward: -981.5831262670164 SOC: 0.6502 Cumulative_SOC_deviation: 93.4759 Fuel Consumption: 46.8239

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 16.553
Episode: 4 Exploration P: 1.0000 Total reward: -2631.7639983020767 SOC: 1.0000 Cumulative_SOC_deviation: 258.5091 Fuel Consumption: 46.6728

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 15

maximum steps, simulation is done ... 
elapsed_time: 64.231
Episode: 25 Exploration P: 0.6909 Total reward: -1194.8327191980802 SOC: 0.4346 Cumulative_SOC_deviation: 116.2953 Fuel Consumption: 31.8799

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 94.053
Episode: 26 Exploration P: 0.6724 Total reward: -1515.429491448166 SOC: 0.5479 Cumulative_SOC_deviation: 147.4070 Fuel Consumption: 41.3600

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 101.380
Episode: 27 Exploration P: 0.6545 Total reward: -1702.938602438173 SOC: 0.5335 Cumulative_SOC_deviation: 166.2590 Fuel Consumption: 40.3489

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 77.299
Episode: 28 Exploration P: 0.6410 Total reward: -1165.5003208154455 SOC: 0.4263 Cumulative_SOC_deviation: 113.4304 Fuel Consumption: 31.1963

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 80.816
Episode: 29 Exploration P:

maximum steps, simulation is done ... 
elapsed_time: 82.729
Episode: 61 Exploration P: 0.2990 Total reward: -317.3620097239925 SOC: 0.6053 Cumulative_SOC_deviation: 27.2308 Fuel Consumption: 45.0541

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 65.971
Episode: 62 Exploration P: 0.2927 Total reward: -389.7435805661984 SOC: 0.6720 Cumulative_SOC_deviation: 37.5788 Fuel Consumption: 13.9556

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.756
Episode: 63 Exploration P: 0.2851 Total reward: -300.5829121326282 SOC: 0.6083 Cumulative_SOC_deviation: 25.5122 Fuel Consumption: 45.4607

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 65.793
Episode: 64 Exploration P: 0.2791 Total reward: -437.71344677694924 SOC: 0.6743 Cumulative_SOC_deviation: 42.3605 Fuel Consumption: 14.1080

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.330
Episode: 65 Exploration P: 0.2735

maximum steps, simulation is done ... 
elapsed_time: 64.707
Episode: 98 Exploration P: 0.1301 Total reward: -480.3715301573578 SOC: 0.5905 Cumulative_SOC_deviation: 43.7891 Fuel Consumption: 42.4801

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.326
Episode: 99 Exploration P: 0.1275 Total reward: -480.1911702233141 SOC: 0.5917 Cumulative_SOC_deviation: 43.7497 Fuel Consumption: 42.6938

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 84.374
Episode: 100 Exploration P: 0.1244 Total reward: -293.15732798045366 SOC: 0.5955 Cumulative_SOC_deviation: 24.9145 Fuel Consumption: 44.0127

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 65.013
Episode: 101 Exploration P: 0.1220 Total reward: -487.137259817662 SOC: 0.5895 Cumulative_SOC_deviation: 44.4740 Fuel Consumption: 42.3968

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 66.662
Episode: 102 Exploration P: 0.1

maximum steps, simulation is done ... 
elapsed_time: 66.092
Episode: 135 Exploration P: 0.0591 Total reward: -70.43944553129748 SOC: 0.5940 Cumulative_SOC_deviation: 6.2635 Fuel Consumption: 7.8049

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.924
Episode: 136 Exploration P: 0.0578 Total reward: -327.7954458713616 SOC: 0.5905 Cumulative_SOC_deviation: 28.4144 Fuel Consumption: 43.6512

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 66.534
Episode: 137 Exploration P: 0.0568 Total reward: -83.46627262296586 SOC: 0.5924 Cumulative_SOC_deviation: 7.5709 Fuel Consumption: 7.7572

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.541
Episode: 138 Exploration P: 0.0555 Total reward: -347.97065818923016 SOC: 0.5888 Cumulative_SOC_deviation: 30.4348 Fuel Consumption: 43.6228

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.683
Episode: 139 Exploration P: 0.05

maximum steps, simulation is done ... 
elapsed_time: 64.707
Episode: 171 Exploration P: 0.0308 Total reward: -419.64750084796833 SOC: 0.5882 Cumulative_SOC_deviation: 37.7338 Fuel Consumption: 42.3098

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 60.263
Episode: 172 Exploration P: 0.0304 Total reward: -430.4317289192492 SOC: 0.5865 Cumulative_SOC_deviation: 38.8215 Fuel Consumption: 42.2165

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 61.952
Episode: 173 Exploration P: 0.0300 Total reward: -111.69583710423235 SOC: 0.5906 Cumulative_SOC_deviation: 10.4109 Fuel Consumption: 7.5864

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.903
Episode: 174 Exploration P: 0.0294 Total reward: -336.0232048048526 SOC: 0.5888 Cumulative_SOC_deviation: 29.2391 Fuel Consumption: 43.6320

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 65.050
Episode: 175 Exploration P: 

maximum steps, simulation is done ... 
elapsed_time: 65.022
Episode: 207 Exploration P: 0.0189 Total reward: -376.7478640164843 SOC: 0.6035 Cumulative_SOC_deviation: 33.3298 Fuel Consumption: 43.4494

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 84.927
Episode: 208 Exploration P: 0.0187 Total reward: -221.3388583897846 SOC: 0.6012 Cumulative_SOC_deviation: 17.6354 Fuel Consumption: 44.9853

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.758
Episode: 209 Exploration P: 0.0185 Total reward: -366.61448672993413 SOC: 0.6008 Cumulative_SOC_deviation: 32.3335 Fuel Consumption: 43.2790

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 70.297
Episode: 210 Exploration P: 0.0183 Total reward: -33.09555832333711 SOC: 0.6019 Cumulative_SOC_deviation: 2.4707 Fuel Consumption: 8.3889

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 70.508
Episode: 211 Exploration P: 0.0

maximum steps, simulation is done ... 
elapsed_time: 68.605
Episode: 244 Exploration P: 0.0138 Total reward: -361.9960111107133 SOC: 0.6012 Cumulative_SOC_deviation: 31.8873 Fuel Consumption: 43.1233

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 68.934
Episode: 245 Exploration P: 0.0137 Total reward: -374.1611676801143 SOC: 0.6012 Cumulative_SOC_deviation: 33.0950 Fuel Consumption: 43.2115

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 68.255
Episode: 246 Exploration P: 0.0136 Total reward: -355.74329444708417 SOC: 0.6002 Cumulative_SOC_deviation: 31.2668 Fuel Consumption: 43.0757

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.322
Episode: 247 Exploration P: 0.0135 Total reward: -204.7460551639146 SOC: 0.6052 Cumulative_SOC_deviation: 16.0073 Fuel Consumption: 44.6730

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 68.204
Episode: 248 Exploration P:

maximum steps, simulation is done ... 
elapsed_time: 88.596
Episode: 280 Exploration P: 0.0116 Total reward: -214.7341256912409 SOC: 0.6022 Cumulative_SOC_deviation: 17.0195 Fuel Consumption: 44.5395

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 68.157
Episode: 281 Exploration P: 0.0116 Total reward: -369.2836575295537 SOC: 0.5985 Cumulative_SOC_deviation: 32.6292 Fuel Consumption: 42.9919

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.552
Episode: 282 Exploration P: 0.0115 Total reward: -213.86103233449936 SOC: 0.6030 Cumulative_SOC_deviation: 16.9287 Fuel Consumption: 44.5739

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 70.873
Episode: 283 Exploration P: 0.0115 Total reward: -29.175908745765412 SOC: 0.6018 Cumulative_SOC_deviation: 2.0800 Fuel Consumption: 8.3754

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 69.918
Episode: 284 Exploration P: 0.

maximum steps, simulation is done ... 
elapsed_time: 70.888
Episode: 317 Exploration P: 0.0107 Total reward: -27.368045076734184 SOC: 0.6019 Cumulative_SOC_deviation: 1.9010 Fuel Consumption: 8.3584

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.477
Episode: 318 Exploration P: 0.0107 Total reward: -208.99059091797352 SOC: 0.6013 Cumulative_SOC_deviation: 16.4394 Fuel Consumption: 44.5966

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 70.278
Episode: 319 Exploration P: 0.0107 Total reward: -24.799681006268315 SOC: 0.6006 Cumulative_SOC_deviation: 1.6527 Fuel Consumption: 8.2724

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 70.550
Episode: 320 Exploration P: 0.0106 Total reward: -28.506176521803095 SOC: 0.6020 Cumulative_SOC_deviation: 2.0118 Fuel Consumption: 8.3882

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 70.021
Episode: 321 Exploration P: 0.010

maximum steps, simulation is done ... 
elapsed_time: 69.859
Episode: 353 Exploration P: 0.0103 Total reward: -27.45770216955342 SOC: 0.6022 Cumulative_SOC_deviation: 1.9061 Fuel Consumption: 8.3964

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.657
Episode: 354 Exploration P: 0.0103 Total reward: -210.3082730515476 SOC: 0.6029 Cumulative_SOC_deviation: 16.5543 Fuel Consumption: 44.7653

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 92.753
Episode: 355 Exploration P: 0.0103 Total reward: -203.03194895380528 SOC: 0.6023 Cumulative_SOC_deviation: 15.8383 Fuel Consumption: 44.6486

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 69.858
Episode: 356 Exploration P: 0.0103 Total reward: -30.010573289610605 SOC: 0.6013 Cumulative_SOC_deviation: 2.1662 Fuel Consumption: 8.3489

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 70.493
Episode: 357 Exploration P: 0.01

maximum steps, simulation is done ... 
elapsed_time: 68.610
Episode: 390 Exploration P: 0.0101 Total reward: -368.71504092896345 SOC: 0.5972 Cumulative_SOC_deviation: 32.5765 Fuel Consumption: 42.9499

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.988
Episode: 391 Exploration P: 0.0101 Total reward: -207.49182073762728 SOC: 0.6014 Cumulative_SOC_deviation: 16.2864 Fuel Consumption: 44.6278

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 68.830
Episode: 392 Exploration P: 0.0101 Total reward: -366.18273691194963 SOC: 0.5992 Cumulative_SOC_deviation: 32.3069 Fuel Consumption: 43.1136

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 68.421
Episode: 393 Exploration P: 0.0101 Total reward: -365.8714383255134 SOC: 0.6001 Cumulative_SOC_deviation: 32.2693 Fuel Consumption: 43.1786

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 70.598
Episode: 394 Exploration P

maximum steps, simulation is done ... 
elapsed_time: 88.925
Episode: 426 Exploration P: 0.0101 Total reward: -211.5314399762565 SOC: 0.6023 Cumulative_SOC_deviation: 16.6869 Fuel Consumption: 44.6625

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.939
Episode: 427 Exploration P: 0.0101 Total reward: -212.1278778235824 SOC: 0.6013 Cumulative_SOC_deviation: 16.7487 Fuel Consumption: 44.6408

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.727
Episode: 428 Exploration P: 0.0101 Total reward: -214.38416380458057 SOC: 0.6008 Cumulative_SOC_deviation: 16.9804 Fuel Consumption: 44.5802

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 68.236
Episode: 429 Exploration P: 0.0101 Total reward: -368.67430345366756 SOC: 0.5994 Cumulative_SOC_deviation: 32.5550 Fuel Consumption: 43.1241

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 70.883
Episode: 430 Exploration P:

maximum steps, simulation is done ... 
elapsed_time: 70.826
Episode: 462 Exploration P: 0.0100 Total reward: -25.274389565537113 SOC: 0.6005 Cumulative_SOC_deviation: 1.7075 Fuel Consumption: 8.1998

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 71.128
Episode: 463 Exploration P: 0.0100 Total reward: -26.38989626975248 SOC: 0.6012 Cumulative_SOC_deviation: 1.8094 Fuel Consumption: 8.2961

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 69.370
Episode: 464 Exploration P: 0.0100 Total reward: -360.2373194597164 SOC: 0.5985 Cumulative_SOC_deviation: 31.7294 Fuel Consumption: 42.9437

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 68.810
Episode: 465 Exploration P: 0.0100 Total reward: -362.949170667539 SOC: 0.6002 Cumulative_SOC_deviation: 31.9840 Fuel Consumption: 43.1088

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 68.894
Episode: 466 Exploration P: 0.010

maximum steps, simulation is done ... 
elapsed_time: 71.189
Episode: 498 Exploration P: 0.0100 Total reward: -23.85998733848049 SOC: 0.6001 Cumulative_SOC_deviation: 1.5716 Fuel Consumption: 8.1441

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 70.095
Episode: 499 Exploration P: 0.0100 Total reward: -366.5887608851955 SOC: 0.6009 Cumulative_SOC_deviation: 32.3468 Fuel Consumption: 43.1208

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 68.686
Episode: 500 Exploration P: 0.0100 Total reward: -363.21176627311905 SOC: 0.6009 Cumulative_SOC_deviation: 32.0061 Fuel Consumption: 43.1506

model is saved..

Trial 1

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 16.349
Episode: 1 Exploration P: 1.0000 Total reward: -986.5594063303618 SOC: 0.6363 Cumulative_SOC_deviation: 94.0496 Fuel Consumption: 46.0632

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 16.319
Episo

maximum steps, simulation is done ... 
elapsed_time: 87.510
Episode: 22 Exploration P: 0.7190 Total reward: -1178.69170344687 SOC: 0.6094 Cumulative_SOC_deviation: 113.2700 Fuel Consumption: 45.9914

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 67.432
Episode: 23 Exploration P: 0.7041 Total reward: -1188.9887357535001 SOC: 0.4568 Cumulative_SOC_deviation: 115.5587 Fuel Consumption: 33.4013

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 70.549
Episode: 24 Exploration P: 0.6891 Total reward: -1967.6961607884825 SOC: 0.9616 Cumulative_SOC_deviation: 193.1760 Fuel Consumption: 35.9366

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 87.555
Episode: 25 Exploration P: 0.6707 Total reward: -1534.827331633303 SOC: 0.5584 Cumulative_SOC_deviation: 149.2707 Fuel Consumption: 42.1206

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 87.678
Episode: 26 Exploration P: 0

maximum steps, simulation is done ... 
elapsed_time: 71.202
Episode: 58 Exploration P: 0.3089 Total reward: -564.976177058526 SOC: 0.6998 Cumulative_SOC_deviation: 54.8986 Fuel Consumption: 15.9897

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 69.860
Episode: 59 Exploration P: 0.3024 Total reward: -656.2661086817402 SOC: 0.7159 Cumulative_SOC_deviation: 63.9077 Fuel Consumption: 17.1889

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 70.633
Episode: 60 Exploration P: 0.2961 Total reward: -554.5544646997521 SOC: 0.6941 Cumulative_SOC_deviation: 53.9007 Fuel Consumption: 15.5470

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.752
Episode: 61 Exploration P: 0.2884 Total reward: -330.50916845759576 SOC: 0.6069 Cumulative_SOC_deviation: 28.5398 Fuel Consumption: 45.1112

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 88.815
Episode: 62 Exploration P: 0.2808 

maximum steps, simulation is done ... 
elapsed_time: 70.129
Episode: 95 Exploration P: 0.1374 Total reward: -461.84580878934423 SOC: 0.6028 Cumulative_SOC_deviation: 41.8236 Fuel Consumption: 43.6101

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 94.159
Episode: 96 Exploration P: 0.1339 Total reward: -232.29753814832287 SOC: 0.6041 Cumulative_SOC_deviation: 18.7356 Fuel Consumption: 44.9411

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 71.037
Episode: 97 Exploration P: 0.1313 Total reward: -60.816370895844365 SOC: 0.6014 Cumulative_SOC_deviation: 5.2328 Fuel Consumption: 8.4882

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 66.901
Episode: 98 Exploration P: 0.1287 Total reward: -449.91906649594 SOC: 0.6034 Cumulative_SOC_deviation: 40.6288 Fuel Consumption: 43.6312

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 89.115
Episode: 99 Exploration P: 0.1255 

KeyboardInterrupt: 

In [None]:
# Persist the per-trial metrics gathered in `results_dict` to DDPG3.pkl, using
# the highest pickle protocol available.
with open("DDPG3.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)