In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 

from vehicle_model_DDPG4 import Environment 
from cell_model import CellModel 

# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Data files used to build the simulation environment (MATLAB .mat files,
# addressed relative to the notebook's working directory).
# NOTE(review): `drving_cycle` is spelled without the first "i"; it is a
# module-level name, so it is left unchanged here.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
cell_model = CellModel()
# The trailing 10 sits in the argument position that initialization_env()
# calls `reward_factor`. The training cell builds a fresh Environment per
# episode; of this instance it only reads `env.version`.
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)

# Length of one state vector: sizes the replay-buffer arrays and the network
# inputs. The policies read the last two entries of a state as j_min / j_max.
num_states = 4

In [3]:
class OUActionNoise:
    """Ornstein-Uhlenbeck style noise generator.

    Every call advances the internal value ``x`` by one step:
    ``x <- x + theta * (mean - x) * dt + std_dev * sqrt(dt) * N(0, 1)``,
    so successive samples are correlated with each other.

    Args:
        mean: value the process is pulled towards.
        std_deviation: scale of the random term.
        theta: strength of the pull towards ``mean``.
        dt: step size.
        x_initial: starting value; ``None`` means start from 0.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.mean = mean
        self.std_dev = std_deviation
        self.theta = theta
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Put the process back at ``x_initial`` (0 when none was given)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process by one step and return the new value."""
        pull_to_mean = self.theta * (self.mean - self.x_prev) * self.dt
        random_kick = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        self.x_prev = self.x_prev + pull_to_mean + random_kick
        return self.x_prev

In [4]:
class Buffer:
    """Ring-buffer replay memory that also performs one DDPG update per ``learn()``.

    Transitions are stored in pre-allocated NumPy arrays; once
    ``buffer_capacity`` records have been written the oldest slots are
    overwritten.

    ``learn()`` reads these module-level names: ``actor_model``,
    ``critic_model``, ``target_actor``, ``target_critic``,
    ``actor_optimizer``, ``critic_optimizer`` and ``gamma``. The array widths
    come from the module-level ``num_states``.

    Args:
        buffer_capacity: maximum number of transitions kept.
        batch_size: number of transitions sampled per ``learn()`` call.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of record() calls so far (not capped at capacity).
        self.buffer_counter = 0

        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))
        # 1.0 where the stored transition ended its episode, else 0.0.
        self.done_buffer = np.zeros((self.buffer_capacity, 1))

    def record(self, obs_tuple):
        """Store one transition, overwriting the oldest slot when full.

        Args:
            obs_tuple: ``(state, action, reward, next_state)`` or
                ``(state, action, reward, next_state, done)``. Without the
                fifth element the transition is stored as non-terminal.
        """
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]
        # Always written, so a reused slot never keeps a stale terminal flag.
        self.done_buffer[index] = float(obs_tuple[4]) if len(obs_tuple) > 4 else 0.0

        self.buffer_counter += 1

    @staticmethod
    def _as_float32(array):
        """Convert a NumPy batch to a float32 tensor.

        The buffers are float64; casting here avoids relying on the networks
        to cast their inputs implicitly.
        """
        return tf.cast(tf.convert_to_tensor(array), dtype=tf.float32)

    def learn(self):
        """Sample a batch (with replacement) and update critic, then actor.

        Returns without doing anything while the buffer is empty.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            # np.random.choice(0, ...) would raise; there is nothing to learn from.
            return
        batch_indices = np.random.choice(record_range, self.batch_size)

        state_batch = self._as_float32(self.state_buffer[batch_indices])
        action_batch = self._as_float32(self.action_buffer[batch_indices])
        reward_batch = self._as_float32(self.reward_buffer[batch_indices])
        next_state_batch = self._as_float32(self.next_state_buffer[batch_indices])
        done_batch = self._as_float32(self.done_buffer[batch_indices])

        # Critic step: regress Q(s, a) onto the one-step target.
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            # Terminal transitions contribute the reward only; the bootstrap
            # term is masked out where done == 1.
            y = reward_batch + gamma * (1.0 - done_batch) * target_critic(
                [next_state_batch, target_actions]
            )
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor step: raise the critic's value of the actor's own actions
        # (minimise the negated mean value).
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Move both target networks a fraction ``tau`` towards the online ones.

    For every entry of ``.weights`` the target becomes
    ``target * (1 - tau) + tau * online``. The critic pair is updated first,
    then the actor pair. Operates on the module-level ``critic_model`` /
    ``target_critic`` and ``actor_model`` / ``target_actor``.

    Args:
        tau: blend factor; 0 leaves the targets unchanged, 1 copies the
            online weights.
    """
    def _soft_update(target_model, online_model):
        blended = [
            target_w * (1 - tau) + tau * online_w
            for target_w, online_w in zip(target_model.weights, online_model.weights)
        ]
        target_model.set_weights(blended)

    _soft_update(target_critic, critic_model)
    _soft_update(target_actor, actor_model)
    

In [6]:
def get_actor():
    """Build the actor network: state -> one action fraction in (0, 1).

    Layout: BatchNormalization on the raw state, two 512-unit ReLU layers,
    then a single sigmoid unit. The callers multiply the output by ``j_max``
    and clip it to ``[j_min, j_max]``.

    Returns:
        An uncompiled ``tf.keras.Model`` taking a ``(batch, num_states)`` input.
    """
    # Small uniform init keeps the pre-sigmoid output near 0 at the start.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `(num_states)` is just the int; `shape` is documented as a tuple, so pass
    # a real 1-tuple (newer Keras releases reject a bare int).
    inputs = layers.Input(shape=(num_states,))
    inputs_batchnorm = layers.BatchNormalization()(inputs)

    # BatchNormalization after the hidden layers is intentionally not used.
    out = layers.Dense(512, activation="relu")(inputs_batchnorm)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                          kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic():
    """Build the critic network: (state, action) -> scalar value estimate.

    The state passes through BatchNormalization and Dense(16) -> Dense(32);
    the action passes through Dense(32). Both branches are concatenated and
    followed by two 512-unit ReLU layers and a linear single-unit output.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[state, action]`` inputs of
        shapes ``(batch, num_states)`` and ``(batch, 1)``.
    """
    # `(num_states)` / `(1)` are plain ints; `shape` is documented as a tuple,
    # so pass real 1-tuples (newer Keras releases reject a bare int).
    state_input = layers.Input(shape=(num_states,))
    state_input_batchnorm = layers.BatchNormalization()(state_input)

    # BatchNormalization after the hidden layers is intentionally not used.
    state_out = layers.Dense(16, activation="relu")(state_input_batchnorm)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    model = tf.keras.Model([state_input, action_input], outputs)
    return model
    

In [8]:
def policy(state, noise_object):
    """Pick an action from the actor plus additive exploration noise.

    The actor output (with noise added) is scaled by ``j_max`` and clipped to
    ``[j_min, j_max]``; the two bounds are read from entries 2 and 3 of the
    state.

    Args:
        state: batched state tensor (batch size 1); entries must expose
            ``.numpy()``.
        noise_object: zero-argument callable returning the noise to add.

    Returns:
        The clipped action.
    """
    features = state[0]
    j_min, j_max = features[2].numpy(), features[3].numpy()
    noisy_fraction = tf.squeeze(actor_model(state)).numpy() + noise_object()
    return np.clip(noisy_fraction * j_max, j_min, j_max)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Epsilon-greedy action selection over the range ``[j_min, j_max]``.

    With probability ``eps`` one of 10 evenly spaced values between ``j_min``
    and ``j_max`` is returned; otherwise the actor output is scaled by
    ``j_max`` and clipped to ``[j_min, j_max]``. The bounds are the last two
    entries of the state.

    Args:
        state: batched state tensor (batch size 1); entries must expose
            ``.numpy()``.
        eps: exploration probability.

    Returns:
        The chosen action.
    """
    features = state[0]
    j_min, j_max = features[-2].numpy(), features[-1].numpy()

    explore = random.random() < eps
    if not explore:
        greedy_fraction = tf.squeeze(actor_model(state)).numpy()
        return np.clip(greedy_fraction * j_max, j_min, j_max)

    candidates = np.linspace(j_min, j_max, 10)
    return candidates[random.randint(0, 9)]

In [10]:
# Exploration noise for policy(). The training cell explores with
# policy_epsilon_greedy() instead, so ou_noise is only used if policy() is
# called explicitly.
std_dev = 0.2
# Use the named constant rather than repeating the literal, so changing
# std_dev actually changes the noise scale.
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# Optimiser step sizes.
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 500  # episodes per trial
gamma = 0.95          # discount factor in the critic target
tau = 0.001           # blend factor passed to update_target()

# Epsilon-greedy schedule: eps stays at MAX_EPSILON for the first
# DELAY_TRAINING environment steps, then decays exponentially per step
# towards MIN_EPSILON at rate DECAY_RATE. Learning also starts only after
# DELAY_TRAINING steps.
MAX_EPSILON = 1
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
BATCH_SIZE = 32          # transitions sampled per Buffer.learn() call
DELAY_TRAINING = 10000

In [11]:
def initialization():
    """Create a fresh set of networks and an empty replay buffer.

    The target networks are built separately and then overwritten with the
    online networks' weights, so each pair starts out identical.

    Returns:
        ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    # Construction order: online actor, online critic, target actor, target critic.
    online_actor, online_critic = get_actor(), get_critic()
    frozen_actor, frozen_critic = get_actor(), get_critic()

    for target_net, online_net in ((frozen_actor, online_actor),
                                   (frozen_critic, online_critic)):
        target_net.set_weights(online_net.get_weights())

    replay = Buffer(500000, BATCH_SIZE)
    return online_actor, online_critic, frozen_actor, frozen_critic, replay

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Write the weights of all four networks under the directory ``./<root>/``.

    Each network goes to its own checkpoint prefix; a confirmation line is
    printed once all four have been written.

    Args:
        actor_model, critic_model, target_actor, target_critic: objects
            exposing ``save_weights(path)``.
        root: directory name inserted into every checkpoint path.
    """
    checkpoints = (
        (actor_model, "./{}/actor_model_checkpoint"),
        (critic_model, "./{}/critic_model_checkpoint"),
        (target_actor, "./{}/target_actor_checkpoint"),
        (target_critic, "./{}/target_critic_checkpoint"),
    )
    for network, path_template in checkpoints:
        network.save_weights(path_template.format(root))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor):
    """Build an ``Environment`` for one driving cycle.

    The cell model, battery file and motor file come from the module-level
    ``cell_model``, ``battery_path`` and ``motor_path``.

    Args:
        driving_path: path of the driving-cycle file to simulate.
        reward_factor: forwarded as the last ``Environment`` argument.

    Returns:
        The newly constructed environment.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [14]:
# Train `num_trials` independent agents. Every episode simulates one driving
# cycle drawn at random from the first five files matching training/*.mat.
print(env.version)

num_trials = 3
results_dict = {}
driving_cycle_paths = glob.glob("training/*.mat")
driving_cycle_paths = driving_cycle_paths[:5]
if not driving_cycle_paths:
    # Otherwise np.random.choice fails on the empty list inside the first
    # episode with an error that does not mention the missing files.
    raise FileNotFoundError("no driving cycle files match training/*.mat")

for trial in range(num_trials):
    print("")
    print("Trial {}".format(trial))
    print("")

    # These must be module-level names: Buffer.learn(), update_target() and
    # the policy functions read the networks as globals.
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()

    eps = MAX_EPSILON
    steps = 0  # environment steps taken in this trial, across episodes

    episode_rewards = []
    episode_SOCs = []
    episode_FCs = []
    # Register the histories up front; the lists are filled in place, so an
    # interrupted trial still leaves its partial results in results_dict.
    results_dict[trial + 1] = {
        "rewards": episode_rewards,
        "SOCs": episode_SOCs,
        "FCs": episode_FCs
    }

    for ep in range(total_episodes):
        driving_cycle_path = np.random.choice(driving_cycle_paths)
        print(driving_cycle_path)
        env = initialization_env(driving_cycle_path, 10)

        start = time.time()
        state = env.reset()
        episodic_reward = 0

        while True:
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
            next_state, reward, done = env.step(action)
            if done:
                # Placeholder successor for the final transition of an episode.
                next_state = [0] * num_states

            # The terminal flag rides along as a fifth element; record() reads
            # fields by index, so a buffer that only consumes the first four
            # is unaffected.
            buffer.record((state, action, reward, next_state, done))
            episodic_reward += reward

            # Warm-up: the first DELAY_TRAINING steps only fill the buffer,
            # with eps held at MAX_EPSILON.
            if steps > DELAY_TRAINING:
                buffer.learn()
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps
                                                                        -DELAY_TRAINING))

            steps += 1

            if done:
                break

            state = next_state

        elapsed_time = time.time() - start
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward)
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption)

        # Summed absolute distance of the recorded SOC trace from 0.6.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward),
              "SOC: {:.4f}".format(env.SOC),
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
        )
        print("")

    root = "DDPG4_trial{}".format(trial+1)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)

1

Trial 0

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 15.673
Episode: 1 Exploration P: 1.0000 Total reward: -1015.5785012252499 SOC: 0.6618 Cumulative_SOC_deviation: 96.7532 Fuel Consumption: 48.0468

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 18.399
Episode: 2 Exploration P: 1.0000 Total reward: -2742.2959722705045 SOC: 1.0000 Cumulative_SOC_deviation: 269.4660 Fuel Consumption: 47.6356

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 15.518
Episode: 3 Exploration P: 1.0000 Total reward: -1054.7722689533127 SOC: 0.6683 Cumulative_SOC_deviation: 100.6446 Fuel Consumption: 48.3260

training\cudec_freeway.mat
maximum steps, simulation is done ... 
elapsed_time: 7.854
Episode: 4 Exploration P: 1.0000 Total reward: -1722.4400874180324 SOC: 0.0710 Cumulative_SOC_deviation: 169.8374 Fuel Consumption: 24.0665

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 

  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (
  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (


SOC is nan...
elapsed_time: 22.512
Episode: 15 Exploration P: 0.8766 Total reward: -1366.8036974516763 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 9.9557

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 92.923
Episode: 16 Exploration P: 0.8584 Total reward: -1024.8244509799333 SOC: 0.5853 Cumulative_SOC_deviation: 98.2372 Fuel Consumption: 42.4524

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 138.268
Episode: 17 Exploration P: 0.8354 Total reward: -916.7419048796183 SOC: 0.7028 Cumulative_SOC_deviation: 86.3962 Fuel Consumption: 52.7798

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 86.844
Episode: 18 Exploration P: 0.8176 Total reward: -2271.0552706611593 SOC: 1.0000 Cumulative_SOC_deviation: 223.0385 Fuel Consumption: 40.6700

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 96.609
Episode: 19 Exploration P: 0.8002 Total reward: -2129.52911783

  2 * r_dis)) * (v_dis - (v_dis ** 2 - 4 * r_dis * p_bat) ** (0.5)) * (p_bat >= 0)


SOC is nan...
elapsed_time: 19.957
Episode: 45 Exploration P: 0.4296 Total reward: -1445.3502760682868 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 7.0684

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 83.192
Episode: 46 Exploration P: 0.4208 Total reward: -1439.811139527521 SOC: 0.3108 Cumulative_SOC_deviation: 141.6436 Fuel Consumption: 23.3752

training\FTP_75_cycle.mat
maximum steps, simulation is done ... 
elapsed_time: 147.543
Episode: 47 Exploration P: 0.4057 Total reward: -3209.0427699319025 SOC: 0.3525 Cumulative_SOC_deviation: 315.4838 Fuel Consumption: 54.2052

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 85.604
Episode: 48 Exploration P: 0.3971 Total reward: -753.6492355788733 SOC: 0.7401 Cumulative_SOC_deviation: 73.4611 Fuel Consumption: 19.0380

training\cudec_freeway.mat
maximum steps, simulation is done ... 
elapsed_time: 42.788
Episode: 49 Exploration P: 0.3930 Total reward: -1688.67499089

maximum steps, simulation is done ... 
elapsed_time: 148.447
Episode: 82 Exploration P: 0.1791 Total reward: -843.7136308527839 SOC: 0.5779 Cumulative_SOC_deviation: 77.2859 Fuel Consumption: 70.8548

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 83.552
Episode: 83 Exploration P: 0.1755 Total reward: -2084.5580762095437 SOC: 0.1122 Cumulative_SOC_deviation: 207.3911 Fuel Consumption: 10.6473

training\cudec_freeway.mat


  2 * r_dis)) * (v_dis - (v_dis ** 2 - 4 * r_dis * p_bat) ** (0.5)) * (p_bat >= 0)


SOC is nan...
elapsed_time: 20.296
Episode: 84 Exploration P: 0.1747 Total reward: -1503.39242209794 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 3.3188

training\cudec_freeway.mat
SOC is nan...
elapsed_time: 20.174
Episode: 85 Exploration P: 0.1738 Total reward: -1503.5370932444462 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 3.1192

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 86.410
Episode: 86 Exploration P: 0.1703 Total reward: -68.74625235705754 SOC: 0.6124 Cumulative_SOC_deviation: 5.9250 Fuel Consumption: 9.4958

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 86.178
Episode: 87 Exploration P: 0.1669 Total reward: -68.16176068757936 SOC: 0.6074 Cumulative_SOC_deviation: 5.9070 Fuel Consumption: 9.0913

training\FTP_75_cycle.mat
SOC is nan...
elapsed_time: 124.180
Episode: 88 Exploration P: 0.1620 Total reward: -6487.864148745761 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 13.74

maximum steps, simulation is done ... 
elapsed_time: 149.156
Episode: 122 Exploration P: 0.0792 Total reward: -572.7546041262812 SOC: 0.6002 Cumulative_SOC_deviation: 49.9742 Fuel Consumption: 73.0124

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 109.513
Episode: 123 Exploration P: 0.0773 Total reward: -365.3028781348447 SOC: 0.6103 Cumulative_SOC_deviation: 31.9655 Fuel Consumption: 45.6480

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 109.406
Episode: 124 Exploration P: 0.0755 Total reward: -377.52145076715675 SOC: 0.6183 Cumulative_SOC_deviation: 33.1661 Fuel Consumption: 45.8605

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 84.671
Episode: 125 Exploration P: 0.0741 Total reward: -617.5817844993206 SOC: 0.5972 Cumulative_SOC_deviation: 57.3430 Fuel Consumption: 44.1516

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 109.419
Episode: 126 Exploratio

maximum steps, simulation is done ... 
elapsed_time: 87.745
Episode: 158 Exploration P: 0.0414 Total reward: -425.45775432993156 SOC: 0.5261 Cumulative_SOC_deviation: 42.2452 Fuel Consumption: 3.0059

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 84.727
Episode: 159 Exploration P: 0.0407 Total reward: -1805.0832523321312 SOC: 0.2762 Cumulative_SOC_deviation: 178.5543 Fuel Consumption: 19.5407

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 86.765
Episode: 160 Exploration P: 0.0401 Total reward: -394.5880827077741 SOC: 0.5283 Cumulative_SOC_deviation: 39.1390 Fuel Consumption: 3.1985

training\cudec_freeway.mat
maximum steps, simulation is done ... 
elapsed_time: 43.031
Episode: 161 Exploration P: 0.0397 Total reward: -1315.7736174498855 SOC: 0.3077 Cumulative_SOC_deviation: 127.6267 Fuel Consumption: 39.5066

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 86.838
Episode: 162 Exploration P:

maximum steps, simulation is done ... 
elapsed_time: 156.750
Episode: 194 Exploration P: 0.0234 Total reward: -1002.804564549522 SOC: 0.5511 Cumulative_SOC_deviation: 93.4384 Fuel Consumption: 68.4202

training\FTP_75_cycle.mat
maximum steps, simulation is done ... 
elapsed_time: 157.623
Episode: 195 Exploration P: 0.0229 Total reward: -1131.1993111439022 SOC: 0.5998 Cumulative_SOC_deviation: 105.9368 Fuel Consumption: 71.8314

training\cudec_freeway.mat
maximum steps, simulation is done ... 
elapsed_time: 45.772
Episode: 196 Exploration P: 0.0228 Total reward: -1006.1747417622831 SOC: 0.3783 Cumulative_SOC_deviation: 96.1721 Fuel Consumption: 44.4533

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 88.732
Episode: 197 Exploration P: 0.0225 Total reward: -421.5174839136283 SOC: 0.5995 Cumulative_SOC_deviation: 37.8168 Fuel Consumption: 43.3492

training\cudec_freeway.mat
maximum steps, simulation is done ... 
elapsed_time: 45.669
Episode: 198 Exploration

maximum steps, simulation is done ... 
elapsed_time: 114.598
Episode: 230 Exploration P: 0.0158 Total reward: -719.2215600241378 SOC: 0.5567 Cumulative_SOC_deviation: 67.7859 Fuel Consumption: 41.3623

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 91.346
Episode: 231 Exploration P: 0.0156 Total reward: -287.71231671418326 SOC: 0.5833 Cumulative_SOC_deviation: 28.0736 Fuel Consumption: 6.9765

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 115.638
Episode: 232 Exploration P: 0.0155 Total reward: -421.6053774433835 SOC: 0.5668 Cumulative_SOC_deviation: 37.9594 Fuel Consumption: 42.0111

training\cudec_freeway.mat
maximum steps, simulation is done ... 
elapsed_time: 45.510
Episode: 233 Exploration P: 0.0154 Total reward: -1128.9290413889016 SOC: 0.3527 Cumulative_SOC_deviation: 108.6367 Fuel Consumption: 42.5621

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 88.380
Episode: 234 Exploration 

maximum steps, simulation is done ... 
elapsed_time: 112.517
Episode: 266 Exploration P: 0.0126 Total reward: -2198.0922088399866 SOC: 0.4283 Cumulative_SOC_deviation: 216.6251 Fuel Consumption: 31.8416

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 84.420
Episode: 267 Exploration P: 0.0126 Total reward: -1456.1423352934298 SOC: 0.3792 Cumulative_SOC_deviation: 142.9214 Fuel Consumption: 26.9286

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 88.732
Episode: 268 Exploration P: 0.0125 Total reward: -1550.056423081311 SOC: 0.3740 Cumulative_SOC_deviation: 152.2970 Fuel Consumption: 27.0868

training\cudec_freeway.mat
maximum steps, simulation is done ... 
elapsed_time: 45.339
Episode: 269 Exploration P: 0.0125 Total reward: -1616.3721599143305 SOC: 0.2421 Cumulative_SOC_deviation: 158.0427 Fuel Consumption: 35.9452

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 115.211
Episode: 270 Explor

SOC is nan...
elapsed_time: 64.799
Episode: 304 Exploration P: 0.0112 Total reward: -1645.3108139303786 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 1.2069

training\06_udds_truck.mat
SOC is nan...
elapsed_time: 65.028
Episode: 305 Exploration P: 0.0112 Total reward: -1641.915998514293 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 1.2839

training\FTP_75_cycle.mat
SOC is nan...
elapsed_time: 141.592
Episode: 306 Exploration P: 0.0112 Total reward: -7773.929783764252 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.6956

training\FTP_75_cycle.mat
SOC is nan...
elapsed_time: 134.843
Episode: 307 Exploration P: 0.0111 Total reward: -7756.188346298844 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.6474

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 115.259
Episode: 308 Exploration P: 0.0111 Total reward: -5522.957979890181 SOC: -0.0347 Cumulative_SOC_deviation: 552.0990 Fuel Consumption: 1.9683

training\06_udds

SOC is nan...
elapsed_time: 19.244
Episode: 344 Exploration P: 0.0105 Total reward: -1440.2066183092927 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 0.8515

training\06_udds_truck.mat
SOC is nan...
elapsed_time: 59.834
Episode: 345 Exploration P: 0.0105 Total reward: -1638.054933808637 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 1.2262

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 91.452
Episode: 346 Exploration P: 0.0105 Total reward: -525.626730667108 SOC: 0.5060 Cumulative_SOC_deviation: 52.4094 Fuel Consumption: 1.5327

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 114.595
Episode: 347 Exploration P: 0.0105 Total reward: -5547.9623256739505 SOC: -0.0371 Cumulative_SOC_deviation: 554.6138 Fuel Consumption: 1.8239

training\FTP_75_cycle.mat
SOC is nan...
elapsed_time: 130.598
Episode: 348 Exploration P: 0.0105 Total reward: -7772.077074928931 SOC: nan Cumulative_SOC_deviation: nan Fuel Consum

SOC is nan...
elapsed_time: 59.790
Episode: 384 Exploration P: 0.0103 Total reward: -1652.1260748455925 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 1.1588

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 115.792
Episode: 385 Exploration P: 0.0102 Total reward: -5538.219285670242 SOC: -0.0350 Cumulative_SOC_deviation: 553.6261 Fuel Consumption: 1.9587

training\06_udds_truck.mat
SOC is nan...
elapsed_time: 59.839
Episode: 386 Exploration P: 0.0102 Total reward: -1639.1873794029618 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 1.3706

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 91.467
Episode: 387 Exploration P: 0.0102 Total reward: -483.4015176621905 SOC: 0.5127 Cumulative_SOC_deviation: 48.1378 Fuel Consumption: 2.0238

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 114.655
Episode: 388 Exploration P: 0.0102 Total reward: -5525.551588371215 SOC: -0.0337 Cumulative

SOC is nan...
elapsed_time: 130.539
Episode: 424 Exploration P: 0.0101 Total reward: -7773.79363243948 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.6378

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 115.209
Episode: 425 Exploration P: 0.0101 Total reward: -5508.47067680565 SOC: -0.0354 Cumulative_SOC_deviation: 550.6551 Fuel Consumption: 1.9202

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 115.692
Episode: 426 Exploration P: 0.0101 Total reward: -5505.244994435763 SOC: -0.0312 Cumulative_SOC_deviation: 550.3036 Fuel Consumption: 2.2090

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 115.062
Episode: 427 Exploration P: 0.0101 Total reward: -5515.461266790177 SOC: -0.0325 Cumulative_SOC_deviation: 551.3368 Fuel Consumption: 2.0935

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 91.352
Episode: 428 Exploration P: 0.0101 Total reward: -531.406

maximum steps, simulation is done ... 
elapsed_time: 91.473
Episode: 463 Exploration P: 0.0101 Total reward: -491.6659734806817 SOC: 0.5121 Cumulative_SOC_deviation: 48.9690 Fuel Consumption: 1.9756

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 91.970
Episode: 464 Exploration P: 0.0101 Total reward: -510.6496343536915 SOC: 0.5096 Cumulative_SOC_deviation: 50.8867 Fuel Consumption: 1.7830

training\06_udds_truck.mat
SOC is nan...
elapsed_time: 61.337
Episode: 465 Exploration P: 0.0100 Total reward: -1651.53904518286 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 1.0625

training\06_udds_truck.mat
SOC is nan...
elapsed_time: 60.262
Episode: 466 Exploration P: 0.0100 Total reward: -1648.372027983482 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 1.1395

training\cudec_freeway.mat
SOC is nan...
elapsed_time: 19.290
Episode: 467 Exploration P: 0.0100 Total reward: -1442.0788494013477 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumptio

maximum steps, simulation is done ... 
elapsed_time: 20.068
Episode: 2 Exploration P: 1.0000 Total reward: -1023.8080165850205 SOC: 0.8167 Cumulative_SOC_deviation: 96.3123 Fuel Consumption: 60.6851

training\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 16.201
Episode: 3 Exploration P: 1.0000 Total reward: -2743.002381293935 SOC: 1.0000 Cumulative_SOC_deviation: 269.4500 Fuel Consumption: 48.5022

training\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 15.605
Episode: 4 Exploration P: 1.0000 Total reward: -1045.4829551268763 SOC: 0.6690 Cumulative_SOC_deviation: 99.7176 Fuel Consumption: 48.3067

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 21.426
Episode: 5 Exploration P: 1.0000 Total reward: -993.5790985272648 SOC: 0.8008 Cumulative_SOC_deviation: 93.4001 Fuel Consumption: 59.5777

training\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 21.308
Episode: 6 Exploration P: 1.0000 T

KeyboardInterrupt: 

In [None]:
# Persist the per-trial histories gathered in results_dict.
with open("DDPG4.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)