In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 

from vehicle_model_DDPG1 import Environment 
from cell_model import CellModel 

# Hide every CUDA device from this process ('-1' matches no GPU index), so
# TensorFlow is expected to fall back to the CPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Simulation assets, given relative to the notebook's working directory.
drving_cycle = "../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat"
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"

# Length of the state vector fed to the actor and critic networks.
num_states = 4

# Default environment instance. The last positional argument is the value that
# `initialization_env` passes as `reward_factor`.
cell_model = CellModel()
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)

In [3]:
class OUActionNoise:
    """Stateful, temporally correlated noise source (Ornstein-Uhlenbeck style).

    Every call advances the internal value by one step of size ``dt``::

        x <- x + theta * (mean - x) * dt + std_dev * sqrt(dt) * N(0, 1)

    Args:
        mean: Value the process is pulled towards.
        std_deviation: Scale of the random perturbation.
        theta: Strength of the pull towards ``mean``.
        dt: Step size.
        x_initial: Starting value; 0 is used when it is ``None``.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.mean = mean
        self.std_dev = std_deviation
        self.theta = theta
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Restart the process from ``x_initial`` (or from 0 if none was given)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process by one step and return the new value."""
        # Deterministic pull back towards the mean.
        pull = self.theta * (self.mean - self.x_prev) * self.dt
        # Random perturbation, drawn from NumPy's global RNG.
        kick = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        self.x_prev = self.x_prev + pull + kick
        return self.x_prev

In [4]:
class Buffer:
    """Fixed-capacity circular replay buffer that also performs one DDPG update.

    Transitions are stored as ``(state, action, reward, next_state)`` in
    pre-allocated NumPy arrays; once ``buffer_capacity`` records have been
    written, the oldest entries are overwritten.

    The class depends on module-level names defined in other cells:
    ``num_states``, ``gamma``, ``actor_model``, ``critic_model``,
    ``target_actor``, ``target_critic``, ``actor_optimizer`` and
    ``critic_optimizer``.

    Args:
        buffer_capacity: Maximum number of transitions kept.
        batch_size: Number of transitions sampled per call to ``learn``.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of record() calls so far (not capped at capacity).
        self.buffer_counter = 0

        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))

    def record(self, obs_tuple):
        """Store one transition ``(state, action, reward, next_state)``.

        Only the first four elements of ``obs_tuple`` are read.
        """
        # Wrap around so that new records replace the oldest ones.
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    def learn(self):
        """Sample a mini-batch and apply one critic step and one actor step.

        Returns ``None``. Does nothing when no transition has been recorded
        yet, because there is nothing to sample from.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            # np.random.choice(0, batch_size) would raise ValueError.
            return
        # Indices are drawn with replacement (np.random.choice default).
        batch_indices = np.random.choice(record_range, self.batch_size)

        state_batch = tf.convert_to_tensor(self.state_buffer[batch_indices])
        action_batch = tf.convert_to_tensor(self.action_buffer[batch_indices])
        reward_batch = tf.convert_to_tensor(self.reward_buffer[batch_indices])
        reward_batch = tf.cast(reward_batch, dtype=tf.float32)
        next_state_batch = tf.convert_to_tensor(self.next_state_buffer[batch_indices])

        # Critic update: minimise the squared TD error against the target networks.
        # NOTE(review): no terminal flag is stored, so the target always
        # bootstraps from next_state, including the last transition of an episode.
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor update: ascend the critic's value of the actor's own actions
        # (implemented as descent on the negated mean).
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Soft-update both target networks towards their online counterparts.

    Each target weight becomes ``(1 - tau) * target + tau * online``. The
    critic pair is updated first, then the actor pair. Operates on the
    module-level ``critic_model``/``target_critic`` and
    ``actor_model``/``target_actor``.

    Args:
        tau: Interpolation factor applied to the online weights.
    """

    def _blend(target_model, online_model):
        # Mix the two weight lists position by position, then load the result.
        current_target = target_model.weights
        mixed = [
            current_target[position] * (1 - tau) + tau * online_weight
            for position, online_weight in enumerate(online_model.weights)
        ]
        target_model.set_weights(mixed)

    _blend(target_critic, critic_model)
    _blend(target_actor, actor_model)
    

In [6]:
def get_actor():
    """Build the actor network: state vector -> single action in (0, 1).

    Architecture: BatchNormalization on the input, two 512-unit ReLU layers,
    and a 1-unit sigmoid output. Reads the module-level ``num_states``.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping ``(batch, num_states)`` inputs
        to ``(batch, 1)`` outputs.
    """
    # Small uniform init for the output kernel keeps the initial
    # pre-activation close to the bias.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` must be a tuple; `(num_states)` without the comma is a bare int.
    inputs = layers.Input(shape=(num_states,))
    inputs_batchnorm = layers.BatchNormalization()(inputs)

    out = layers.Dense(512, activation="relu")(inputs_batchnorm)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic():
    """Build the critic network: (state, action) -> scalar value estimate.

    The state passes through BatchNormalization and 16- and 32-unit ReLU
    layers; the action passes through one 32-unit ReLU layer. Both branches
    are concatenated and fed through two 512-unit ReLU layers to a linear
    1-unit output. Reads the module-level ``num_states``.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[state, action]`` inputs.
    """
    # State branch. `shape` must be a tuple, hence the trailing comma.
    state_input = layers.Input(shape=(num_states,))
    state_input_batchnorm = layers.BatchNormalization()(state_input)

    state_out = layers.Dense(16, activation="relu")(state_input_batchnorm)
    state_out = layers.Dense(32, activation="relu")(state_out)

    # Action branch (one scalar action per sample).
    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    # Joint head over the merged features.
    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    model = tf.keras.Model([state_input, action_input], outputs)
    return model
    

In [8]:
def policy(state, noise_object):
    """Pick an action from the actor plus additive exploration noise.

    The actor output is perturbed by ``noise_object()``, scaled by the upper
    bound read from the state, and clipped into ``[lower, upper]``. Uses the
    module-level ``actor_model``.

    Args:
        state: Batched state tensor; entries ``[0][2]`` and ``[0][3]`` are
            read as the lower and upper action bounds.
        noise_object: Zero-argument callable returning the noise sample.

    Returns:
        The clipped action.
    """
    lower = state[0][2].numpy()
    upper = state[0][3].numpy()
    raw_action = tf.squeeze(actor_model(state)).numpy()
    noisy_action = raw_action + noise_object()
    return np.clip(noisy_action * upper, lower, upper)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Epsilon-greedy action selection within the bounds carried by the state.

    With probability ``eps`` one of 10 evenly spaced values between the lower
    and upper bound is returned; otherwise the module-level ``actor_model``
    output is scaled by the upper bound and clipped into the bounds.

    Args:
        state: Batched state tensor; its last two entries in row 0 are read
            as the lower and upper action bounds.
        eps: Exploration probability. Any value <= 0 makes the policy greedy.

    Returns:
        The selected action.
    """
    lower = state[0][-2].numpy()
    upper = state[0][-1].numpy()

    # Exploration: uniform choice over a 10-point grid spanning the bounds.
    if random.random() < eps:
        grid = np.linspace(lower, upper, 10)
        return grid[random.randint(0, 9)]

    # Exploitation: follow the actor.
    greedy = tf.squeeze(actor_model(state)).numpy()
    return np.clip(greedy * upper, lower, upper)

In [10]:
# Exploration noise object for `policy(state, noise_object)`. The training loop
# in this notebook explores with `policy_epsilon_greedy` instead.
std_dev = 0.2
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# Optimisers used by Buffer.learn().
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 300  # episodes per trial
gamma = 0.95          # discount factor in the critic target
tau = 0.001           # soft-update rate passed to update_target()

# Epsilon schedule: eps decays exponentially from MAX_EPSILON towards
# MIN_EPSILON once training has started.
MAX_EPSILON = 1
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
BATCH_SIZE = 32          # mini-batch size handed to Buffer
DELAY_TRAINING = 10000   # environment steps collected before learning begins

In [11]:
def initialization():
    """Create a fresh agent: online networks, target networks and replay buffer.

    The target networks start as exact copies of the online networks. The
    buffer holds up to 500000 transitions and samples ``BATCH_SIZE`` of them
    per update.

    Returns:
        Tuple ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    online_actor = get_actor()
    online_critic = get_critic()

    frozen_actor = get_actor()
    frozen_critic = get_critic()
    # Synchronise each target network with its online counterpart.
    for frozen, online in ((frozen_actor, online_actor), (frozen_critic, online_critic)):
        frozen.set_weights(online.get_weights())

    replay = Buffer(500000, BATCH_SIZE)
    return online_actor, online_critic, frozen_actor, frozen_critic, replay

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Write the weights of all four networks under ``./<root>/``.

    Each network is saved through its own ``save_weights`` method to
    ``./<root>/<name>_checkpoint``; a confirmation line is printed afterwards.

    Args:
        actor_model: Online actor network.
        critic_model: Online critic network.
        target_actor: Target actor network.
        target_critic: Target critic network.
        root: Directory name (relative to the working directory).
    """
    checkpoints = (
        (actor_model, "actor_model_checkpoint"),
        (critic_model, "critic_model_checkpoint"),
        (target_actor, "target_actor_checkpoint"),
        (target_critic, "target_critic_checkpoint"),
    )
    for network, stem in checkpoints:
        network.save_weights("./{}/{}".format(root, stem))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor):
    """Create an ``Environment`` for one driving cycle.

    The module-level ``cell_model``, ``battery_path`` and ``motor_path`` are
    reused for every environment.

    Args:
        driving_path: Path of the driving-cycle file to simulate.
        reward_factor: Forwarded as the last ``Environment`` argument.

    Returns:
        The newly constructed ``Environment``.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [14]:
def test_agent(actor_model, reward_factor, test_path_start):
    """Run one greedy evaluation episode on a randomly chosen test cycle.

    Args:
        actor_model: Accepted but not used here; actions come from
            ``policy_epsilon_greedy``, which reads the module-level
            ``actor_model``.
        reward_factor: Forwarded to ``initialization_env``.
        test_path_start: Start index of the slice of globbed cycle files that
            forms the test pool.

    Returns:
        ``env.history`` of the evaluation environment.
    """
    candidates = glob.glob("../data/driving_cycles/city/*.mat")[test_path_start:]
    test_cycle = np.random.choice(candidates)
    env = initialization_env(test_cycle, reward_factor)

    total_reward = 0
    state = env.reset()
    done = False
    while not done:
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
        # eps = -1 is never above random.random(), so the action is always greedy.
        action = policy_epsilon_greedy(tf_state, -1)
        state, reward, done = env.step(action)
        total_reward += reward

    # Summed absolute distance of the SOC trace from 0.6.
    soc_trace = np.array(env.history["SOC"])
    SOC_deviation_history = np.sum(np.abs(soc_trace - 0.6))

    print("******************* Test is start *****************")
    print(test_cycle)
    print('Total reward: {}'.format(total_reward),
          "SOC: {:.4f}".format(env.SOC),
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption))
    print("******************* Test is done *****************")
    print("")
    return env.history

In [15]:
print(env.version)

num_trials = 1
results_dict = {}
# NOTE(review): glob.glob returns matches in arbitrary order, so which files
# land in the first 20 (training pool) is not guaranteed to be the same on
# every machine.
driving_cycle_paths = glob.glob("../data/driving_cycles/city/*.mat")[:20]

for trial in range(num_trials):
    print("")
    print("Trial {}".format(trial))
    print("")

    actor_model, critic_model, target_actor, target_critic, buffer = initialization()

    eps = MAX_EPSILON
    steps = 0  # environment steps taken in this trial, across episodes

    episode_rewards = []
    episode_SOCs = []
    episode_FCs = []
    episode_test_history = []
    episode_num_test = []

    # Register the result containers up front. The lists are filled in place,
    # so results gathered so far survive an interrupted run (e.g. a
    # KeyboardInterrupt) instead of being lost.
    results_dict[trial + 1] = {
        "rewards": episode_rewards,
        "SOCs": episode_SOCs,
        "FCs": episode_FCs,
        "test_history": episode_test_history,
        "test_episode_num": episode_num_test,
    }

    for ep in range(total_episodes):
        # Each episode runs on a randomly drawn training cycle.
        driving_cycle_path = np.random.choice(driving_cycle_paths)
        print(driving_cycle_path)
        env = initialization_env(driving_cycle_path, 10)

        start = time.time()
        state = env.reset()
        episodic_reward = 0

        while True:
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
            next_state, reward, done = env.step(action)
            if done:
                # NOTE(review): the final transition stores an all-zero
                # next_state; Buffer.learn has no terminal flag and still
                # evaluates the target critic on that vector.
                next_state = [0] * num_states

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward

            # Learning and epsilon decay start only after the warm-up phase.
            if steps > DELAY_TRAINING:
                buffer.learn()
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(
                    -DECAY_RATE * (steps - DELAY_TRAINING)
                )

            steps += 1

            if done:
                break

            state = next_state

        elapsed_time = time.time() - start
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward)
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption)

        # Summed absolute distance of the SOC trace from 0.6.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward),
              "SOC: {:.4f}".format(env.SOC),
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
        )
        print("")

        # Greedy evaluation every 10 episodes on the last globbed cycle file.
        if (ep + 1) % 10 == 0:
            history = test_agent(actor_model, 10, -1)
            episode_test_history.append(history)
            episode_num_test.append(ep + 1)

    root = "DDPG4_trial{}".format(trial+1)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)

1

Trial 0

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 13.655
Episode: 1 Exploration P: 1.0000 Total reward: -3917.75503969744 SOC: 1.0000 Cumulative_SOC_deviation: 379.5797 Fuel Consumption: 121.9577

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 12.586
Episode: 2 Exploration P: 1.0000 Total reward: -3660.053389566367 SOC: 1.0000 Cumulative_SOC_deviation: 354.0485 Fuel Consumption: 119.5682

../data/driving_cycles/city\VITO_RW_BUS_TMB_Line24N_1.mat
maximum steps, simulation is done ... 
elapsed_time: 38.075
Episode: 3 Exploration P: 1.0000 Total reward: -11513.9936530536 SOC: 1.0000 Cumulative_SOC_deviation: 1119.1317 Fuel Consumption: 322.6767

../data/driving_cycles/city\VITO_RW_Decade_Jumper_MOL_City1.mat
maximum steps, simulation is done ... 
elapsed_time: 29.304
Episode: 4 Exploration P: 1.0000 Total reward: -7591.485666309332 SOC: 1.0000 Cumulative_SOC_deviation: 737.1545 

maximum steps, simulation is done ... 
******************* Test is start *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -1042.2947551629077 SOC: 0.4632 Cumulative_SOC_deviation: 103.8983 Fuel Consumption: 3.3120
******************* Test is done *****************

../data/driving_cycles/city\VITO_RW_Decade_Jumper_BCN_City1.mat
maximum steps, simulation is done ... 
elapsed_time: 106.236
Episode: 21 Exploration P: 0.5742 Total reward: -6082.06664065406 SOC: 1.0000 Cumulative_SOC_deviation: 596.2725 Fuel Consumption: 119.3412

../data/driving_cycles/city\VITO_RW_Decade_Jumper_BCN_City1.mat
maximum steps, simulation is done ... 
elapsed_time: 105.235
Episode: 22 Exploration P: 0.5547 Total reward: -5778.441950781466 SOC: 1.0000 Cumulative_SOC_deviation: 567.6813 Fuel Consumption: 101.6292

../data/driving_cycles/city\VITO_RW_Decade_Polo_BCN_City1.mat
maximum steps, simulation is done ... 
elapsed_time: 89.506
Episode: 23 Exploration P: 0.5388 Total reward: -4718.56

maximum steps, simulation is done ... 
elapsed_time: 175.676
Episode: 50 Exploration P: 0.2420 Total reward: -9339.916135180709 SOC: 1.0000 Cumulative_SOC_deviation: 925.6220 Fuel Consumption: 83.6961

maximum steps, simulation is done ... 
******************* Test is start *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -1042.2947551629077 SOC: 0.4632 Cumulative_SOC_deviation: 103.8983 Fuel Consumption: 3.3120
******************* Test is done *****************

../data/driving_cycles/city\VITO_DUBDC.mat
maximum steps, simulation is done ... 
elapsed_time: 54.159
Episode: 51 Exploration P: 0.2379 Total reward: -335.59819202725777 SOC: 0.6845 Cumulative_SOC_deviation: 31.1236 Fuel Consumption: 24.3627

../data/driving_cycles/city\VITO_RW_Decade_Polo_BCN_City1.mat
maximum steps, simulation is done ... 
elapsed_time: 89.607
Episode: 52 Exploration P: 0.2312 Total reward: -1580.0523723131244 SOC: 0.8229 Cumulative_SOC_deviation: 153.7334 Fuel Consumption: 42.7179

.

  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (
  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (


SOC is nan...
elapsed_time: 96.733
Episode: 78 Exploration P: 0.1062 Total reward: -14784.98165206004 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 23.7337

../data/driving_cycles/city\VITO_MOLCity.mat
maximum steps, simulation is done ... 
elapsed_time: 90.564
Episode: 79 Exploration P: 0.1034 Total reward: -579.8153493826854 SOC: 0.4833 Cumulative_SOC_deviation: 55.9127 Fuel Consumption: 20.6884

../data/driving_cycles/city\FTP_75_cycle.mat
SOC is nan...
elapsed_time: 95.690
Episode: 80 Exploration P: 0.1005 Total reward: -14455.100057075151 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 23.1333

maximum steps, simulation is done ... 
******************* Test is start *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -1042.2947551629077 SOC: 0.4632 Cumulative_SOC_deviation: 103.8983 Fuel Consumption: 3.3120
******************* Test is done *****************

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 


maximum steps, simulation is done ... 
elapsed_time: 62.112
Episode: 108 Exploration P: 0.0521 Total reward: -236.62201254177657 SOC: 0.6195 Cumulative_SOC_deviation: 22.4621 Fuel Consumption: 12.0011

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 65.864
Episode: 109 Exploration P: 0.0512 Total reward: -272.9630273131337 SOC: 0.6287 Cumulative_SOC_deviation: 26.1906 Fuel Consumption: 11.0566

../data/driving_cycles/city\VITO_RW_Antwerp1_May19c.mat
maximum steps, simulation is done ... 
elapsed_time: 62.057
Episode: 110 Exploration P: 0.0504 Total reward: -213.926427425375 SOC: 0.6109 Cumulative_SOC_deviation: 20.2723 Fuel Consumption: 11.2030

maximum steps, simulation is done ... 
******************* Test is start *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -91.79022954486854 SOC: 0.6058 Cumulative_SOC_deviation: 7.7952 Fuel Consumption: 13.8381
******************* Test is done *****************

../data/d

maximum steps, simulation is done ... 
elapsed_time: 54.108
Episode: 137 Exploration P: 0.0278 Total reward: -62.57165207204169 SOC: 0.5993 Cumulative_SOC_deviation: 4.5569 Fuel Consumption: 17.0025

../data/driving_cycles/city\VITO_MOLCity.mat
maximum steps, simulation is done ... 
elapsed_time: 89.189
Episode: 138 Exploration P: 0.0273 Total reward: -79.50747267415461 SOC: 0.6137 Cumulative_SOC_deviation: 5.2070 Fuel Consumption: 27.4375

../data/driving_cycles/city\VITO_RW_Decade_Polo_BCN_City1.mat
maximum steps, simulation is done ... 
elapsed_time: 89.776
Episode: 139 Exploration P: 0.0268 Total reward: -100.91909877744047 SOC: 0.5991 Cumulative_SOC_deviation: 7.7906 Fuel Consumption: 23.0130

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 66.054
Episode: 140 Exploration P: 0.0264 Total reward: -79.49950825677342 SOC: 0.6084 Cumulative_SOC_deviation: 7.0063 Fuel Consumption: 9.4363

maximum steps, simulation is done ... 
*********

maximum steps, simulation is done ... 
elapsed_time: 88.573
Episode: 166 Exploration P: 0.0167 Total reward: -462.3406936424682 SOC: 0.5472 Cumulative_SOC_deviation: 44.2713 Fuel Consumption: 19.6280

../data/driving_cycles/city\VITO_RW_Antwerp1_May19c.mat
maximum steps, simulation is done ... 
elapsed_time: 62.203
Episode: 167 Exploration P: 0.0166 Total reward: -404.7696777407328 SOC: 0.5188 Cumulative_SOC_deviation: 40.0953 Fuel Consumption: 3.8162

../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 65.683
Episode: 168 Exploration P: 0.0165 Total reward: -265.7136342405356 SOC: 0.5537 Cumulative_SOC_deviation: 26.0643 Fuel Consumption: 5.0703

../data/driving_cycles/city\FTP_75_cycle.mat
SOC is nan...
elapsed_time: 96.262
Episode: 169 Exploration P: 0.0162 Total reward: -13394.74151234804 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 21.2076

../data/driving_cycles/city\VITO_DUBDC.mat
maximum steps, simulation is done ... 
elaps

  2 * r_dis)) * (v_dis - (v_dis ** 2 - 4 * r_dis * p_bat) ** (0.5)) * (p_bat >= 0)


maximum steps, simulation is done ... 
elapsed_time: 113.982
Episode: 184 Exploration P: 0.0140 Total reward: -7273.284968753471 SOC: -0.0586 Cumulative_SOC_deviation: 724.5680 Fuel Consumption: 27.6052

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 65.845
Episode: 185 Exploration P: 0.0139 Total reward: -322.00042618841104 SOC: 0.5452 Cumulative_SOC_deviation: 31.7628 Fuel Consumption: 4.3722

../data/driving_cycles/city\ny_city_traffic.mat
maximum steps, simulation is done ... 
elapsed_time: 35.945
Episode: 186 Exploration P: 0.0139 Total reward: -101.82729390042324 SOC: 0.5806 Cumulative_SOC_deviation: 9.8201 Fuel Consumption: 3.6264

../data/driving_cycles/city\VITO_RW_BUS_TMB_Line24N_1.mat
maximum steps, simulation is done ... 
elapsed_time: 178.342
Episode: 187 Exploration P: 0.0137 Total reward: -850.2603721694869 SOC: 0.5057 Cumulative_SOC_deviation: 83.8975 Fuel Consumption: 11.2853

../data/driving_cycles/city\VITO_DUBDC.mat

maximum steps, simulation is done ... 
elapsed_time: 115.003
Episode: 213 Exploration P: 0.0118 Total reward: -7574.873510234353 SOC: 1.0000 Cumulative_SOC_deviation: 716.9024 Fuel Consumption: 405.8497

../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 66.519
Episode: 214 Exploration P: 0.0118 Total reward: -4283.170915327708 SOC: 1.0000 Cumulative_SOC_deviation: 404.9124 Fuel Consumption: 234.0472

../data/driving_cycles/city\VITO_RW_Decade_Octavia_BCN_City1.mat
maximum steps, simulation is done ... 
elapsed_time: 96.484
Episode: 215 Exploration P: 0.0117 Total reward: -6491.497376044532 SOC: 1.0000 Cumulative_SOC_deviation: 614.6741 Fuel Consumption: 344.7564

../data/driving_cycles/city\VITO_RW_BUS_TMB_Line24N_1.mat
maximum steps, simulation is done ... 
elapsed_time: 177.771
Episode: 216 Exploration P: 0.0116 Total reward: -11653.06968063348 SOC: 0.9288 Cumulative_SOC_deviation: 1118.7679 Fuel Consumption: 465.3907

../data/driving_cycl

maximum steps, simulation is done ... 
elapsed_time: 117.971
Episode: 242 Exploration P: 0.0107 Total reward: -9112.90708513517 SOC: 0.9773 Cumulative_SOC_deviation: 870.1798 Fuel Consumption: 411.1087

../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Full_1.mat
maximum steps, simulation is done ... 
elapsed_time: 134.289
Episode: 243 Exploration P: 0.0107 Total reward: -4959.7491671632615 SOC: 0.3247 Cumulative_SOC_deviation: 494.9869 Fuel Consumption: 9.8797

../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Full_1.mat
maximum steps, simulation is done ... 
elapsed_time: 134.407
Episode: 244 Exploration P: 0.0107 Total reward: -6837.4323296898565 SOC: 1.0000 Cumulative_SOC_deviation: 653.5833 Fuel Consumption: 301.5995

../data/driving_cycles/city\VITO_RW_Decade_Jumper_MOL_City1.mat
maximum steps, simulation is done ... 
elapsed_time: 101.427
Episode: 245 Exploration P: 0.0106 Total reward: -1375.371906621884 SOC: 0.4567 Cumulative_SOC_deviation: 135.9770 Fuel Consumption: 15.60

KeyboardInterrupt: 

In [None]:
# Persist the per-trial results collected in `results_dict` to disk.
with open("DDPG4.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)