In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 

from vehicle_model_DDPG1 import Environment 
from cell_model import CellModel 

# Set CUDA_VISIBLE_DEVICES to '-1' for this process — presumably to hide every
# GPU from TensorFlow so the notebook runs on CPU only.
# NOTE(review): this assignment happens after `import tensorflow`; confirm it
# still takes effect with the TensorFlow version in use.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Data files handed to the vehicle Environment: one driving cycle, the battery
# description and the motor description (all .mat files).
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# `cell_model`, `battery_path` and `motor_path` are reused by initialization_env()
# every time a fresh environment is built.
cell_model = CellModel()
# Initial environment instance. The last positional argument (10) sits in the
# slot that initialization_env() names `reward_factor`.
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)

# Length of the state vector; sizes the replay-buffer columns and the network
# input layers.
num_states = 4

In [3]:
class OUActionNoise:
    """Callable, stateful noise source in the Ornstein-Uhlenbeck style.

    Every call moves the stored value ``x_prev`` towards ``mean`` (strength
    ``theta * dt``) and adds a Gaussian perturbation scaled by
    ``std_deviation * sqrt(dt)``; the updated value is stored and returned.

    Args:
        mean: Value the process is pulled towards.
        std_deviation: Scale of the random perturbation.
        theta: Strength of the pull towards ``mean``.
        dt: Step size of one call.
        x_initial: Starting value; ``None`` means start from 0.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.mean = mean
        self.std_dev = std_deviation
        self.theta = theta
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Put the process back at ``x_initial`` (or 0 when none was given)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process by one step and return the new value."""
        pull = self.theta * (self.mean - self.x_prev) * self.dt
        kick = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        self.x_prev = self.x_prev + pull + kick
        return self.x_prev

In [4]:
class Buffer:
    """Ring buffer of (state, action, reward, next_state) rows plus the DDPG update.

    Storage is four pre-allocated NumPy arrays; once ``buffer_capacity`` rows
    have been written, new rows overwrite the oldest ones.

    ``learn`` reads these module-level names: ``gamma``, ``actor_model``,
    ``critic_model``, ``target_actor``, ``target_critic``, ``actor_optimizer``
    and ``critic_optimizer``; ``__init__`` reads ``num_states``.

    Args:
        buffer_capacity: Number of rows kept.
        batch_size: Number of rows drawn per ``learn`` call.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Counts every record() call, so it can grow past the capacity.
        self.buffer_counter = 0

        rows = self.buffer_capacity
        self.state_buffer = np.zeros((rows, num_states))
        self.action_buffer = np.zeros((rows, 1))
        self.reward_buffer = np.zeros((rows, 1))
        self.next_state_buffer = np.zeros((rows, num_states))

    def record(self, obs_tuple):
        """Store one transition; ``obs_tuple`` is indexed 0..3 as
        (state, action, reward, next_state)."""
        slot = self.buffer_counter % self.buffer_capacity
        stores = (
            self.state_buffer,
            self.action_buffer,
            self.reward_buffer,
            self.next_state_buffer,
        )
        for field, store in enumerate(stores):
            store[slot] = obs_tuple[field]
        self.buffer_counter += 1

    def learn(self):
        """Draw one random mini-batch and apply one critic step, then one actor step.

        NOTE(review): no terminal flag is stored, so the critic target always
        adds ``gamma * target_critic(next_state, ...)``, also for the last
        transition of an episode.
        NOTE(review): the networks are called without ``training=True``, so
        their BatchNormalization layers presumably run in inference mode —
        confirm that this is intended.
        """
        # Only rows that have been written are eligible; np.random.choice
        # draws with replacement by default.
        filled = min(self.buffer_counter, self.buffer_capacity)
        picks = np.random.choice(filled, self.batch_size)

        states = tf.convert_to_tensor(self.state_buffer[picks])
        taken_actions = tf.convert_to_tensor(self.action_buffer[picks])
        rewards = tf.cast(
            tf.convert_to_tensor(self.reward_buffer[picks]), dtype=tf.float32
        )
        next_states = tf.convert_to_tensor(self.next_state_buffer[picks])

        # Critic step: mean squared error against the bootstrapped target.
        with tf.GradientTape() as critic_tape:
            next_actions = target_actor(next_states)
            td_target = rewards + gamma * target_critic([next_states, next_actions])
            q_taken = critic_model([states, taken_actions])
            critic_loss = tf.math.reduce_mean(tf.square(td_target - q_taken))
        critic_grads = critic_tape.gradient(
            critic_loss, critic_model.trainable_variables
        )
        critic_optimizer.apply_gradients(
            zip(critic_grads, critic_model.trainable_variables)
        )

        # Actor step: raise the critic's value of the actor's own actions
        # (the loss is the negated mean value).
        with tf.GradientTape() as actor_tape:
            proposed_actions = actor_model(states)
            q_proposed = critic_model([states, proposed_actions])
            actor_loss = - tf.math.reduce_mean(q_proposed)
        actor_grads = actor_tape.gradient(
            actor_loss, actor_model.trainable_variables
        )
        actor_optimizer.apply_gradients(
            zip(actor_grads, actor_model.trainable_variables)
        )
        

In [5]:
def _blend_into_target(target_model, source_model, tau):
    """Set target_model's weights to ``target * (1 - tau) + tau * source``, weight by weight."""
    current = target_model.weights
    blended = [
        current[idx] * (1 - tau) + tau * source_weight
        for idx, source_weight in enumerate(source_model.weights)
    ]
    target_model.set_weights(blended)


def update_target(tau):
    """Soft-update both target networks towards their online counterparts.

    Works on the module-level ``target_critic``/``critic_model`` pair first and
    the ``target_actor``/``actor_model`` pair second.

    Args:
        tau: Mixing factor; each target weight becomes
            ``(1 - tau) * target + tau * online``.
    """
    _blend_into_target(target_critic, critic_model, tau)
    _blend_into_target(target_actor, actor_model, tau)
    

In [6]:
def get_actor():
    """Build the actor network.

    Layout: input of length ``num_states`` -> BatchNormalization ->
    Dense(512, relu) -> Dense(512, relu) -> Dense(1, sigmoid).
    The sigmoid keeps the single output in (0, 1); the policy functions
    multiply it by the upper action bound afterwards.

    Returns:
        tf.keras.Model: the (uncompiled) actor model.
    """
    # Output-layer kernel is drawn from a narrow uniform range.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` must be a tuple: `(num_states)` is just an int in parentheses,
    # `(num_states,)` is the 1-tuple Keras documents for this argument.
    inputs = layers.Input(shape=(num_states,))
    normalized_inputs = layers.BatchNormalization()(inputs)

    hidden = layers.Dense(512, activation="relu")(normalized_inputs)
    hidden = layers.Dense(512, activation="relu")(hidden)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(hidden)
    return tf.keras.Model(inputs, outputs)

In [7]:
def get_critic():
    """Build the critic network that scores a (state, action) pair.

    Layout:
        state branch:  input of length ``num_states`` -> BatchNormalization
                       -> Dense(16, relu) -> Dense(32, relu)
        action branch: input of length 1 -> Dense(32, relu)
        head:          concatenate both branches -> Dense(512, relu)
                       -> Dense(512, relu) -> Dense(1) with no activation

    Returns:
        tf.keras.Model: the (uncompiled) model taking
        ``[state_input, action_input]``.
    """
    # `shape` must be a tuple: `(num_states)` and `(1)` are plain ints, the
    # trailing comma makes them the 1-tuples Keras documents for this argument.
    state_input = layers.Input(shape=(num_states,))
    state_normalized = layers.BatchNormalization()(state_input)

    state_out = layers.Dense(16, activation="relu")(state_normalized)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    return tf.keras.Model([state_input, action_input], outputs)
    

In [8]:
def policy(state, noise_object):
    """Pick an action from the actor output plus additive exploration noise.

    Args:
        state: Batched state; ``state[0][2]`` and ``state[0][3]`` are read as
            the lower and upper action bounds, and the whole batch is fed to
            the module-level ``actor_model``.
        noise_object: Zero-argument callable whose return value is added to
            the actor output.

    Returns:
        ``(actor output + noise) * upper bound``, clipped to the two bounds.
    """
    features = state[0]
    lower = features[2].numpy()
    upper = features[3].numpy()

    actor_output = tf.squeeze(actor_model(state)).numpy()
    perturbed = actor_output + noise_object()

    # Scale by the upper bound first, then force the result into range.
    return np.clip(perturbed * upper, lower, upper)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Epsilon-greedy action selection over a bounded continuous action.

    With probability ``eps`` one of 10 evenly spaced values between the two
    bounds is returned; otherwise the module-level ``actor_model`` output is
    multiplied by the upper bound and clipped to the bounds.

    Args:
        state: Batched state; the last two entries of ``state[0]`` are read as
            the lower and upper action bounds.
        eps: Exploration probability. Any value <= 0 always yields the greedy
            branch because ``random.random()`` is never negative.

    Returns:
        The chosen action.
    """
    features = state[0]
    lower = features[-2].numpy()
    upper = features[-1].numpy()

    explore = random.random() < eps
    if not explore:
        greedy = tf.squeeze(actor_model(state)).numpy() * upper
        return np.clip(greedy, lower, upper)

    # Exploration: uniform pick among the 10 grid points (indices 0..9).
    pick = random.randint(0, 9)
    return np.linspace(lower, upper, 10)[pick]

In [10]:
# Exploration-noise scale.
# NOTE(review): `std_dev` is not referenced by the next line — the 0.2 is
# repeated as a literal in the OUActionNoise call, so the two must be kept in
# sync by hand.
std_dev = 0.2 
# Noise object of the kind policy() expects; the training loop in this notebook
# calls policy_epsilon_greedy() instead.
ou_noise = OUActionNoise(mean=0, std_deviation=0.2)

# Adam optimizers used by Buffer.learn for the critic and the actor step.
critic_lr = 0.0005 
actor_lr = 0.00025 
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

# Number of training episodes per trial.
total_episodes = 300
# Discount factor applied to the target critic's value in Buffer.learn.
gamma = 0.95 
# Mixing factor passed to update_target() for the soft target update.
tau = 0.001 

# Epsilon-greedy schedule: eps starts at MAX_EPSILON and decays exponentially
# towards MIN_EPSILON at DECAY_RATE per step once training has started.
MAX_EPSILON = 1 
MIN_EPSILON = 0.01 
DECAY_RATE = 0.00002
# Mini-batch size handed to Buffer in initialization().
BATCH_SIZE = 32 
# Environment steps collected before learning begins (the loop trains only
# while steps > DELAY_TRAINING).
DELAY_TRAINING = 10000 

In [11]:
def initialization():
    """Create fresh online/target networks and an empty replay buffer.

    The target networks start as exact weight copies of their online
    counterparts. The buffer holds 500000 rows and samples ``BATCH_SIZE``
    rows per update.

    Returns:
        tuple: ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    online_actor, online_critic = get_actor(), get_critic()
    frozen_actor, frozen_critic = get_actor(), get_critic()

    # Start each target network from the same weights as its online twin.
    pairs = ((frozen_actor, online_actor), (frozen_critic, online_critic))
    for target_net, online_net in pairs:
        target_net.set_weights(online_net.get_weights())

    replay = Buffer(500000, BATCH_SIZE)
    return online_actor, online_critic, frozen_actor, frozen_critic, replay

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Write the weights of all four networks below ``./<root>/``.

    Args:
        actor_model, critic_model, target_actor, target_critic: Objects
            exposing ``save_weights(path)``.
        root: Directory name (relative to the working directory) that
            receives the four checkpoints.
    """
    checkpoints = (
        (actor_model, "./{}/actor_model_checkpoint"),
        (critic_model, "./{}/critic_model_checkpoint"),
        (target_actor, "./{}/target_actor_checkpoint"),
        (target_critic, "./{}/target_critic_checkpoint"),
    )
    for network, path_template in checkpoints:
        network.save_weights(path_template.format(root))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor):
    """Build a new Environment for one driving cycle.

    The module-level ``cell_model``, ``battery_path`` and ``motor_path`` are
    reused; only the driving cycle and the reward factor vary per call.

    Args:
        driving_path: Path of the driving-cycle file.
        reward_factor: Passed through as the Environment's last argument.

    Returns:
        The newly constructed Environment.
    """
    return Environment(
        cell_model, driving_path, battery_path, motor_path, reward_factor
    )

In [14]:
def test_agent(actor_model, reward_factor, test_path_start):
    test_cycles = glob.glob("../data/driving_cycles/city/*.mat")[test_path_start:]
    test_cycle = np.random.choice(test_cycles)
    env = initialization_env(test_cycle, reward_factor)
    
    total_reward = 0
    state = env.reset() 
    while True: 
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
        action = policy_epsilon_greedy(tf_state, -1)
        next_state, reward, done = env.step(action)
        
        state = next_state 
        total_reward += reward 
        
        if done: 
            break 
        
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
    
    print("******************* Test is start *****************")
    print(test_cycle)
    print('Total reward: {}'.format(total_reward), 
          "SOC: {:.4f}".format(env.SOC), 
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption))
    print("******************* Test is done *****************")
    print("")
    return env.history 

In [15]:
# Training driver: for each trial, build fresh networks, train for
# `total_episodes` episodes over the training driving cycles, evaluate every
# 10 episodes, then save the weights and collect the per-episode metrics.
print(env.version)

num_trials = 1
# Maps trial number (1-based) -> dict of metric lists gathered during that trial.
results_dict = {} 
# Training cycles: the first 7 paths returned by glob.
# NOTE(review): glob.glob does not guarantee an ordering, while test_agent()
# takes its cycle from the end of the same listing — consider sorting the
# listing so the train/test split is stable across machines.
driving_cycle_paths = glob.glob("../data/driving_cycles/city/*.mat")[:7]

for trial in range(num_trials): 
    print("")
    print("Trial {}".format(trial))
    print("")
    
    # These assignments are module-level, so they are the `actor_model`,
    # `critic_model`, `target_actor` and `target_critic` that Buffer.learn,
    # update_target and the policy functions read as globals.
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()
    
    # Exploration probability and the environment-step counter; both carry
    # over across all episodes of the trial.
    eps = MAX_EPSILON 
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    episode_test_history = [] 
    episode_num_test = [] 
    for ep in range(total_episodes): 
        # Cycle through the training paths in round-robin order.
        k = ep % len(driving_cycle_paths)
        driving_cycle_path = driving_cycle_paths[k]
#         driving_cycle_path = np.random.choice(driving_cycle_paths)
        print(driving_cycle_path)
        # New environment per episode, with reward factor 10.
        env = initialization_env(driving_cycle_path, 10)
        
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            # Add a leading batch dimension before handing the state to the policy.
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            # The terminal next-state is replaced by an all-zero vector.
            # NOTE(review): the buffer stores no `done` flag, so Buffer.learn
            # still bootstraps from the target critic's value of this zero state.
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            # Learning starts only after DELAY_TRAINING steps have been
            # collected; from then on every step does one gradient update, one
            # soft target update and one epsilon decay.
            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                # Exponential decay from MAX_EPSILON towards MIN_EPSILON,
                # driven by the number of steps since training began.
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps
                                                                        -DELAY_TRAINING))

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        # End-of-episode metrics read from the environment.
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption) 

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        # Sum of absolute distances between each recorded SOC value and 0.6.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
        )
        print("")
        
        # Every 10th episode: one greedy evaluation run. The -1 makes
        # test_agent slice the globbed file list from its last element.
        if (ep + 1) % 10 == 0: 
            history = test_agent(actor_model, 10, -1)
            episode_test_history.append(history) 
            episode_num_test.append(ep + 1)
    
    # Checkpoints go below ./DDPG3_trial<N>/ (N is the 1-based trial number).
    root = "DDPG3_trial{}".format(trial+1)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)
    
    results_dict[trial + 1] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs, 
        "test_history": episode_test_history, 
        "test_episode_num": episode_num_test, 
    }

1

Trial 0

../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 13.551
Episode: 1 Exploration P: 1.0000 Total reward: -3888.440468935018 SOC: 1.0000 Cumulative_SOC_deviation: 376.4943 Fuel Consumption: 123.4976

../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 12.742
Episode: 2 Exploration P: 1.0000 Total reward: -3876.445887686777 SOC: 1.0000 Cumulative_SOC_deviation: 375.6155 Fuel Consumption: 120.2914

../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 13.157
Episode: 3 Exploration P: 1.0000 Total reward: -3913.223848436321 SOC: 1.0000 Cumulative_SOC_deviation: 379.1706 Fuel Consumption: 121.5181

../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 13.374
Episode: 4 Exploration P: 1.0000 Total reward: -3855.2939238575764 SOC: 1.0000 Cumulative_SOC_deviation: 373.7327 Fuel Consumption: 117.9671

../data/driving

maximum steps, simulation is done ... 
elapsed_time: 61.098
Episode: 21 Exploration P: 0.7429 Total reward: -3412.927332538614 SOC: 1.0000 Cumulative_SOC_deviation: 332.7738 Fuel Consumption: 85.1890

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 62.043
Episode: 22 Exploration P: 0.7275 Total reward: -3077.954226827811 SOC: 1.0000 Cumulative_SOC_deviation: 299.1497 Fuel Consumption: 86.4570

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 63.099
Episode: 23 Exploration P: 0.7124 Total reward: -3123.2517114079265 SOC: 1.0000 Cumulative_SOC_deviation: 303.4545 Fuel Consumption: 88.7066

../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
elapsed_time: 64.577
Episode: 24 Exploration P: 0.6974 Total reward: -3625.4674878978035 SOC: 1.0000 Cumulative_SOC_deviation: 353.8409 Fuel Consumption: 87.0580

../data/driving_cycles/city\ny_city_composite_truck.mat


maximum steps, simulation is done ... 
elapsed_time: 82.518
Episode: 51 Exploration P: 0.3789 Total reward: -923.4100790039976 SOC: 0.7507 Cumulative_SOC_deviation: 86.3188 Fuel Consumption: 60.2217

../data/driving_cycles/city\FTP_75_cycle.mat
maximum steps, simulation is done ... 
elapsed_time: 112.695
Episode: 52 Exploration P: 0.3653 Total reward: -1813.967561736504 SOC: 0.6932 Cumulative_SOC_deviation: 172.9869 Fuel Consumption: 84.0983

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 63.778
Episode: 53 Exploration P: 0.3578 Total reward: -1027.8539855873648 SOC: 0.5922 Cumulative_SOC_deviation: 98.1765 Fuel Consumption: 46.0887

../data/driving_cycles/city\FTP_75_cycle.mat
maximum steps, simulation is done ... 
elapsed_time: 113.084
Episode: 54 Exploration P: 0.3450 Total reward: -1380.8978127714663 SOC: 0.6560 Cumulative_SOC_deviation: 129.9670 Fuel Consumption: 81.2282

../data/driving_cycles/city\FTP_75_cycle.mat
maximum steps

maximum steps, simulation is done ... 
elapsed_time: 63.735
Episode: 81 Exploration P: 0.1781 Total reward: -1407.2099749903314 SOC: 0.3180 Cumulative_SOC_deviation: 138.1496 Fuel Consumption: 25.7135

../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
elapsed_time: 65.194
Episode: 82 Exploration P: 0.1745 Total reward: -981.0225732212042 SOC: 0.7377 Cumulative_SOC_deviation: 95.8363 Fuel Consumption: 22.6600

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.147
Episode: 83 Exploration P: 0.1700 Total reward: -2896.0860250783794 SOC: 0.3619 Cumulative_SOC_deviation: 286.6002 Fuel Consumption: 30.0841

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.357
Episode: 84 Exploration P: 0.1667 Total reward: -1451.8045487898164 SOC: 0.3039 Cumulative_SOC_deviation: 142.7539 Fuel Consumption: 24.2655

../data/driving_cycles/city\nuremberg_r36.mat
maximum st

  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (
  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (


SOC is nan...
elapsed_time: 99.261
Episode: 87 Exploration P: 0.1543 Total reward: -13914.536230282776 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 34.1924

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.976
Episode: 88 Exploration P: 0.1504 Total reward: -3088.8010402309365 SOC: 0.3221 Cumulative_SOC_deviation: 306.1811 Fuel Consumption: 26.9899

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 64.965
Episode: 89 Exploration P: 0.1474 Total reward: -761.9203354749645 SOC: 0.7469 Cumulative_SOC_deviation: 74.1232 Fuel Consumption: 20.6878

../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 65.890
Episode: 90 Exploration P: 0.1444 Total reward: -950.8744087208106 SOC: 0.7672 Cumulative_SOC_deviation: 92.8661 Fuel Consumption: 22.2134

maximum steps, simulation is done ... 
******************* Test is start *****************
../da

maximum steps, simulation is done ... 
elapsed_time: 64.351
Episode: 117 Exploration P: 0.0806 Total reward: -336.35771214622974 SOC: 0.6432 Cumulative_SOC_deviation: 29.0157 Fuel Consumption: 46.2008

../data/driving_cycles/city\FTP_75_cycle.mat
maximum steps, simulation is done ... 
elapsed_time: 111.929
Episode: 118 Exploration P: 0.0780 Total reward: -296.37945728665943 SOC: 0.6059 Cumulative_SOC_deviation: 22.3463 Fuel Consumption: 72.9162

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.471
Episode: 119 Exploration P: 0.0762 Total reward: -204.8640524027518 SOC: 0.5971 Cumulative_SOC_deviation: 15.9542 Fuel Consumption: 45.3223

../data/driving_cycles/city\ny_city_composite_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 62.741
Episode: 120 Exploration P: 0.0748 Total reward: -239.77792100240083 SOC: 0.6368 Cumulative_SOC_deviation: 22.6459 Fuel Consumption: 13.3189

maximum steps, simulation is done ... 
******

maximum steps, simulation is done ... 
elapsed_time: 66.518
Episode: 147 Exploration P: 0.0438 Total reward: -132.37438087186484 SOC: 0.6121 Cumulative_SOC_deviation: 12.2929 Fuel Consumption: 9.4452

../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
elapsed_time: 65.683
Episode: 148 Exploration P: 0.0431 Total reward: -158.19775274968046 SOC: 0.5578 Cumulative_SOC_deviation: 15.0206 Fuel Consumption: 7.9922

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.049
Episode: 149 Exploration P: 0.0424 Total reward: -329.89199631642157 SOC: 0.5777 Cumulative_SOC_deviation: 28.9194 Fuel Consumption: 40.6981

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.603
Episode: 150 Exploration P: 0.0417 Total reward: -369.74823524592915 SOC: 0.5834 Cumulative_SOC_deviation: 32.8652 Fuel Consumption: 41.0964

maximum steps, simulation is done ... 
*****************

maximum steps, simulation is done ... 
elapsed_time: 64.495
Episode: 177 Exploration P: 0.0265 Total reward: -4256.530331167897 SOC: 1.0000 Cumulative_SOC_deviation: 402.5338 Fuel Consumption: 231.1928

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.451
Episode: 178 Exploration P: 0.0261 Total reward: -4148.0689932183095 SOC: 1.0000 Cumulative_SOC_deviation: 391.9399 Fuel Consumption: 228.6698

../data/driving_cycles/city\ny_city_composite_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 62.075
Episode: 179 Exploration P: 0.0258 Total reward: -4041.713512398098 SOC: 1.0000 Cumulative_SOC_deviation: 382.0394 Fuel Consumption: 221.3196

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.414
Episode: 180 Exploration P: 0.0254 Total reward: -5447.582763871823 SOC: 1.0000 Cumulative_SOC_deviation: 515.2253 Fuel Consumption: 295.3296

maximum steps, simulation is done ... 


maximum steps, simulation is done ... 
elapsed_time: 66.571
Episode: 207 Exploration P: 0.0179 Total reward: -4283.499144243795 SOC: 1.0000 Cumulative_SOC_deviation: 405.0389 Fuel Consumption: 233.1106

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.712
Episode: 208 Exploration P: 0.0177 Total reward: -5456.848482447171 SOC: 1.0000 Cumulative_SOC_deviation: 515.9950 Fuel Consumption: 296.8982

../data/driving_cycles/city\ny_city_composite_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 62.511
Episode: 209 Exploration P: 0.0176 Total reward: -3731.067821035427 SOC: 0.9298 Cumulative_SOC_deviation: 360.9319 Fuel Consumption: 121.7487

../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 66.412
Episode: 210 Exploration P: 0.0174 Total reward: -202.9061611361606 SOC: 0.5667 Cumulative_SOC_deviation: 19.6804 Fuel Consumption: 6.1021

maximum steps, simulation is done ... 
********

maximum steps, simulation is done ... 
elapsed_time: 65.155
Episode: 237 Exploration P: 0.0140 Total reward: -152.71755489377406 SOC: 0.5829 Cumulative_SOC_deviation: 14.5513 Fuel Consumption: 7.2048

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 65.878
Episode: 238 Exploration P: 0.0139 Total reward: -171.28779095147257 SOC: 0.5887 Cumulative_SOC_deviation: 16.3802 Fuel Consumption: 7.4859

../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 66.143
Episode: 239 Exploration P: 0.0138 Total reward: -155.773347368257 SOC: 0.5817 Cumulative_SOC_deviation: 14.8846 Fuel Consumption: 6.9269

../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
elapsed_time: 65.749
Episode: 240 Exploration P: 0.0137 Total reward: -57.646492361390074 SOC: 0.6014 Cumulative_SOC_deviation: 4.6210 Fuel Consumption: 11.4361

maximum steps, simulation is done ... 
******************* Test i

maximum steps, simulation is done ... 
elapsed_time: 113.707
Episode: 267 Exploration P: 0.0119 Total reward: -1507.5841120631544 SOC: 0.4452 Cumulative_SOC_deviation: 144.6435 Fuel Consumption: 61.1489

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.559
Episode: 268 Exploration P: 0.0119 Total reward: -311.20400928557694 SOC: 0.6147 Cumulative_SOC_deviation: 26.5369 Fuel Consumption: 45.8348

../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 66.293
Episode: 269 Exploration P: 0.0119 Total reward: -218.86169350883287 SOC: 0.6300 Cumulative_SOC_deviation: 20.8061 Fuel Consumption: 10.8005

../data/driving_cycles/city\FTP_75_cycle.mat
maximum steps, simulation is done ... 
elapsed_time: 112.954
Episode: 270 Exploration P: 0.0118 Total reward: -454.22727424758864 SOC: 0.6091 Cumulative_SOC_deviation: 38.1003 Fuel Consumption: 73.2244

maximum steps, simulation is done ... 
*****************

maximum steps, simulation is done ... 
elapsed_time: 105.765
Episode: 297 Exploration P: 0.0109 Total reward: -342.7419361603239 SOC: 0.5899 Cumulative_SOC_deviation: 27.3848 Fuel Consumption: 68.8936

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 60.019
Episode: 298 Exploration P: 0.0109 Total reward: -135.37237702715342 SOC: 0.5982 Cumulative_SOC_deviation: 9.3589 Fuel Consumption: 41.7838

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 61.852
Episode: 299 Exploration P: 0.0108 Total reward: -327.7410961933362 SOC: 0.5555 Cumulative_SOC_deviation: 32.2775 Fuel Consumption: 4.9660

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 62.820
Episode: 300 Exploration P: 0.0108 Total reward: -340.65882326064536 SOC: 0.5554 Cumulative_SOC_deviation: 33.5792 Fuel Consumption: 4.8666

maximum steps, simulation is done ... 
******************* T

In [16]:
# Persist the per-trial metrics collected in `results_dict` to DDPG3.pkl in the
# working directory, using the highest pickle protocol available.
with open("DDPG3.pkl", "wb") as f: 
    pickle.dump(results_dict, f, pickle.HIGHEST_PROTOCOL)