In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 

from vehicle_model_DDPG1 import Environment 
from cell_model import CellModel 

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
cell_model = CellModel()
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)

num_states = 4

In [3]:
class OUActionNoise: 
    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None): 
        self.theta = theta 
        self.mean = mean 
        self.std_dev = std_deviation 
        self.dt = dt 
        self.x_initial = x_initial 
        self.reset() 
        
    def reset(self): 
        if self.x_initial is not None: 
            self.x_prev = self.x_initial 
        else: 
            self.x_prev = 0 
            
    def __call__(self): 
        x = (
             self.x_prev + self.theta * (self.mean - self.x_prev) * self.dt 
            + self.std_dev * np.sqrt(self.dt) * np.random.normal() 
        )
        self.x_prev = x 
        return x 

In [4]:
class Buffer: 
    def __init__(self, buffer_capacity=100000, batch_size=64):      
        self.buffer_capacity = buffer_capacity 
        self.batch_size = batch_size 
        self.buffer_counter = 0 
        
        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))
        
    def record(self, obs_tuple):
        index = self.buffer_counter % self.buffer_capacity 

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]
        
        self.buffer_counter += 1 
        
    def learn(self): 
        record_range = min(self.buffer_counter, self.buffer_capacity)
        batch_indices = np.random.choice(record_range, self.batch_size)

        state_batch = tf.convert_to_tensor(self.state_buffer[batch_indices])
        action_batch = tf.convert_to_tensor(self.action_buffer[batch_indices])
        reward_batch = tf.convert_to_tensor(self.reward_buffer[batch_indices])
        reward_batch = tf.cast(reward_batch, dtype=tf.float32)
        next_state_batch = tf.convert_to_tensor(self.next_state_buffer[batch_indices])
        
        with tf.GradientTape() as tape: 
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value)) 
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables) 
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )
        
        with tf.GradientTape() as tape: 
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables) 
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau): 
    new_weights = [] 
    target_variables = target_critic.weights
    for i, variable in enumerate(critic_model.weights): 
        new_weights.append(target_variables[i] * (1 - tau) + tau * variable)
    target_critic.set_weights(new_weights)
    
    new_weights = [] 
    target_variables = target_actor.weights
    for i, variable in enumerate(actor_model.weights): 
        new_weights.append(target_variables[i] * (1 - tau) + tau * variable)
    target_actor.set_weights(new_weights)
    

In [6]:
def get_actor(): 
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)
    
    inputs = layers.Input(shape=(num_states))
    inputs_batchnorm = layers.BatchNormalization()(inputs)
    
    out = layers.Dense(512, activation="relu")(inputs_batchnorm)
#     out = layers.BatchNormalization()(out)
    out = layers.Dense(512, activation="relu")(out)
#     out = layers.BatchNormalization()(out)
    outputs = layers.Dense(1, activation="sigmoid", 
                          kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic(): 
    state_input = layers.Input(shape=(num_states))
    state_input_batchnorm = layers.BatchNormalization()(state_input)
    
    state_out = layers.Dense(16, activation="relu")(state_input_batchnorm)
#     state_out = layers.BatchNormalization()(state_out)
    state_out = layers.Dense(32, activation="relu")(state_out)
#     state_out = layers.BatchNormalization()(state_out)
    
    action_input = layers.Input(shape=(1))
    action_out = layers.Dense(32, activation="relu")(action_input)
#     action_out = layers.BatchNormalization()(action_out)
    
    concat = layers.Concatenate()([state_out, action_out]) 
    
    out = layers.Dense(512, activation="relu")(concat)
#     out = layers.BatchNormalization()(out)
    out = layers.Dense(512, activation="relu")(out)
#     out = layers.BatchNormalization()(out)
    outputs = layers.Dense(1)(out)
    
    model = tf.keras.Model([state_input, action_input], outputs)
    return model 
    

In [8]:
def policy(state, noise_object): 
    j_min = state[0][2].numpy()
    j_max = state[0][3].numpy()
    sampled_action = tf.squeeze(actor_model(state)) 
    noise = noise_object()
    sampled_action = sampled_action.numpy() + noise 
    legal_action = sampled_action * j_max 
    legal_action = np.clip(legal_action, j_min, j_max)
#     print(j_min, j_max, legal_action, noise)
    return legal_action 
    

In [9]:
def policy_epsilon_greedy(state, eps): 
    j_min = state[0][-2].numpy()
    j_max = state[0][-1].numpy()

    if random.random() < eps: 
        a = random.randint(0, 9)
        return np.linspace(j_min, j_max, 10)[a]
    else: 
        sampled_action = tf.squeeze(actor_model(state)).numpy()  
        legal_action = sampled_action * j_max 
        legal_action = np.clip(legal_action, j_min, j_max)
        return legal_action

In [10]:
std_dev = 0.2 
ou_noise = OUActionNoise(mean=0, std_deviation=0.2)

critic_lr = 0.0005 
actor_lr = 0.00025 
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 300
gamma = 0.95 
tau = 0.001 

MAX_EPSILON = 1 
MIN_EPSILON = 0.01 
DECAY_RATE = 0.00002
BATCH_SIZE = 32 
DELAY_TRAINING = 10000 

In [11]:
def initialization(): 
    actor_model = get_actor() 
    critic_model = get_critic() 

    target_actor = get_actor() 
    target_critic = get_critic() 
    target_actor.set_weights(actor_model.get_weights())
    target_critic.set_weights(critic_model.get_weights())
    
    buffer = Buffer(500000, BATCH_SIZE)
    return actor_model, critic_model, target_actor, target_critic, buffer

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root): 
    actor_model.save_weights("./{}/actor_model_checkpoint".format(root))
    critic_model.save_weights("./{}/critic_model_checkpoint".format(root))
    target_actor.save_weights("./{}/target_actor_checkpoint".format(root))
    target_critic.save_weights("./{}/target_critic_checkpoint".format(root))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor):
    env = Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)
    return env 

In [14]:
def test_agent(actor_model, reward_factor, test_path_start):
    test_cycles = glob.glob("../data/driving_cycles/city/*.mat")[test_path_start:]
    test_cycle = np.random.choice(test_cycles)
    env = initialization_env(test_cycle, reward_factor)
    
    total_reward = 0
    state = env.reset() 
    while True: 
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
        action = policy_epsilon_greedy(tf_state, -1)
        next_state, reward, done = env.step(action)
        
        state = next_state 
        total_reward += reward 
        
        if done: 
            break 
        
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
    
    print("******************* Test is start *****************")
    print(test_cycle)
    print('Total reward: {}'.format(total_reward), 
          "SOC: {:.4f}".format(env.SOC), 
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption))
    print("******************* Test is done *****************")
    print("")
    return env.history 
    

In [15]:
print(env.version)

num_trials = 1
results_dict = {} 
driving_cycle_paths = glob.glob("../data/driving_cycles/city/*.mat")[:3]

for trial in range(num_trials): 
    print("")
    print("Trial {}".format(trial))
    print("")
    
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()
    
    eps = MAX_EPSILON 
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    episode_test_history = [] 
    episode_num_test = [] 
    for ep in range(total_episodes): 
        driving_cycle_path = np.random.choice(driving_cycle_paths)
        print(driving_cycle_path)
        env = initialization_env(driving_cycle_path, 10)
        
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps
                                                                        -DELAY_TRAINING))

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption) 

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
        )
        print("")
        
        if (ep + 1) % 10 == 0: 
            history = test_agent(actor_model, 10, -1)
            episode_test_history.append(history) 
            episode_num_test.append(ep + 1)
    
    root = "DDPG2_trial{}".format(trial+1)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)
    
    results_dict[trial + 1] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs, 
        "test_history": episode_test_history, 
        "test_episode_num": episode_num_test, 
    }

1

Trial 0

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 13.938
Episode: 1 Exploration P: 1.0000 Total reward: -3687.233783860015 SOC: 1.0000 Cumulative_SOC_deviation: 356.6410 Fuel Consumption: 120.8237

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 12.616
Episode: 2 Exploration P: 1.0000 Total reward: -3696.2477246492517 SOC: 1.0000 Cumulative_SOC_deviation: 357.6711 Fuel Consumption: 119.5366

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 12.907
Episode: 3 Exploration P: 1.0000 Total reward: -3898.296379230762 SOC: 1.0000 Cumulative_SOC_deviation: 377.6379 Fuel Consumption: 121.9175

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 12.737
Episode: 4 Exploration P: 1.0000 Total reward: -3647.58304240338 SOC: 1.0000 Cumulative_SOC_deviation: 352.8702 Fuel Consumption: 118.8815

.

maximum steps, simulation is done ... 
elapsed_time: 63.738
Episode: 21 Exploration P: 0.7576 Total reward: -3639.209894764206 SOC: 1.0000 Cumulative_SOC_deviation: 354.5022 Fuel Consumption: 94.1874

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 64.959
Episode: 22 Exploration P: 0.7415 Total reward: -3631.233528033516 SOC: 1.0000 Cumulative_SOC_deviation: 353.8431 Fuel Consumption: 92.8027

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 64.029
Episode: 23 Exploration P: 0.7257 Total reward: -3719.311249542513 SOC: 1.0000 Cumulative_SOC_deviation: 362.7649 Fuel Consumption: 91.6621

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 64.967
Episode: 24 Exploration P: 0.7103 Total reward: -3623.5352451845883 SOC: 1.0000 Cumulative_SOC_deviation: 353.7254 Fuel Consumption: 86.2810

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps,

maximum steps, simulation is done ... 
elapsed_time: 64.207
Episode: 51 Exploration P: 0.3818 Total reward: -1029.02981886419 SOC: 0.6602 Cumulative_SOC_deviation: 97.7796 Fuel Consumption: 51.2342

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 65.545
Episode: 52 Exploration P: 0.3738 Total reward: -2777.14977390484 SOC: 1.0000 Cumulative_SOC_deviation: 272.6744 Fuel Consumption: 50.4060

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 65.831
Episode: 53 Exploration P: 0.3659 Total reward: -2718.4135754775384 SOC: 1.0000 Cumulative_SOC_deviation: 267.0226 Fuel Consumption: 48.1881

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 65.816
Episode: 54 Exploration P: 0.3582 Total reward: -2497.956049741504 SOC: 1.0000 Cumulative_SOC_deviation: 245.3037 Fuel Consumption: 44.9186

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, si

maximum steps, simulation is done ... 
elapsed_time: 82.495
Episode: 81 Exploration P: 0.1982 Total reward: -2814.5279910066647 SOC: 0.3768 Cumulative_SOC_deviation: 278.3232 Fuel Consumption: 31.2965

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 62.836
Episode: 82 Exploration P: 0.1942 Total reward: -1421.722938883255 SOC: 0.3278 Cumulative_SOC_deviation: 139.5466 Fuel Consumption: 26.2565

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.963
Episode: 83 Exploration P: 0.1892 Total reward: -2658.642708131141 SOC: 0.4012 Cumulative_SOC_deviation: 262.5413 Fuel Consumption: 33.2300

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 65.759
Episode: 84 Exploration P: 0.1854 Total reward: -1127.293463494329 SOC: 0.7987 Cumulative_SOC_deviation: 110.2365 Fuel Consumption: 24.9284

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum step

maximum steps, simulation is done ... 
elapsed_time: 81.534
Episode: 111 Exploration P: 0.1012 Total reward: -106.29569223582837 SOC: 0.6019 Cumulative_SOC_deviation: 6.2012 Fuel Consumption: 44.2839

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.534
Episode: 112 Exploration P: 0.0993 Total reward: -250.35648993596695 SOC: 0.6354 Cumulative_SOC_deviation: 20.4406 Fuel Consumption: 45.9506

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 63.960
Episode: 113 Exploration P: 0.0974 Total reward: -196.1253169109327 SOC: 0.6257 Cumulative_SOC_deviation: 15.1219 Fuel Consumption: 44.9063

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.124
Episode: 114 Exploration P: 0.0956 Total reward: -193.50111259315335 SOC: 0.6190 Cumulative_SOC_deviation: 14.9324 Fuel Consumption: 44.1767

../data/driving_cycles/city\06_udds_truck.mat
maximum st

maximum steps, simulation is done ... 
elapsed_time: 66.644
Episode: 141 Exploration P: 0.0554 Total reward: -94.97948122134763 SOC: 0.6210 Cumulative_SOC_deviation: 8.4484 Fuel Consumption: 10.4958

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.254
Episode: 142 Exploration P: 0.0545 Total reward: -133.0829244620973 SOC: 0.6034 Cumulative_SOC_deviation: 9.0870 Fuel Consumption: 42.2134

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.475
Episode: 143 Exploration P: 0.0536 Total reward: -158.14899531156271 SOC: 0.6106 Cumulative_SOC_deviation: 11.4975 Fuel Consumption: 43.1737

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 66.255
Episode: 144 Exploration P: 0.0526 Total reward: -54.78112580790255 SOC: 0.6052 Cumulative_SOC_deviation: 4.5739 Fuel Consumption: 9.0419

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, s

maximum steps, simulation is done ... 
elapsed_time: 63.268
Episode: 171 Exploration P: 0.0322 Total reward: -123.61838604033956 SOC: 0.6117 Cumulative_SOC_deviation: 8.0681 Fuel Consumption: 42.9378

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.842
Episode: 172 Exploration P: 0.0316 Total reward: -193.39694846617664 SOC: 0.5924 Cumulative_SOC_deviation: 14.9765 Fuel Consumption: 43.6320

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.457
Episode: 173 Exploration P: 0.0312 Total reward: -118.01493249689159 SOC: 0.6040 Cumulative_SOC_deviation: 7.6431 Fuel Consumption: 41.5838

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 64.627
Episode: 174 Exploration P: 0.0307 Total reward: -69.98885508860836 SOC: 0.6002 Cumulative_SOC_deviation: 6.1779 Fuel Consumption: 8.2101

../data/driving_cycles/city\07_manhattan.mat
maximum steps, 

maximum steps, simulation is done ... 
elapsed_time: 66.168
Episode: 201 Exploration P: 0.0211 Total reward: -99.80270768232907 SOC: 0.5929 Cumulative_SOC_deviation: 9.2231 Fuel Consumption: 7.5716

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.980
Episode: 202 Exploration P: 0.0209 Total reward: -117.62949755107748 SOC: 0.6039 Cumulative_SOC_deviation: 7.6627 Fuel Consumption: 41.0021

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.743
Episode: 203 Exploration P: 0.0207 Total reward: -115.9583863877447 SOC: 0.6144 Cumulative_SOC_deviation: 7.3877 Fuel Consumption: 42.0819

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.853
Episode: 204 Exploration P: 0.0204 Total reward: -115.9601297259442 SOC: 0.5996 Cumulative_SOC_deviation: 7.5346 Fuel Consumption: 40.6137

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, s

maximum steps, simulation is done ... 
elapsed_time: 64.354
Episode: 231 Exploration P: 0.0156 Total reward: -160.1614289160095 SOC: 0.5961 Cumulative_SOC_deviation: 11.9546 Fuel Consumption: 40.6157

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 66.218
Episode: 232 Exploration P: 0.0155 Total reward: -143.98875864778458 SOC: 0.5832 Cumulative_SOC_deviation: 13.7028 Fuel Consumption: 6.9610

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 64.564
Episode: 233 Exploration P: 0.0154 Total reward: -173.3934704011917 SOC: 0.5933 Cumulative_SOC_deviation: 13.2996 Fuel Consumption: 40.3972

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
elapsed_time: 65.137
Episode: 234 Exploration P: 0.0153 Total reward: -164.37717228860473 SOC: 0.5817 Cumulative_SOC_deviation: 15.7664 Fuel Consumption: 6.7131

../data/driving_cycles/city\07_manhattan.mat
maximum steps, 

maximum steps, simulation is done ... 
elapsed_time: 65.432
Episode: 261 Exploration P: 0.0128 Total reward: -35.30271129662876 SOC: 0.6033 Cumulative_SOC_deviation: 2.7116 Fuel Consumption: 8.1863

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.044
Episode: 262 Exploration P: 0.0127 Total reward: -95.0029924107743 SOC: 0.6013 Cumulative_SOC_deviation: 5.2203 Fuel Consumption: 42.7999

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 63.934
Episode: 263 Exploration P: 0.0127 Total reward: -125.00074896212823 SOC: 0.6104 Cumulative_SOC_deviation: 8.1725 Fuel Consumption: 43.2759

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 63.898
Episode: 264 Exploration P: 0.0126 Total reward: -129.23085966514452 SOC: 0.6129 Cumulative_SOC_deviation: 8.6064 Fuel Consumption: 43.1667

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, s

maximum steps, simulation is done ... 
elapsed_time: 62.958
Episode: 291 Exploration P: 0.0114 Total reward: -114.95420171315776 SOC: 0.6132 Cumulative_SOC_deviation: 7.2137 Fuel Consumption: 42.8168

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.870
Episode: 292 Exploration P: 0.0113 Total reward: -90.21027413685519 SOC: 0.6033 Cumulative_SOC_deviation: 4.7687 Fuel Consumption: 42.5233

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 85.708
Episode: 293 Exploration P: 0.0113 Total reward: -104.77841375809335 SOC: 0.6009 Cumulative_SOC_deviation: 6.2095 Fuel Consumption: 42.6838

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
elapsed_time: 75.758
Episode: 294 Exploration P: 0.0113 Total reward: -128.63265928213596 SOC: 0.6139 Cumulative_SOC_deviation: 8.5752 Fuel Consumption: 42.8802

../data/driving_cycles/city\06_udds_truck.mat
maximum steps

In [16]:
with open("DDPG2.pkl", "wb") as f: 
    pickle.dump(results_dict, f, pickle.HIGHEST_PROTOCOL)