In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 

from vehicle_model_DDPG3 import Environment 
from cell_model import CellModel 

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
cell_model = CellModel()
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)

num_states = 4

In [3]:
class OUActionNoise: 
    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None): 
        self.theta = theta 
        self.mean = mean 
        self.std_dev = std_deviation 
        self.dt = dt 
        self.x_initial = x_initial 
        self.reset() 
        
    def reset(self): 
        if self.x_initial is not None: 
            self.x_prev = self.x_initial 
        else: 
            self.x_prev = 0 
            
    def __call__(self): 
        x = (
             self.x_prev + self.theta * (self.mean - self.x_prev) * self.dt 
            + self.std_dev * np.sqrt(self.dt) * np.random.normal() 
        )
        self.x_prev = x 
        return x 

In [4]:
class Buffer: 
    def __init__(self, buffer_capacity=100000, batch_size=64):      
        self.buffer_capacity = buffer_capacity 
        self.batch_size = batch_size 
        self.buffer_counter = 0 
        
        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))
        
    def record(self, obs_tuple):
        index = self.buffer_counter % self.buffer_capacity 

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]
        
        self.buffer_counter += 1 
        
    def learn(self): 
        record_range = min(self.buffer_counter, self.buffer_capacity)
        batch_indices = np.random.choice(record_range, self.batch_size)

        state_batch = tf.convert_to_tensor(self.state_buffer[batch_indices])
        action_batch = tf.convert_to_tensor(self.action_buffer[batch_indices])
        reward_batch = tf.convert_to_tensor(self.reward_buffer[batch_indices])
        reward_batch = tf.cast(reward_batch, dtype=tf.float32)
        next_state_batch = tf.convert_to_tensor(self.next_state_buffer[batch_indices])
        
        with tf.GradientTape() as tape: 
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value)) 
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables) 
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )
        
        with tf.GradientTape() as tape: 
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables) 
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau): 
    new_weights = [] 
    target_variables = target_critic.weights
    for i, variable in enumerate(critic_model.weights): 
        new_weights.append(target_variables[i] * (1 - tau) + tau * variable)
    target_critic.set_weights(new_weights)
    
    new_weights = [] 
    target_variables = target_actor.weights
    for i, variable in enumerate(actor_model.weights): 
        new_weights.append(target_variables[i] * (1 - tau) + tau * variable)
    target_actor.set_weights(new_weights)
    

In [6]:
def get_actor(): 
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)
    
    inputs = layers.Input(shape=(num_states))
    inputs_batchnorm = layers.BatchNormalization()(inputs)
    
    out = layers.Dense(512, activation="relu")(inputs_batchnorm)
#     out = layers.BatchNormalization()(out)
    out = layers.Dense(512, activation="relu")(out)
#     out = layers.BatchNormalization()(out)
    outputs = layers.Dense(1, activation="sigmoid", 
                          kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic(): 
    state_input = layers.Input(shape=(num_states))
    state_input_batchnorm = layers.BatchNormalization()(state_input)
    
    state_out = layers.Dense(16, activation="relu")(state_input_batchnorm)
#     state_out = layers.BatchNormalization()(state_out)
    state_out = layers.Dense(32, activation="relu")(state_out)
#     state_out = layers.BatchNormalization()(state_out)
    
    action_input = layers.Input(shape=(1))
    action_out = layers.Dense(32, activation="relu")(action_input)
#     action_out = layers.BatchNormalization()(action_out)
    
    concat = layers.Concatenate()([state_out, action_out]) 
    
    out = layers.Dense(512, activation="relu")(concat)
#     out = layers.BatchNormalization()(out)
    out = layers.Dense(512, activation="relu")(out)
#     out = layers.BatchNormalization()(out)
    outputs = layers.Dense(1)(out)
    
    model = tf.keras.Model([state_input, action_input], outputs)
    return model 
    

In [8]:
def policy(state, noise_object): 
    j_min = state[0][2].numpy()
    j_max = state[0][3].numpy()
    sampled_action = tf.squeeze(actor_model(state)) 
    noise = noise_object()
    sampled_action = sampled_action.numpy() + noise 
    legal_action = sampled_action * j_max 
    legal_action = np.clip(legal_action, j_min, j_max)
#     print(j_min, j_max, legal_action, noise)
    return legal_action 
    

In [9]:
def policy_epsilon_greedy(state, eps): 
    j_min = state[0][-2].numpy()
    j_max = state[0][-1].numpy()

    if random.random() < eps: 
        a = random.randint(0, 9)
        return np.linspace(j_min, j_max, 10)[a]
    else: 
        sampled_action = tf.squeeze(actor_model(state)).numpy()  
        legal_action = sampled_action * j_max 
        legal_action = np.clip(legal_action, j_min, j_max)
        return legal_action

In [10]:
std_dev = 0.2 
ou_noise = OUActionNoise(mean=0, std_deviation=0.2)

critic_lr = 0.0005 
actor_lr = 0.00025 
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 200
gamma = 0.95 
tau = 0.001 

MAX_EPSILON = 1 
MIN_EPSILON = 0.01 
DECAY_RATE = 0.00002
BATCH_SIZE = 32 
DELAY_TRAINING = 3000 

In [11]:
def initialization(reward_factor): 
    actor_model = get_actor() 
    critic_model = get_critic() 

    target_actor = get_actor() 
    target_critic = get_critic() 
    target_actor.set_weights(actor_model.get_weights())
    target_critic.set_weights(critic_model.get_weights())
    
    buffer = Buffer(500000, BATCH_SIZE)
    env = Environment(cell_model, drving_cycle, battery_path, motor_path, reward_factor)
    return actor_model, critic_model, target_actor, target_critic, buffer, env 

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root): 
    actor_model.save_weights("./{}/actor_model_checkpoint".format(root))
    critic_model.save_weights("./{}/critic_model_checkpoint".format(root))
    target_actor.save_weights("./{}/target_actor_checkpoint".format(root))
    target_critic.save_weights("./{}/target_critic_checkpoint".format(root))
    print("model is saved..")

In [13]:
print(env.version)

num_trials = 3
reward_factor = 10
results_dict = {} 
for trial in range(num_trials): 
    print()
    print("Trial {}".format(trial))
    
    actor_model, critic_model, target_actor, target_critic, buffer, env = initialization(
        reward_factor
    )
    
    eps = MAX_EPSILON 
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    for ep in range(total_episodes): 
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * steps)

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption) 

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
        )
    
#     root = "DDPG3_trial{}".format(trial+1)
#     save_weights(actor_model, critic_model, target_actor, target_critic, root)
    
    results_dict[trial + 1] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs 
    }

1

Trial 0
maximum steps, simulation is done ... 
elapsed_time: 36.834
Episode: 1 Exploration P: 1.0000 Total reward: -5149.666902155747 SOC: 1.0000 Cumulative_SOC_deviation: 499.8453 Fuel Consumption: 151.2135
maximum steps, simulation is done ... 
elapsed_time: 36.493
Episode: 2 Exploration P: 1.0000 Total reward: -5060.330003553431 SOC: 1.0000 Cumulative_SOC_deviation: 491.0375 Fuel Consumption: 149.9547


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer c

maximum steps, simulation is done ... 
elapsed_time: 109.218
Episode: 28 Exploration P: 0.4689 Total reward: -2680.1784427677717 SOC: 1.0000 Cumulative_SOC_deviation: 259.0203 Fuel Consumption: 89.9753
maximum steps, simulation is done ... 
elapsed_time: 107.674
Episode: 29 Exploration P: 0.4565 Total reward: -3578.429258270688 SOC: 1.0000 Cumulative_SOC_deviation: 348.3945 Fuel Consumption: 94.4844
maximum steps, simulation is done ... 
elapsed_time: 107.773
Episode: 30 Exploration P: 0.4444 Total reward: -3055.9999763698997 SOC: 1.0000 Cumulative_SOC_deviation: 296.7036 Fuel Consumption: 88.9641
maximum steps, simulation is done ... 
elapsed_time: 107.633
Episode: 31 Exploration P: 0.4326 Total reward: -2389.8313460741756 SOC: 1.0000 Cumulative_SOC_deviation: 230.4520 Fuel Consumption: 85.3115
maximum steps, simulation is done ... 
elapsed_time: 107.349
Episode: 32 Exploration P: 0.4212 Total reward: -2939.822562816214 SOC: 1.0000 Cumulative_SOC_deviation: 285.1065 Fuel Consumption: 

maximum steps, simulation is done ... 
elapsed_time: 5289.585
Episode: 69 Exploration P: 0.1589 Total reward: -207.08113919405187 SOC: 0.6412 Cumulative_SOC_deviation: 15.3465 Fuel Consumption: 53.6161
maximum steps, simulation is done ... 
elapsed_time: 137.139
Episode: 70 Exploration P: 0.1548 Total reward: -135.81107436156145 SOC: 0.6051 Cumulative_SOC_deviation: 8.5452 Fuel Consumption: 50.3594
maximum steps, simulation is done ... 
elapsed_time: 129.551
Episode: 71 Exploration P: 0.1509 Total reward: -162.02472896263754 SOC: 0.6202 Cumulative_SOC_deviation: 10.9708 Fuel Consumption: 52.3167
maximum steps, simulation is done ... 
elapsed_time: 130.690
Episode: 72 Exploration P: 0.1471 Total reward: -182.50969623480228 SOC: 0.6188 Cumulative_SOC_deviation: 13.1320 Fuel Consumption: 51.1893
maximum steps, simulation is done ... 
elapsed_time: 127.030
Episode: 73 Exploration P: 0.1434 Total reward: -187.5552298843556 SOC: 0.6227 Cumulative_SOC_deviation: 13.6424 Fuel Consumption: 51.1

maximum steps, simulation is done ... 
elapsed_time: 98.932
Episode: 111 Exploration P: 0.0570 Total reward: -161.41870498154591 SOC: 0.5946 Cumulative_SOC_deviation: 11.4937 Fuel Consumption: 46.4819
maximum steps, simulation is done ... 
elapsed_time: 98.765
Episode: 112 Exploration P: 0.0557 Total reward: -171.75037334622914 SOC: 0.5922 Cumulative_SOC_deviation: 12.5436 Fuel Consumption: 46.3139
maximum steps, simulation is done ... 
elapsed_time: 98.625
Episode: 113 Exploration P: 0.0545 Total reward: -172.78138343845805 SOC: 0.5926 Cumulative_SOC_deviation: 12.6788 Fuel Consumption: 45.9935
maximum steps, simulation is done ... 
elapsed_time: 98.733
Episode: 114 Exploration P: 0.0533 Total reward: -177.81310365069015 SOC: 0.5935 Cumulative_SOC_deviation: 13.1126 Fuel Consumption: 46.6875
maximum steps, simulation is done ... 
elapsed_time: 98.609
Episode: 115 Exploration P: 0.0521 Total reward: -192.3545363102111 SOC: 0.5901 Cumulative_SOC_deviation: 14.5932 Fuel Consumption: 46.4

maximum steps, simulation is done ... 
elapsed_time: 97.192
Episode: 152 Exploration P: 0.0252 Total reward: -122.64141898529242 SOC: 0.6021 Cumulative_SOC_deviation: 7.6665 Fuel Consumption: 45.9761
maximum steps, simulation is done ... 
elapsed_time: 95.843
Episode: 153 Exploration P: 0.0248 Total reward: -131.6504506166169 SOC: 0.5976 Cumulative_SOC_deviation: 8.5602 Fuel Consumption: 46.0487
maximum steps, simulation is done ... 
elapsed_time: 98.651
Episode: 154 Exploration P: 0.0244 Total reward: -132.2598406185945 SOC: 0.5970 Cumulative_SOC_deviation: 8.6093 Fuel Consumption: 46.1671
maximum steps, simulation is done ... 
elapsed_time: 98.886
Episode: 155 Exploration P: 0.0240 Total reward: -117.93398604086295 SOC: 0.6001 Cumulative_SOC_deviation: 7.1989 Fuel Consumption: 45.9450
maximum steps, simulation is done ... 
elapsed_time: 98.996
Episode: 156 Exploration P: 0.0237 Total reward: -126.21921770017111 SOC: 0.6003 Cumulative_SOC_deviation: 8.0306 Fuel Consumption: 45.9128
ma

maximum steps, simulation is done ... 
elapsed_time: 98.254
Episode: 194 Exploration P: 0.0148 Total reward: -150.70770101865662 SOC: 0.5983 Cumulative_SOC_deviation: 10.5650 Fuel Consumption: 45.0581
maximum steps, simulation is done ... 
elapsed_time: 98.375
Episode: 195 Exploration P: 0.0147 Total reward: -169.71480422127954 SOC: 0.5961 Cumulative_SOC_deviation: 12.4164 Fuel Consumption: 45.5510
maximum steps, simulation is done ... 
elapsed_time: 98.700
Episode: 196 Exploration P: 0.0146 Total reward: -143.93375455815786 SOC: 0.6010 Cumulative_SOC_deviation: 9.8422 Fuel Consumption: 45.5115
maximum steps, simulation is done ... 
elapsed_time: 98.462
Episode: 197 Exploration P: 0.0144 Total reward: -143.82837371543374 SOC: 0.5960 Cumulative_SOC_deviation: 9.8728 Fuel Consumption: 45.1003
maximum steps, simulation is done ... 
elapsed_time: 98.366
Episode: 198 Exploration P: 0.0143 Total reward: -163.492473491643 SOC: 0.5946 Cumulative_SOC_deviation: 11.8701 Fuel Consumption: 44.7918

maximum steps, simulation is done ... 
elapsed_time: 96.568
Episode: 21 Exploration P: 0.5662 Total reward: -3717.837877626146 SOC: 1.0000 Cumulative_SOC_deviation: 361.8906 Fuel Consumption: 98.9315
maximum steps, simulation is done ... 
elapsed_time: 97.070
Episode: 22 Exploration P: 0.5511 Total reward: -3712.7541174391718 SOC: 1.0000 Cumulative_SOC_deviation: 361.3750 Fuel Consumption: 99.0037
maximum steps, simulation is done ... 
elapsed_time: 96.997
Episode: 23 Exploration P: 0.5364 Total reward: -4078.924577042576 SOC: 1.0000 Cumulative_SOC_deviation: 397.6918 Fuel Consumption: 102.0063
maximum steps, simulation is done ... 
elapsed_time: 97.105
Episode: 24 Exploration P: 0.5222 Total reward: -3833.076560073127 SOC: 1.0000 Cumulative_SOC_deviation: 373.7158 Fuel Consumption: 95.9186
maximum steps, simulation is done ... 
elapsed_time: 96.902
Episode: 25 Exploration P: 0.5083 Total reward: -2681.470437362153 SOC: 1.0000 Cumulative_SOC_deviation: 259.1340 Fuel Consumption: 90.130

maximum steps, simulation is done ... 
elapsed_time: 98.047
Episode: 62 Exploration P: 0.1904 Total reward: -2049.7855979218316 SOC: 0.4476 Cumulative_SOC_deviation: 200.5922 Fuel Consumption: 43.8636
maximum steps, simulation is done ... 
elapsed_time: 98.563
Episode: 63 Exploration P: 0.1855 Total reward: -2194.1569600808157 SOC: 0.4214 Cumulative_SOC_deviation: 215.2625 Fuel Consumption: 41.5317
maximum steps, simulation is done ... 
elapsed_time: 97.775
Episode: 64 Exploration P: 0.1808 Total reward: -2034.2036431093507 SOC: 0.4767 Cumulative_SOC_deviation: 198.8926 Fuel Consumption: 45.2771
maximum steps, simulation is done ... 
elapsed_time: 98.189
Episode: 65 Exploration P: 0.1761 Total reward: -2705.1805320658004 SOC: 0.3845 Cumulative_SOC_deviation: 266.6424 Fuel Consumption: 38.7561
maximum steps, simulation is done ... 
elapsed_time: 97.949
Episode: 66 Exploration P: 0.1716 Total reward: -2901.027858138221 SOC: 0.3730 Cumulative_SOC_deviation: 286.2612 Fuel Consumption: 38.4

maximum steps, simulation is done ... 
elapsed_time: 98.807
Episode: 104 Exploration P: 0.0669 Total reward: -112.58631312916344 SOC: 0.6090 Cumulative_SOC_deviation: 6.5460 Fuel Consumption: 47.1268
maximum steps, simulation is done ... 
elapsed_time: 98.521
Episode: 105 Exploration P: 0.0654 Total reward: -129.54649950522057 SOC: 0.6009 Cumulative_SOC_deviation: 8.2666 Fuel Consumption: 46.8806
maximum steps, simulation is done ... 
elapsed_time: 98.757
Episode: 106 Exploration P: 0.0639 Total reward: -127.61132615714499 SOC: 0.6100 Cumulative_SOC_deviation: 8.0095 Fuel Consumption: 47.5164
maximum steps, simulation is done ... 
elapsed_time: 97.622
Episode: 107 Exploration P: 0.0624 Total reward: -120.459397244315 SOC: 0.6050 Cumulative_SOC_deviation: 7.2797 Fuel Consumption: 47.6621
maximum steps, simulation is done ... 
elapsed_time: 98.669
Episode: 108 Exploration P: 0.0610 Total reward: -123.59185948241837 SOC: 0.6061 Cumulative_SOC_deviation: 7.6084 Fuel Consumption: 47.5075
ma

maximum steps, simulation is done ... 
elapsed_time: 98.424
Episode: 145 Exploration P: 0.0285 Total reward: -145.7523128652159 SOC: 0.5979 Cumulative_SOC_deviation: 9.9960 Fuel Consumption: 45.7922
maximum steps, simulation is done ... 
elapsed_time: 98.949
Episode: 146 Exploration P: 0.0280 Total reward: -162.0940793092372 SOC: 0.5950 Cumulative_SOC_deviation: 11.6627 Fuel Consumption: 45.4672
maximum steps, simulation is done ... 
elapsed_time: 98.794
Episode: 147 Exploration P: 0.0275 Total reward: -157.58589030775522 SOC: 0.5947 Cumulative_SOC_deviation: 11.1966 Fuel Consumption: 45.6200
maximum steps, simulation is done ... 
elapsed_time: 99.035
Episode: 148 Exploration P: 0.0270 Total reward: -155.699731870313 SOC: 0.6058 Cumulative_SOC_deviation: 10.9488 Fuel Consumption: 46.2121
maximum steps, simulation is done ... 
elapsed_time: 98.596
Episode: 149 Exploration P: 0.0265 Total reward: -154.921859093078 SOC: 0.5983 Cumulative_SOC_deviation: 10.9391 Fuel Consumption: 45.5312
ma

maximum steps, simulation is done ... 
elapsed_time: 108.761
Episode: 186 Exploration P: 0.0160 Total reward: -118.09103176566899 SOC: 0.6005 Cumulative_SOC_deviation: 7.1641 Fuel Consumption: 46.4497
maximum steps, simulation is done ... 
elapsed_time: 108.889
Episode: 187 Exploration P: 0.0158 Total reward: -160.70948135676355 SOC: 0.6021 Cumulative_SOC_deviation: 11.4399 Fuel Consumption: 46.3104
maximum steps, simulation is done ... 
elapsed_time: 108.989
Episode: 188 Exploration P: 0.0157 Total reward: -135.15017536391466 SOC: 0.6025 Cumulative_SOC_deviation: 8.8276 Fuel Consumption: 46.8744
maximum steps, simulation is done ... 
elapsed_time: 109.047
Episode: 189 Exploration P: 0.0155 Total reward: -105.39137935367683 SOC: 0.6028 Cumulative_SOC_deviation: 5.9850 Fuel Consumption: 45.5409
maximum steps, simulation is done ... 
elapsed_time: 108.653
Episode: 190 Exploration P: 0.0154 Total reward: -152.04882785756558 SOC: 0.6004 Cumulative_SOC_deviation: 10.6652 Fuel Consumption: 4

maximum steps, simulation is done ... 
elapsed_time: 231.112
Episode: 13 Exploration P: 0.7028 Total reward: -4638.762804163933 SOC: 1.0000 Cumulative_SOC_deviation: 451.9969 Fuel Consumption: 118.7939
maximum steps, simulation is done ... 
elapsed_time: 230.935
Episode: 14 Exploration P: 0.6840 Total reward: -4409.563564328593 SOC: 1.0000 Cumulative_SOC_deviation: 429.6599 Fuel Consumption: 112.9641
maximum steps, simulation is done ... 
elapsed_time: 231.658
Episode: 15 Exploration P: 0.6658 Total reward: -4441.473840409514 SOC: 1.0000 Cumulative_SOC_deviation: 433.0914 Fuel Consumption: 110.5600
maximum steps, simulation is done ... 
elapsed_time: 231.363
Episode: 16 Exploration P: 0.6480 Total reward: -4084.4564411614692 SOC: 1.0000 Cumulative_SOC_deviation: 397.6125 Fuel Consumption: 108.3313
maximum steps, simulation is done ... 
elapsed_time: 232.028
Episode: 17 Exploration P: 0.6307 Total reward: -4163.924723921239 SOC: 1.0000 Cumulative_SOC_deviation: 405.8679 Fuel Consumption

maximum steps, simulation is done ... 
elapsed_time: 94.656
Episode: 54 Exploration P: 0.2347 Total reward: -1829.0518411188234 SOC: 0.5434 Cumulative_SOC_deviation: 177.8481 Fuel Consumption: 50.5704
maximum steps, simulation is done ... 
elapsed_time: 94.344
Episode: 55 Exploration P: 0.2286 Total reward: -1820.9635761668908 SOC: 0.4957 Cumulative_SOC_deviation: 177.3829 Fuel Consumption: 47.1344
maximum steps, simulation is done ... 
elapsed_time: 94.294
Episode: 56 Exploration P: 0.2227 Total reward: -1804.9551999677515 SOC: 0.5115 Cumulative_SOC_deviation: 175.6841 Fuel Consumption: 48.1146
maximum steps, simulation is done ... 
elapsed_time: 94.403
Episode: 57 Exploration P: 0.2170 Total reward: -1884.5590961923342 SOC: 0.5160 Cumulative_SOC_deviation: 183.6362 Fuel Consumption: 48.1972
maximum steps, simulation is done ... 
elapsed_time: 94.139
Episode: 58 Exploration P: 0.2114 Total reward: -2427.099027463898 SOC: 0.4682 Cumulative_SOC_deviation: 238.2070 Fuel Consumption: 45.0

maximum steps, simulation is done ... 
elapsed_time: 68.728
Episode: 95 Exploration P: 0.0829 Total reward: -122.75869084101208 SOC: 0.6070 Cumulative_SOC_deviation: 7.5114 Fuel Consumption: 47.6451
maximum steps, simulation is done ... 
elapsed_time: 69.742
Episode: 96 Exploration P: 0.0809 Total reward: -126.93061934564116 SOC: 0.6150 Cumulative_SOC_deviation: 7.7421 Fuel Consumption: 49.5094
maximum steps, simulation is done ... 
elapsed_time: 69.540
Episode: 97 Exploration P: 0.0790 Total reward: -130.07285793614057 SOC: 0.6081 Cumulative_SOC_deviation: 8.2061 Fuel Consumption: 48.0115
maximum steps, simulation is done ... 
elapsed_time: 69.557
Episode: 98 Exploration P: 0.0771 Total reward: -125.53480004420784 SOC: 0.6062 Cumulative_SOC_deviation: 7.7561 Fuel Consumption: 47.9733
maximum steps, simulation is done ... 
elapsed_time: 69.496
Episode: 99 Exploration P: 0.0753 Total reward: -120.2504665438096 SOC: 0.6015 Cumulative_SOC_deviation: 7.2986 Fuel Consumption: 47.2644
maximu

maximum steps, simulation is done ... 
elapsed_time: 70.225
Episode: 136 Exploration P: 0.0336 Total reward: -158.65022291616035 SOC: 0.5974 Cumulative_SOC_deviation: 11.3247 Fuel Consumption: 45.4035
maximum steps, simulation is done ... 
elapsed_time: 70.249
Episode: 137 Exploration P: 0.0330 Total reward: -171.78796276272072 SOC: 0.5951 Cumulative_SOC_deviation: 12.6248 Fuel Consumption: 45.5398
maximum steps, simulation is done ... 
elapsed_time: 70.224
Episode: 138 Exploration P: 0.0324 Total reward: -164.8123067173494 SOC: 0.5926 Cumulative_SOC_deviation: 11.9677 Fuel Consumption: 45.1349
maximum steps, simulation is done ... 
elapsed_time: 70.003
Episode: 139 Exploration P: 0.0318 Total reward: -151.47391284566848 SOC: 0.5966 Cumulative_SOC_deviation: 10.6322 Fuel Consumption: 45.1518
maximum steps, simulation is done ... 
elapsed_time: 69.899
Episode: 140 Exploration P: 0.0312 Total reward: -138.63092280591363 SOC: 0.6003 Cumulative_SOC_deviation: 9.2723 Fuel Consumption: 45.90

maximum steps, simulation is done ... 
elapsed_time: 69.901
Episode: 177 Exploration P: 0.0177 Total reward: -139.06545812550976 SOC: 0.5965 Cumulative_SOC_deviation: 9.3737 Fuel Consumption: 45.3288
maximum steps, simulation is done ... 
elapsed_time: 69.789
Episode: 178 Exploration P: 0.0175 Total reward: -148.2009252363664 SOC: 0.5973 Cumulative_SOC_deviation: 10.3216 Fuel Consumption: 44.9845
maximum steps, simulation is done ... 
elapsed_time: 69.856
Episode: 179 Exploration P: 0.0173 Total reward: -150.22119759548917 SOC: 0.5943 Cumulative_SOC_deviation: 10.5605 Fuel Consumption: 44.6162
maximum steps, simulation is done ... 
elapsed_time: 69.858
Episode: 180 Exploration P: 0.0171 Total reward: -152.61177706336312 SOC: 0.5954 Cumulative_SOC_deviation: 10.7421 Fuel Consumption: 45.1906
maximum steps, simulation is done ... 
elapsed_time: 69.825
Episode: 181 Exploration P: 0.0169 Total reward: -157.32745830874939 SOC: 0.5960 Cumulative_SOC_deviation: 11.2159 Fuel Consumption: 45.16

In [14]:
with open("DDPG3_mass1200.pkl", "wb") as f: 
    pickle.dump(results_dict, f, pickle.HIGHEST_PROTOCOL)