In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 

from vehicle_model_DDPG3 import Environment 
from cell_model import CellModel 

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_e-4wd_Battery.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_id_75_110_Westinghouse.mat"
cell_model = CellModel()
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)

num_states = 4

In [3]:
class OUActionNoise: 
    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None): 
        self.theta = theta 
        self.mean = mean 
        self.std_dev = std_deviation 
        self.dt = dt 
        self.x_initial = x_initial 
        self.reset() 
        
    def reset(self): 
        if self.x_initial is not None: 
            self.x_prev = self.x_initial 
        else: 
            self.x_prev = 0 
            
    def __call__(self): 
        x = (
             self.x_prev + self.theta * (self.mean - self.x_prev) * self.dt 
            + self.std_dev * np.sqrt(self.dt) * np.random.normal() 
        )
        self.x_prev = x 
        return x 

In [4]:
class Buffer: 
    def __init__(self, buffer_capacity=100000, batch_size=64):      
        self.buffer_capacity = buffer_capacity 
        self.batch_size = batch_size 
        self.buffer_counter = 0 
        
        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))
        
    def record(self, obs_tuple):
        index = self.buffer_counter % self.buffer_capacity 

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]
        
        self.buffer_counter += 1 
        
    def learn(self): 
        record_range = min(self.buffer_counter, self.buffer_capacity)
        batch_indices = np.random.choice(record_range, self.batch_size)

        state_batch = tf.convert_to_tensor(self.state_buffer[batch_indices])
        action_batch = tf.convert_to_tensor(self.action_buffer[batch_indices])
        reward_batch = tf.convert_to_tensor(self.reward_buffer[batch_indices])
        reward_batch = tf.cast(reward_batch, dtype=tf.float32)
        next_state_batch = tf.convert_to_tensor(self.next_state_buffer[batch_indices])
        
        with tf.GradientTape() as tape: 
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value)) 
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables) 
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )
        
        with tf.GradientTape() as tape: 
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables) 
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau): 
    new_weights = [] 
    target_variables = target_critic.weights
    for i, variable in enumerate(critic_model.weights): 
        new_weights.append(target_variables[i] * (1 - tau) + tau * variable)
    target_critic.set_weights(new_weights)
    
    new_weights = [] 
    target_variables = target_actor.weights
    for i, variable in enumerate(actor_model.weights): 
        new_weights.append(target_variables[i] * (1 - tau) + tau * variable)
    target_actor.set_weights(new_weights)
    

In [6]:
def get_actor(): 
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)
    
    inputs = layers.Input(shape=(num_states))
    inputs_batchnorm = layers.BatchNormalization()(inputs)
    
    out = layers.Dense(512, activation="relu")(inputs_batchnorm)
#     out = layers.BatchNormalization()(out)
    out = layers.Dense(512, activation="relu")(out)
#     out = layers.BatchNormalization()(out)
    outputs = layers.Dense(1, activation="sigmoid", 
                          kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic(): 
    state_input = layers.Input(shape=(num_states))
    state_input_batchnorm = layers.BatchNormalization()(state_input)
    
    state_out = layers.Dense(16, activation="relu")(state_input_batchnorm)
#     state_out = layers.BatchNormalization()(state_out)
    state_out = layers.Dense(32, activation="relu")(state_out)
#     state_out = layers.BatchNormalization()(state_out)
    
    action_input = layers.Input(shape=(1))
    action_out = layers.Dense(32, activation="relu")(action_input)
#     action_out = layers.BatchNormalization()(action_out)
    
    concat = layers.Concatenate()([state_out, action_out]) 
    
    out = layers.Dense(512, activation="relu")(concat)
#     out = layers.BatchNormalization()(out)
    out = layers.Dense(512, activation="relu")(out)
#     out = layers.BatchNormalization()(out)
    outputs = layers.Dense(1)(out)
    
    model = tf.keras.Model([state_input, action_input], outputs)
    return model 
    

In [8]:
def policy(state, noise_object): 
    j_min = state[0][2].numpy()
    j_max = state[0][3].numpy()
    sampled_action = tf.squeeze(actor_model(state)) 
    noise = noise_object()
    sampled_action = sampled_action.numpy() + noise 
    legal_action = sampled_action * j_max 
    legal_action = np.clip(legal_action, j_min, j_max)
#     print(j_min, j_max, legal_action, noise)
    return legal_action 
    

In [9]:
def policy_epsilon_greedy(state, eps): 
    j_min = state[0][-2].numpy()
    j_max = state[0][-1].numpy()

    if random.random() < eps: 
        a = random.randint(0, 9)
        return np.linspace(j_min, j_max, 10)[a]
    else: 
        sampled_action = tf.squeeze(actor_model(state)).numpy()  
        legal_action = sampled_action * j_max 
        legal_action = np.clip(legal_action, j_min, j_max)
        return legal_action

In [10]:
std_dev = 0.2 
ou_noise = OUActionNoise(mean=0, std_deviation=0.2)

critic_lr = 0.0005 
actor_lr = 0.00025 
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 200
gamma = 0.95 
tau = 0.001 

MAX_EPSILON = 1 
MIN_EPSILON = 0.01 
DECAY_RATE = 0.00002
BATCH_SIZE = 32 
DELAY_TRAINING = 3000 

In [11]:
def initialization(reward_factor): 
    actor_model = get_actor() 
    critic_model = get_critic() 

    target_actor = get_actor() 
    target_critic = get_critic() 
    target_actor.set_weights(actor_model.get_weights())
    target_critic.set_weights(critic_model.get_weights())
    
    buffer = Buffer(500000, BATCH_SIZE)
    env = Environment(cell_model, drving_cycle, battery_path, motor_path, reward_factor)
    return actor_model, critic_model, target_actor, target_critic, buffer, env 

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root): 
    actor_model.save_weights("./{}/actor_model_checkpoint".format(root))
    critic_model.save_weights("./{}/critic_model_checkpoint".format(root))
    target_actor.save_weights("./{}/target_actor_checkpoint".format(root))
    target_critic.save_weights("./{}/target_critic_checkpoint".format(root))
    print("model is saved..")

In [13]:
print(env.version)

num_trials = 3
reward_factor = 10
results_dict = {} 
for trial in range(num_trials): 
    print()
    print("Trial {}".format(trial))
    
    actor_model, critic_model, target_actor, target_critic, buffer, env = initialization(
        reward_factor
    )
    
    eps = MAX_EPSILON 
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    for ep in range(total_episodes): 
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * steps)

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption) 

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
        )
    
#     root = "DDPG3_trial{}".format(trial+1)
#     save_weights(actor_model, critic_model, target_actor, target_critic, root)
    
    results_dict[trial + 1] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs 
    }

1

Trial 0
Available condition is not avail... SOC: 1
elapsed_time: 31.681
Episode: 1 Exploration P: 1.0000 Total reward: -3286.2712443325154 SOC: 1.0000 Cumulative_SOC_deviation: 321.0288 Fuel Consumption: 75.9834
Available condition is not avail... SOC: 1
elapsed_time: 29.425
Episode: 2 Exploration P: 1.0000 Total reward: -3365.5774449100677 SOC: 1.0000 Cumulative_SOC_deviation: 328.7582 Fuel Consumption: 77.9954


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base

maximum steps, simulation is done ... 
elapsed_time: 106.156
Episode: 28 Exploration P: 0.4720 Total reward: -613.8844554494998 SOC: 0.6966 Cumulative_SOC_deviation: 56.3479 Fuel Consumption: 50.4053
maximum steps, simulation is done ... 
elapsed_time: 106.266
Episode: 29 Exploration P: 0.4595 Total reward: -571.7921088984685 SOC: 0.7039 Cumulative_SOC_deviation: 52.1108 Fuel Consumption: 50.6839
maximum steps, simulation is done ... 
elapsed_time: 105.971
Episode: 30 Exploration P: 0.4473 Total reward: -570.1884466540504 SOC: 0.6686 Cumulative_SOC_deviation: 52.1764 Fuel Consumption: 48.4242
maximum steps, simulation is done ... 
elapsed_time: 105.979
Episode: 31 Exploration P: 0.4355 Total reward: -624.6821427236465 SOC: 0.6989 Cumulative_SOC_deviation: 57.4184 Fuel Consumption: 50.4981
maximum steps, simulation is done ... 
elapsed_time: 105.750
Episode: 32 Exploration P: 0.4240 Total reward: -550.5983410708614 SOC: 0.6652 Cumulative_SOC_deviation: 50.2453 Fuel Consumption: 48.1456


maximum steps, simulation is done ... 
elapsed_time: 107.649
Episode: 69 Exploration P: 0.1599 Total reward: -137.6993552215659 SOC: 0.5991 Cumulative_SOC_deviation: 9.6520 Fuel Consumption: 41.1797
maximum steps, simulation is done ... 
elapsed_time: 107.565
Episode: 70 Exploration P: 0.1558 Total reward: -129.73471642675773 SOC: 0.5973 Cumulative_SOC_deviation: 8.8742 Fuel Consumption: 40.9927
maximum steps, simulation is done ... 
elapsed_time: 107.656
Episode: 71 Exploration P: 0.1519 Total reward: -135.7741841543787 SOC: 0.5901 Cumulative_SOC_deviation: 9.5128 Fuel Consumption: 40.6460
maximum steps, simulation is done ... 
elapsed_time: 107.828
Episode: 72 Exploration P: 0.1480 Total reward: -146.406367661129 SOC: 0.5948 Cumulative_SOC_deviation: 10.5644 Fuel Consumption: 40.7628
maximum steps, simulation is done ... 
elapsed_time: 108.608
Episode: 73 Exploration P: 0.1443 Total reward: -122.42197614011853 SOC: 0.5985 Cumulative_SOC_deviation: 8.1532 Fuel Consumption: 40.8904
max

maximum steps, simulation is done ... 
elapsed_time: 108.291
Episode: 110 Exploration P: 0.0586 Total reward: -139.933179349686 SOC: 0.5989 Cumulative_SOC_deviation: 9.9320 Fuel Consumption: 40.6135
maximum steps, simulation is done ... 
elapsed_time: 107.596
Episode: 111 Exploration P: 0.0573 Total reward: -153.18205937922482 SOC: 0.5950 Cumulative_SOC_deviation: 11.2673 Fuel Consumption: 40.5087
maximum steps, simulation is done ... 
elapsed_time: 107.741
Episode: 112 Exploration P: 0.0560 Total reward: -132.4627945172124 SOC: 0.5973 Cumulative_SOC_deviation: 9.1864 Fuel Consumption: 40.5983
maximum steps, simulation is done ... 
elapsed_time: 107.883
Episode: 113 Exploration P: 0.0548 Total reward: -147.70258803864743 SOC: 0.5927 Cumulative_SOC_deviation: 10.7661 Fuel Consumption: 40.0416
maximum steps, simulation is done ... 
elapsed_time: 107.407
Episode: 114 Exploration P: 0.0536 Total reward: -137.20221231743915 SOC: 0.5952 Cumulative_SOC_deviation: 9.6732 Fuel Consumption: 40.4

maximum steps, simulation is done ... 
elapsed_time: 102.215
Episode: 151 Exploration P: 0.0258 Total reward: -191.5884560555631 SOC: 0.5943 Cumulative_SOC_deviation: 15.0988 Fuel Consumption: 40.6006
maximum steps, simulation is done ... 
elapsed_time: 102.866
Episode: 152 Exploration P: 0.0253 Total reward: -158.5271391453913 SOC: 0.5948 Cumulative_SOC_deviation: 11.7871 Fuel Consumption: 40.6559
maximum steps, simulation is done ... 
elapsed_time: 107.088
Episode: 153 Exploration P: 0.0249 Total reward: -207.12777871331105 SOC: 0.5946 Cumulative_SOC_deviation: 16.5983 Fuel Consumption: 41.1447
maximum steps, simulation is done ... 
elapsed_time: 108.124
Episode: 154 Exploration P: 0.0245 Total reward: -208.7696079781039 SOC: 0.5952 Cumulative_SOC_deviation: 16.7403 Fuel Consumption: 41.3663
maximum steps, simulation is done ... 
elapsed_time: 109.714
Episode: 155 Exploration P: 0.0241 Total reward: -201.60728217960704 SOC: 0.5941 Cumulative_SOC_deviation: 16.0560 Fuel Consumption: 4

maximum steps, simulation is done ... 
elapsed_time: 104.225
Episode: 192 Exploration P: 0.0151 Total reward: -141.18146870542728 SOC: 0.5983 Cumulative_SOC_deviation: 10.0823 Fuel Consumption: 40.3583
maximum steps, simulation is done ... 
elapsed_time: 107.396
Episode: 193 Exploration P: 0.0150 Total reward: -152.48636863933675 SOC: 0.5964 Cumulative_SOC_deviation: 11.2290 Fuel Consumption: 40.1961
maximum steps, simulation is done ... 
elapsed_time: 107.357
Episode: 194 Exploration P: 0.0148 Total reward: -150.70710093192798 SOC: 0.5990 Cumulative_SOC_deviation: 11.0395 Fuel Consumption: 40.3125
maximum steps, simulation is done ... 
elapsed_time: 107.440
Episode: 195 Exploration P: 0.0147 Total reward: -160.33596312311258 SOC: 0.5954 Cumulative_SOC_deviation: 12.0380 Fuel Consumption: 39.9561
maximum steps, simulation is done ... 
elapsed_time: 107.752
Episode: 196 Exploration P: 0.0146 Total reward: -154.4569792579974 SOC: 0.5953 Cumulative_SOC_deviation: 11.4447 Fuel Consumption:

maximum steps, simulation is done ... 
elapsed_time: 105.322
Episode: 18 Exploration P: 0.6180 Total reward: -1458.2552218610385 SOC: 0.8512 Cumulative_SOC_deviation: 139.7016 Fuel Consumption: 61.2393
maximum steps, simulation is done ... 
elapsed_time: 105.297
Episode: 19 Exploration P: 0.6016 Total reward: -1187.5009257691163 SOC: 0.8218 Cumulative_SOC_deviation: 112.8274 Fuel Consumption: 59.2273
maximum steps, simulation is done ... 
elapsed_time: 105.678
Episode: 20 Exploration P: 0.5855 Total reward: -1092.3834905043288 SOC: 0.8004 Cumulative_SOC_deviation: 103.4714 Fuel Consumption: 57.6692
maximum steps, simulation is done ... 
elapsed_time: 106.078
Episode: 21 Exploration P: 0.5700 Total reward: -1050.6415397326032 SOC: 0.7985 Cumulative_SOC_deviation: 99.3003 Fuel Consumption: 57.6383
maximum steps, simulation is done ... 
elapsed_time: 105.745
Episode: 22 Exploration P: 0.5548 Total reward: -1300.3208819172712 SOC: 0.8428 Cumulative_SOC_deviation: 123.9278 Fuel Consumption:

maximum steps, simulation is done ... 
elapsed_time: 106.710
Episode: 59 Exploration P: 0.2072 Total reward: -2333.95406077656 SOC: 0.5814 Cumulative_SOC_deviation: 229.0977 Fuel Consumption: 42.9772
maximum steps, simulation is done ... 
elapsed_time: 107.190
Episode: 60 Exploration P: 0.2019 Total reward: -160.45933874848643 SOC: 0.6140 Cumulative_SOC_deviation: 11.7156 Fuel Consumption: 43.3029
maximum steps, simulation is done ... 
elapsed_time: 106.982
Episode: 61 Exploration P: 0.1967 Total reward: -125.25778402977998 SOC: 0.6101 Cumulative_SOC_deviation: 8.2388 Fuel Consumption: 42.8695
maximum steps, simulation is done ... 
elapsed_time: 107.048
Episode: 62 Exploration P: 0.1916 Total reward: -105.98040460714854 SOC: 0.6052 Cumulative_SOC_deviation: 6.3631 Fuel Consumption: 42.3496
maximum steps, simulation is done ... 
elapsed_time: 106.692
Episode: 63 Exploration P: 0.1867 Total reward: -120.35508883915415 SOC: 0.5987 Cumulative_SOC_deviation: 7.8581 Fuel Consumption: 41.7746

maximum steps, simulation is done ... 
elapsed_time: 95.602
Episode: 100 Exploration P: 0.0740 Total reward: -125.56971526778243 SOC: 0.6002 Cumulative_SOC_deviation: 8.4761 Fuel Consumption: 40.8084
maximum steps, simulation is done ... 
elapsed_time: 93.255
Episode: 101 Exploration P: 0.0722 Total reward: -159.28213125720063 SOC: 0.5922 Cumulative_SOC_deviation: 11.9042 Fuel Consumption: 40.2406
maximum steps, simulation is done ... 
elapsed_time: 96.490
Episode: 102 Exploration P: 0.0706 Total reward: -119.52547148700681 SOC: 0.5987 Cumulative_SOC_deviation: 7.8751 Fuel Consumption: 40.7746
maximum steps, simulation is done ... 
elapsed_time: 96.335
Episode: 103 Exploration P: 0.0689 Total reward: -130.69600778835542 SOC: 0.5902 Cumulative_SOC_deviation: 9.0611 Fuel Consumption: 40.0853
maximum steps, simulation is done ... 
elapsed_time: 95.886
Episode: 104 Exploration P: 0.0673 Total reward: -176.62937063051453 SOC: 0.5985 Cumulative_SOC_deviation: 13.5948 Fuel Consumption: 40.681

maximum steps, simulation is done ... 
elapsed_time: 111.939
Episode: 141 Exploration P: 0.0308 Total reward: -186.13347610840734 SOC: 0.5959 Cumulative_SOC_deviation: 14.5941 Fuel Consumption: 40.1927
maximum steps, simulation is done ... 
elapsed_time: 116.867
Episode: 142 Exploration P: 0.0302 Total reward: -203.493819530827 SOC: 0.5975 Cumulative_SOC_deviation: 16.3137 Fuel Consumption: 40.3564
maximum steps, simulation is done ... 
elapsed_time: 108.420
Episode: 143 Exploration P: 0.0296 Total reward: -177.92335145554094 SOC: 0.5925 Cumulative_SOC_deviation: 13.7965 Fuel Consumption: 39.9582
maximum steps, simulation is done ... 
elapsed_time: 103.890
Episode: 144 Exploration P: 0.0291 Total reward: -189.3920431596982 SOC: 0.5928 Cumulative_SOC_deviation: 14.9481 Fuel Consumption: 39.9110
maximum steps, simulation is done ... 
elapsed_time: 104.206
Episode: 145 Exploration P: 0.0286 Total reward: -186.1893387019928 SOC: 0.5943 Cumulative_SOC_deviation: 14.5962 Fuel Consumption: 40

maximum steps, simulation is done ... 
elapsed_time: 97.549
Episode: 182 Exploration P: 0.0167 Total reward: -170.57594311659966 SOC: 0.5950 Cumulative_SOC_deviation: 12.9818 Fuel Consumption: 40.7577
maximum steps, simulation is done ... 
elapsed_time: 97.531
Episode: 183 Exploration P: 0.0165 Total reward: -197.09932799400428 SOC: 0.5953 Cumulative_SOC_deviation: 15.6564 Fuel Consumption: 40.5355
maximum steps, simulation is done ... 
elapsed_time: 97.508
Episode: 184 Exploration P: 0.0164 Total reward: -221.33068241261248 SOC: 0.5930 Cumulative_SOC_deviation: 18.1065 Fuel Consumption: 40.2655
maximum steps, simulation is done ... 
elapsed_time: 96.815
Episode: 185 Exploration P: 0.0162 Total reward: -185.58106008526806 SOC: 0.5946 Cumulative_SOC_deviation: 14.4829 Fuel Consumption: 40.7518
maximum steps, simulation is done ... 
elapsed_time: 96.097
Episode: 186 Exploration P: 0.0160 Total reward: -186.3101532197312 SOC: 0.5968 Cumulative_SOC_deviation: 14.5578 Fuel Consumption: 40.7

maximum steps, simulation is done ... 
elapsed_time: 92.601
Episode: 9 Exploration P: 0.7875 Total reward: -2570.306401526044 SOC: 0.9849 Cumulative_SOC_deviation: 249.9058 Fuel Consumption: 71.2479
maximum steps, simulation is done ... 
elapsed_time: 92.920
Episode: 10 Exploration P: 0.7664 Total reward: -2290.382633431071 SOC: 0.9505 Cumulative_SOC_deviation: 222.1776 Fuel Consumption: 68.6065
maximum steps, simulation is done ... 
elapsed_time: 92.925
Episode: 11 Exploration P: 0.7459 Total reward: -2299.8399594057964 SOC: 0.9408 Cumulative_SOC_deviation: 223.1966 Fuel Consumption: 67.8739
maximum steps, simulation is done ... 
elapsed_time: 92.870
Episode: 12 Exploration P: 0.7260 Total reward: -2245.979755648816 SOC: 0.9270 Cumulative_SOC_deviation: 217.9086 Fuel Consumption: 66.8936
maximum steps, simulation is done ... 
elapsed_time: 93.239
Episode: 13 Exploration P: 0.7066 Total reward: -2106.613990128606 SOC: 0.9212 Cumulative_SOC_deviation: 204.0071 Fuel Consumption: 66.5428


maximum steps, simulation is done ... 
elapsed_time: 94.964
Episode: 51 Exploration P: 0.2554 Total reward: -1928.4354518477617 SOC: 0.4658 Cumulative_SOC_deviation: 189.4147 Fuel Consumption: 34.2883
maximum steps, simulation is done ... 
elapsed_time: 94.916
Episode: 52 Exploration P: 0.2487 Total reward: -1939.7432569637094 SOC: 0.4491 Cumulative_SOC_deviation: 190.6796 Fuel Consumption: 32.9470
maximum steps, simulation is done ... 
elapsed_time: 95.106
Episode: 53 Exploration P: 0.2422 Total reward: -2133.7325813590423 SOC: 0.4368 Cumulative_SOC_deviation: 210.1405 Fuel Consumption: 32.3279
maximum steps, simulation is done ... 
elapsed_time: 94.887
Episode: 54 Exploration P: 0.2360 Total reward: -2151.197103902881 SOC: 0.4153 Cumulative_SOC_deviation: 212.0448 Fuel Consumption: 30.7492
maximum steps, simulation is done ... 
elapsed_time: 93.352
Episode: 55 Exploration P: 0.2298 Total reward: -2294.891603703125 SOC: 0.5331 Cumulative_SOC_deviation: 225.5929 Fuel Consumption: 38.96

maximum steps, simulation is done ... 
elapsed_time: 67.365
Episode: 93 Exploration P: 0.0874 Total reward: -162.34627845165215 SOC: 0.5975 Cumulative_SOC_deviation: 12.2023 Fuel Consumption: 40.3230
maximum steps, simulation is done ... 
elapsed_time: 68.365
Episode: 94 Exploration P: 0.0853 Total reward: -134.0982014682744 SOC: 0.5987 Cumulative_SOC_deviation: 9.3585 Fuel Consumption: 40.5136
maximum steps, simulation is done ... 
elapsed_time: 72.504
Episode: 95 Exploration P: 0.0833 Total reward: -137.44008747840817 SOC: 0.5962 Cumulative_SOC_deviation: 9.7004 Fuel Consumption: 40.4361
maximum steps, simulation is done ... 
elapsed_time: 71.939
Episode: 96 Exploration P: 0.0813 Total reward: -152.59643908675116 SOC: 0.5968 Cumulative_SOC_deviation: 11.2201 Fuel Consumption: 40.3957
maximum steps, simulation is done ... 
elapsed_time: 72.006
Episode: 97 Exploration P: 0.0794 Total reward: -150.91912918703423 SOC: 0.5971 Cumulative_SOC_deviation: 11.0626 Fuel Consumption: 40.2931
max

maximum steps, simulation is done ... 
elapsed_time: 66.779
Episode: 134 Exploration P: 0.0351 Total reward: -164.45139558858116 SOC: 0.5970 Cumulative_SOC_deviation: 12.4375 Fuel Consumption: 40.0761
maximum steps, simulation is done ... 
elapsed_time: 67.144
Episode: 135 Exploration P: 0.0344 Total reward: -174.4504333644058 SOC: 0.5935 Cumulative_SOC_deviation: 13.4635 Fuel Consumption: 39.8155
maximum steps, simulation is done ... 
elapsed_time: 67.658
Episode: 136 Exploration P: 0.0338 Total reward: -183.58309373504122 SOC: 0.5964 Cumulative_SOC_deviation: 14.3585 Fuel Consumption: 39.9979
maximum steps, simulation is done ... 
elapsed_time: 67.587
Episode: 137 Exploration P: 0.0331 Total reward: -173.45971958416945 SOC: 0.5959 Cumulative_SOC_deviation: 13.3572 Fuel Consumption: 39.8876
maximum steps, simulation is done ... 
elapsed_time: 67.819
Episode: 138 Exploration P: 0.0325 Total reward: -175.06108794307684 SOC: 0.5945 Cumulative_SOC_deviation: 13.5286 Fuel Consumption: 39.7

maximum steps, simulation is done ... 
elapsed_time: 67.653
Episode: 175 Exploration P: 0.0181 Total reward: -218.92430970799296 SOC: 0.5968 Cumulative_SOC_deviation: 17.9042 Fuel Consumption: 39.8824
maximum steps, simulation is done ... 
elapsed_time: 65.129
Episode: 176 Exploration P: 0.0179 Total reward: -173.20363969717292 SOC: 0.5951 Cumulative_SOC_deviation: 13.3416 Fuel Consumption: 39.7880
maximum steps, simulation is done ... 
elapsed_time: 64.984
Episode: 177 Exploration P: 0.0177 Total reward: -179.66903381620403 SOC: 0.5971 Cumulative_SOC_deviation: 13.9639 Fuel Consumption: 40.0299
maximum steps, simulation is done ... 
elapsed_time: 65.151
Episode: 178 Exploration P: 0.0175 Total reward: -189.8364271973194 SOC: 0.5962 Cumulative_SOC_deviation: 14.9884 Fuel Consumption: 39.9521
maximum steps, simulation is done ... 
elapsed_time: 64.710
Episode: 179 Exploration P: 0.0173 Total reward: -172.9314246705941 SOC: 0.5995 Cumulative_SOC_deviation: 13.2841 Fuel Consumption: 40.09

In [14]:
with open("DDPG3_400.pkl", "wb") as f: 
    pickle.dump(results_dict, f, pickle.HIGHEST_PROTOCOL)