In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 
import scipy.io as sio

from vehicle_model_DDPG2 import Environment 
from cell_model import CellModel 

# Set CUDA_VISIBLE_DEVICES to '-1' for this process, which by CUDA convention
# hides every GPU (presumably to force TensorFlow onto the CPU -- confirm intent).
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# --- Paths to the simulation assets (relative to the notebook directory) ---
driving_cycle_path = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"

# Load the .mat file and keep only column 1 of its "sch_cycle" array
# (presumably the speed trace, with column 0 being time -- TODO confirm).
driving_cycle = sio.loadmat(driving_cycle_path)["sch_cycle"][:, 1]

# Build a reference environment; the last argument (10) is passed in the same
# position that `initialization` later uses for its reward factor.
cell_model = CellModel()
env = Environment(cell_model, driving_cycle, battery_path, motor_path, 10)

# Length of the state vector; sizes the replay buffer and the network inputs.
num_states = 4

In [3]:
class OUActionNoise:
    """Temporally correlated noise process, sampled one value per call.

    Each call advances the process by one step of size ``dt``:

        x_next = x_prev + theta * (mean - x_prev) * dt
                 + std_deviation * sqrt(dt) * N(0, 1)

    Args:
        mean: Value the process is pulled towards.
        std_deviation: Scale of the random term.
        theta: Strength of the pull towards ``mean``.
        dt: Step size.
        x_initial: Starting value; ``None`` means start from 0.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.mean = mean
        self.std_dev = std_deviation
        self.theta = theta
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Rewind the process to ``x_initial`` (or to 0 when none was given)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process by one step and return the new value."""
        previous = self.x_prev
        drift = self.theta * (self.mean - previous) * self.dt
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        current = previous + drift + diffusion
        self.x_prev = current
        return current

In [4]:
class Buffer:
    """Circular experience-replay buffer that also performs the DDPG update step.

    Transitions ``(state, action, reward, next_state)`` are stored in
    preallocated NumPy arrays; once ``buffer_capacity`` records have been
    written, the oldest slots are overwritten.

    ``learn`` relies on these module-level names being bound before it is
    called: ``actor_model``, ``critic_model``, ``target_actor``,
    ``target_critic``, ``actor_optimizer``, ``critic_optimizer`` and ``gamma``.

    Args:
        buffer_capacity: Maximum number of transitions kept.
        batch_size: Number of transitions sampled per ``learn`` call.
        state_dim: Length of a state vector. ``None`` (the default) falls back
            to the module-level ``num_states``.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64, state_dim=None):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of record() calls so far (not capped at the capacity).
        self.buffer_counter = 0

        if state_dim is None:
            state_dim = num_states

        self.state_buffer = np.zeros((self.buffer_capacity, state_dim))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, state_dim))

    def record(self, obs_tuple):
        """Store one transition, overwriting the oldest slot when full.

        Args:
            obs_tuple: Sequence whose first four items are
                ``(state, action, reward, next_state)``.
        """
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    @staticmethod
    def _to_float32(array):
        """Convert a NumPy batch to a float32 tensor.

        The buffers are float64 (NumPy default); casting here avoids relying on
        Keras layers to autocast float64 inputs.
        """
        return tf.cast(tf.convert_to_tensor(array), dtype=tf.float32)

    def learn(self):
        """Sample a minibatch and apply one critic and one actor gradient step.

        Does nothing when no transition has been recorded yet (sampling from an
        empty range would otherwise raise ``ValueError``).
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            return

        # Indices are drawn with replacement (np.random.choice default).
        batch_indices = np.random.choice(record_range, self.batch_size)

        state_batch = self._to_float32(self.state_buffer[batch_indices])
        action_batch = self._to_float32(self.action_buffer[batch_indices])
        reward_batch = self._to_float32(self.reward_buffer[batch_indices])
        next_state_batch = self._to_float32(self.next_state_buffer[batch_indices])

        # NOTE(review): the actions stored by policy()/policy_epsilon_greedy()
        # are the actor output multiplied by j_max and clipped, whereas the
        # actor outputs below are passed to the critics unscaled -- confirm
        # that this difference is intended.

        # Critic step: regress Q(s, a) towards r + gamma * Q'(s', mu'(s')).
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor step: maximise the critic's value of the actor's own actions
        # (implemented as minimising the negated mean).
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Soft-update both target networks towards their online counterparts.

    Every weight of ``target_critic`` / ``target_actor`` is replaced by
    ``(1 - tau) * target_weight + tau * online_weight``, where the online
    weights come from ``critic_model`` / ``actor_model``. All four models are
    read from module scope.

    Args:
        tau: Interpolation factor; the share of the online weight in the blend.
    """
    def blended(target_model, online_model):
        # Pair weights by position, exactly as the models list them.
        old_weights = target_model.weights
        return [
            old_weights[pos] * (1 - tau) + tau * fresh
            for pos, fresh in enumerate(online_model.weights)
        ]

    target_critic.set_weights(blended(target_critic, critic_model))
    target_actor.set_weights(blended(target_actor, actor_model))
    

In [6]:
def get_actor():
    """Build the actor network mapping a state to a single value in (0, 1).

    Architecture: input of length ``num_states`` (module-level) -> two
    512-unit ReLU dense layers -> one sigmoid output unit. The
    BatchNormalization layers between the dense layers are disabled.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    # Narrow uniform initialisation for the output layer's kernel.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` must be a tuple: `(num_states)` without the trailing comma is
    # just the integer itself.
    inputs = layers.Input(shape=(num_states,))

    out = layers.Dense(512, activation="relu")(inputs)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic():
    """Build the critic network estimating Q(state, action).

    The state (length ``num_states``, module-level) passes through 16- and
    32-unit ReLU layers, the scalar action through one 32-unit ReLU layer; the
    two branches are concatenated and fed through two 512-unit ReLU layers to
    a single linear output. The BatchNormalization layers are disabled.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[state, action]`` as input.
    """
    # `shape` must be a tuple: `(num_states)` / `(1)` without a trailing comma
    # are plain integers.
    state_input = layers.Input(shape=(num_states,))
    state_out = layers.Dense(16, activation="relu")(state_input)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    model = tf.keras.Model([state_input, action_input], outputs)
    return model
    

In [8]:
def policy(state, noise_object):
    """Pick an action from the actor plus additive exploration noise.

    The actor output (read from the module-level ``actor_model``) is perturbed
    by one sample from ``noise_object``, multiplied by ``j_max`` and clipped
    to ``[j_min, j_max]``.

    Args:
        state: Batched state tensor of batch size 1; ``state[0][2]`` and
            ``state[0][3]`` are read as the action bounds j_min and j_max.
        noise_object: Zero-argument callable returning one noise sample.

    Returns:
        The clipped action as a NumPy scalar.
    """
    # Read the bounds from the caller's tensor before any dtype conversion.
    j_min = state[0][2].numpy()
    j_max = state[0][3].numpy()

    # Cast explicitly so the float32 network is not fed a float64 tensor
    # (which would otherwise be autocast by each Keras layer).
    actor_output = tf.squeeze(actor_model(tf.cast(state, tf.float32))).numpy()
    noisy_output = actor_output + noise_object()

    return np.clip(noisy_output * j_max, j_min, j_max)
    

In [9]:
def policy_epsilon_greedy(state, eps, num_random_actions=10):
    """Epsilon-greedy action selection around the actor network.

    With probability ``eps`` a random point of an evenly spaced grid over
    ``[j_min, j_max]`` is returned; otherwise the output of the module-level
    ``actor_model`` is multiplied by ``j_max`` and clipped to
    ``[j_min, j_max]``.

    Args:
        state: Batched state tensor of batch size 1; its last two entries are
            read as the action bounds j_min and j_max.
        eps: Exploration probability.
        num_random_actions: Number of grid points used for random exploration
            (default 10, the previously hard-coded value).

    Returns:
        The selected action as a NumPy scalar.
    """
    # Read the bounds from the caller's tensor before any dtype conversion.
    j_min = state[0][-2].numpy()
    j_max = state[0][-1].numpy()

    if random.random() < eps:
        # Explore: uniform pick among the grid points (both bounds included).
        choice = random.randint(0, num_random_actions - 1)
        return np.linspace(j_min, j_max, num_random_actions)[choice]

    # Exploit. Cast explicitly so the float32 network is not fed a float64
    # tensor (which would otherwise be autocast by each Keras layer).
    actor_output = tf.squeeze(actor_model(tf.cast(state, tf.float32))).numpy()
    return np.clip(actor_output * j_max, j_min, j_max)

In [10]:
# --- Exploration noise ---
# `ou_noise` is only consumed by `policy`; the training loop in this notebook
# uses `policy_epsilon_greedy` instead.
std_dev = 0.2
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)  # was a repeated literal 0.2

# --- Optimisers ---
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

# --- DDPG settings ---
total_episodes = 200  # episodes per trial
gamma = 0.95          # discount factor used in the critic target
tau = 0.001           # soft-update factor passed to update_target

# --- Epsilon-greedy schedule and replay settings ---
MAX_EPSILON = 1
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002   # exponential decay rate of epsilon per environment step
BATCH_SIZE = 32
DELAY_TRAINING = 3000  # environment steps collected before learning starts

In [11]:
def initialization(reward_factor):
    """Create a fresh set of networks, replay buffer and environment.

    The target networks are built with the same constructors as the online
    networks and then given a copy of the online weights.

    Args:
        reward_factor: Forwarded as the last argument of ``Environment``.

    Returns:
        Tuple ``(actor_model, critic_model, target_actor, target_critic,
        buffer, env)``.
    """
    online_actor, online_critic = get_actor(), get_critic()
    frozen_actor, frozen_critic = get_actor(), get_critic()

    # Start each target network as an exact copy of its online counterpart.
    for frozen, online in ((frozen_actor, online_actor), (frozen_critic, online_critic)):
        frozen.set_weights(online.get_weights())

    replay = Buffer(500000, BATCH_SIZE)
    simulator = Environment(
        cell_model, driving_cycle, battery_path, motor_path, reward_factor
    )
    return online_actor, online_critic, frozen_actor, frozen_critic, replay, simulator

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Write the weights of all four networks under the ``./<root>/`` directory.

    Args:
        actor_model, critic_model, target_actor, target_critic: Objects
            exposing ``save_weights(path)``.
        root: Directory name substituted into each checkpoint path.
    """
    checkpoints = (
        (actor_model, "./{}/actor_model_checkpoint"),
        (critic_model, "./{}/critic_model_checkpoint"),
        (target_actor, "./{}/target_actor_checkpoint"),
        (target_critic, "./{}/target_critic_checkpoint"),
    )
    for network, path_template in checkpoints:
        network.save_weights(path_template.format(root))
    print("model is saved..")

In [13]:
# Training driver: runs `num_trials` independent trials of `total_episodes`
# episodes each and collects per-episode reward, final SOC and fuel
# consumption into `results_dict` (keyed by 1-based trial number).
print(env.version)

num_trials = 3
reward_factor = 10
results_dict = {} 
for trial in range(num_trials): 
    print()
    print("Trial {}".format(trial))
    
    # Rebinds the module-level networks, buffer and env. Buffer.learn,
    # update_target and the policy functions read these names from module
    # scope, so they operate on this trial's fresh objects.
    actor_model, critic_model, target_actor, target_critic, buffer, env = initialization(
        reward_factor
    )
    
    eps = MAX_EPSILON 
    # Environment steps taken so far in this trial (not reset per episode).
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    for ep in range(total_episodes): 
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            # Add a leading batch axis of size 1 for the actor network.
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            # The final transition is stored with an all-zero next state.
            # NOTE(review): no done flag is recorded, so Buffer.learn still
            # adds gamma * target_critic(zero state) to this reward -- confirm
            # that is intended.
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            # After the warm-up of DELAY_TRAINING steps: one gradient update
            # and one soft target update per environment step, and epsilon
            # decays exponentially in the total step count.
            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * steps)

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption) 

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        # Sum over the episode of |SOC - 0.6| (0.6 is presumably the SOC
        # reference level -- confirm against the Environment reward).
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
        )
    
    # Checkpointing is currently disabled.
#     root = "DDPG1_trial{}".format(trial+1)
#     save_weights(actor_model, critic_model, target_actor, target_critic, root)
    
    results_dict[trial + 1] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs 
    }

3

Trial 0
maximum steps, simulation is done ... 
elapsed_time: 14.844
Episode: 1 Exploration P: 1.0000 Total reward: -5054.976724133867 SOC: 0.9999 Cumulative_SOC_deviation: 486.3844 Fuel Consumption: 191.1329
maximum steps, simulation is done ... 
elapsed_time: 15.155
Episode: 2 Exploration P: 1.0000 Total reward: -5020.941058499016 SOC: 0.9993 Cumulative_SOC_deviation: 483.1375 Fuel Consumption: 189.5661


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer c

maximum steps, simulation is done ... 
elapsed_time: 58.842
Episode: 28 Exploration P: 0.4689 Total reward: -902.1142879274925 SOC: 0.6584 Cumulative_SOC_deviation: 77.2035 Fuel Consumption: 130.0790
maximum steps, simulation is done ... 
elapsed_time: 58.883
Episode: 29 Exploration P: 0.4565 Total reward: -952.6971416487416 SOC: 0.7485 Cumulative_SOC_deviation: 81.4897 Fuel Consumption: 137.7997
maximum steps, simulation is done ... 
elapsed_time: 58.879
Episode: 30 Exploration P: 0.4444 Total reward: -1403.7738618311625 SOC: 0.5334 Cumulative_SOC_deviation: 128.2879 Fuel Consumption: 120.8947
maximum steps, simulation is done ... 
elapsed_time: 58.875
Episode: 31 Exploration P: 0.4326 Total reward: -1192.3097678657523 SOC: 0.5335 Cumulative_SOC_deviation: 107.1023 Fuel Consumption: 121.2864
maximum steps, simulation is done ... 
elapsed_time: 58.900
Episode: 32 Exploration P: 0.4212 Total reward: -2462.858237714001 SOC: 0.3778 Cumulative_SOC_deviation: 235.3282 Fuel Consumption: 109.

maximum steps, simulation is done ... 
elapsed_time: 59.499
Episode: 69 Exploration P: 0.1589 Total reward: -312.9058056651626 SOC: 0.5904 Cumulative_SOC_deviation: 19.7780 Fuel Consumption: 115.1257
maximum steps, simulation is done ... 
elapsed_time: 59.444
Episode: 70 Exploration P: 0.1548 Total reward: -294.46530411075804 SOC: 0.5892 Cumulative_SOC_deviation: 18.0789 Fuel Consumption: 113.6760
maximum steps, simulation is done ... 
elapsed_time: 59.716
Episode: 71 Exploration P: 0.1509 Total reward: -284.07121153582744 SOC: 0.5843 Cumulative_SOC_deviation: 16.9072 Fuel Consumption: 114.9996
maximum steps, simulation is done ... 
elapsed_time: 59.287
Episode: 72 Exploration P: 0.1471 Total reward: -443.68210326049467 SOC: 0.5752 Cumulative_SOC_deviation: 32.7767 Fuel Consumption: 115.9155
maximum steps, simulation is done ... 
elapsed_time: 59.485
Episode: 73 Exploration P: 0.1434 Total reward: -337.24283202753 SOC: 0.5983 Cumulative_SOC_deviation: 22.1485 Fuel Consumption: 115.7577

maximum steps, simulation is done ... 
elapsed_time: 59.585
Episode: 110 Exploration P: 0.0583 Total reward: -416.2817637104159 SOC: 0.5804 Cumulative_SOC_deviation: 30.6366 Fuel Consumption: 109.9155
maximum steps, simulation is done ... 
elapsed_time: 59.769
Episode: 111 Exploration P: 0.0570 Total reward: -391.07296271024865 SOC: 0.5665 Cumulative_SOC_deviation: 28.0761 Fuel Consumption: 110.3121
maximum steps, simulation is done ... 
elapsed_time: 59.663
Episode: 112 Exploration P: 0.0557 Total reward: -426.5131675753928 SOC: 0.5830 Cumulative_SOC_deviation: 31.5264 Fuel Consumption: 111.2491
maximum steps, simulation is done ... 
elapsed_time: 59.617
Episode: 113 Exploration P: 0.0545 Total reward: -392.9292824786894 SOC: 0.5525 Cumulative_SOC_deviation: 28.4608 Fuel Consumption: 108.3211
maximum steps, simulation is done ... 
elapsed_time: 59.598
Episode: 114 Exploration P: 0.0533 Total reward: -436.7070971073276 SOC: 0.5847 Cumulative_SOC_deviation: 32.6308 Fuel Consumption: 110

maximum steps, simulation is done ... 
elapsed_time: 53.171
Episode: 151 Exploration P: 0.0257 Total reward: -437.58676565973093 SOC: 0.5840 Cumulative_SOC_deviation: 32.1625 Fuel Consumption: 115.9617
maximum steps, simulation is done ... 
elapsed_time: 53.366
Episode: 152 Exploration P: 0.0252 Total reward: -396.19646584629226 SOC: 0.5868 Cumulative_SOC_deviation: 28.2482 Fuel Consumption: 113.7141
maximum steps, simulation is done ... 
elapsed_time: 53.660
Episode: 153 Exploration P: 0.0248 Total reward: -341.75659426158296 SOC: 0.5975 Cumulative_SOC_deviation: 22.7817 Fuel Consumption: 113.9399
maximum steps, simulation is done ... 
elapsed_time: 53.381
Episode: 154 Exploration P: 0.0244 Total reward: -335.564454792638 SOC: 0.5946 Cumulative_SOC_deviation: 22.1385 Fuel Consumption: 114.1790
maximum steps, simulation is done ... 
elapsed_time: 53.378
Episode: 155 Exploration P: 0.0240 Total reward: -324.63012541323366 SOC: 0.5988 Cumulative_SOC_deviation: 21.0867 Fuel Consumption: 1

maximum steps, simulation is done ... 
elapsed_time: 53.480
Episode: 192 Exploration P: 0.0151 Total reward: -324.070181865736 SOC: 0.5903 Cumulative_SOC_deviation: 20.7216 Fuel Consumption: 116.8546
maximum steps, simulation is done ... 
elapsed_time: 53.394
Episode: 193 Exploration P: 0.0149 Total reward: -361.8505376579091 SOC: 0.5904 Cumulative_SOC_deviation: 24.4584 Fuel Consumption: 117.2663
maximum steps, simulation is done ... 
elapsed_time: 53.147
Episode: 194 Exploration P: 0.0148 Total reward: -276.12704219851355 SOC: 0.5980 Cumulative_SOC_deviation: 15.9421 Fuel Consumption: 116.7060
maximum steps, simulation is done ... 
elapsed_time: 53.267
Episode: 195 Exploration P: 0.0147 Total reward: -337.9520283705818 SOC: 0.5890 Cumulative_SOC_deviation: 22.1825 Fuel Consumption: 116.1271
maximum steps, simulation is done ... 
elapsed_time: 53.326
Episode: 196 Exploration P: 0.0146 Total reward: -383.65847230892587 SOC: 0.5927 Cumulative_SOC_deviation: 26.7863 Fuel Consumption: 115

maximum steps, simulation is done ... 
elapsed_time: 52.789
Episode: 19 Exploration P: 0.5976 Total reward: -2784.7333912099807 SOC: 0.9956 Cumulative_SOC_deviation: 263.0884 Fuel Consumption: 153.8491
maximum steps, simulation is done ... 
elapsed_time: 52.856
Episode: 20 Exploration P: 0.5816 Total reward: -2552.5202843858046 SOC: 0.9871 Cumulative_SOC_deviation: 239.8846 Fuel Consumption: 153.6739
maximum steps, simulation is done ... 
elapsed_time: 52.873
Episode: 21 Exploration P: 0.5662 Total reward: -1891.185817765101 SOC: 0.9281 Cumulative_SOC_deviation: 174.0790 Fuel Consumption: 150.3959
maximum steps, simulation is done ... 
elapsed_time: 52.986
Episode: 22 Exploration P: 0.5511 Total reward: -1443.2458921827513 SOC: 0.8601 Cumulative_SOC_deviation: 129.8365 Fuel Consumption: 144.8812
maximum steps, simulation is done ... 
elapsed_time: 52.872
Episode: 23 Exploration P: 0.5364 Total reward: -959.6262759657568 SOC: 0.7297 Cumulative_SOC_deviation: 82.2589 Fuel Consumption: 13

maximum steps, simulation is done ... 
elapsed_time: 53.855
Episode: 60 Exploration P: 0.2006 Total reward: -240.69908355678496 SOC: 0.5942 Cumulative_SOC_deviation: 12.3572 Fuel Consumption: 117.1275
maximum steps, simulation is done ... 
elapsed_time: 53.662
Episode: 61 Exploration P: 0.1954 Total reward: -246.69007235260216 SOC: 0.5873 Cumulative_SOC_deviation: 12.9484 Fuel Consumption: 117.2057
maximum steps, simulation is done ... 
elapsed_time: 53.816
Episode: 62 Exploration P: 0.1904 Total reward: -243.50731248327696 SOC: 0.5927 Cumulative_SOC_deviation: 12.8434 Fuel Consumption: 115.0734
maximum steps, simulation is done ... 
elapsed_time: 54.040
Episode: 63 Exploration P: 0.1855 Total reward: -228.62346761377623 SOC: 0.5908 Cumulative_SOC_deviation: 11.3152 Fuel Consumption: 115.4712
maximum steps, simulation is done ... 
elapsed_time: 53.858
Episode: 64 Exploration P: 0.1808 Total reward: -236.3524464413362 SOC: 0.5866 Cumulative_SOC_deviation: 12.2627 Fuel Consumption: 113.7

maximum steps, simulation is done ... 
elapsed_time: 53.486
Episode: 101 Exploration P: 0.0718 Total reward: -740.2612337389321 SOC: 0.5534 Cumulative_SOC_deviation: 62.8795 Fuel Consumption: 111.4661
maximum steps, simulation is done ... 
elapsed_time: 53.583
Episode: 102 Exploration P: 0.0701 Total reward: -762.9263652695829 SOC: 0.5496 Cumulative_SOC_deviation: 64.9244 Fuel Consumption: 113.6828
maximum steps, simulation is done ... 
elapsed_time: 53.193
Episode: 103 Exploration P: 0.0685 Total reward: -799.420003909448 SOC: 0.5450 Cumulative_SOC_deviation: 68.8724 Fuel Consumption: 110.6956
maximum steps, simulation is done ... 
elapsed_time: 50.867
Episode: 104 Exploration P: 0.0669 Total reward: -863.5899765958013 SOC: 0.5382 Cumulative_SOC_deviation: 75.2421 Fuel Consumption: 111.1692
maximum steps, simulation is done ... 
elapsed_time: 55.459
Episode: 105 Exploration P: 0.0654 Total reward: -914.0441178799887 SOC: 0.5360 Cumulative_SOC_deviation: 80.4598 Fuel Consumption: 109.4

maximum steps, simulation is done ... 
elapsed_time: 47.421
Episode: 142 Exploration P: 0.0301 Total reward: -655.3889612249358 SOC: 0.5605 Cumulative_SOC_deviation: 54.8946 Fuel Consumption: 106.4431
maximum steps, simulation is done ... 
elapsed_time: 47.451
Episode: 143 Exploration P: 0.0295 Total reward: -691.7262120473654 SOC: 0.5578 Cumulative_SOC_deviation: 58.5636 Fuel Consumption: 106.0904
maximum steps, simulation is done ... 
elapsed_time: 47.426
Episode: 144 Exploration P: 0.0290 Total reward: -727.3268685070549 SOC: 0.5548 Cumulative_SOC_deviation: 62.1599 Fuel Consumption: 105.7277
maximum steps, simulation is done ... 
elapsed_time: 47.111
Episode: 145 Exploration P: 0.0285 Total reward: -456.02524333404506 SOC: 0.5790 Cumulative_SOC_deviation: 34.7750 Fuel Consumption: 108.2753
maximum steps, simulation is done ... 
elapsed_time: 47.320
Episode: 146 Exploration P: 0.0280 Total reward: -275.39760977787387 SOC: 0.5942 Cumulative_SOC_deviation: 16.6532 Fuel Consumption: 10

maximum steps, simulation is done ... 
elapsed_time: 47.331
Episode: 183 Exploration P: 0.0165 Total reward: -268.5040272210152 SOC: 0.5932 Cumulative_SOC_deviation: 16.1241 Fuel Consumption: 107.2629
maximum steps, simulation is done ... 
elapsed_time: 47.098
Episode: 184 Exploration P: 0.0163 Total reward: -256.1955088767493 SOC: 0.5914 Cumulative_SOC_deviation: 14.9087 Fuel Consumption: 107.1083
maximum steps, simulation is done ... 
elapsed_time: 47.512
Episode: 185 Exploration P: 0.0162 Total reward: -277.2211208092998 SOC: 0.5930 Cumulative_SOC_deviation: 16.9888 Fuel Consumption: 107.3333
maximum steps, simulation is done ... 
elapsed_time: 47.423
Episode: 186 Exploration P: 0.0160 Total reward: -282.8881986418181 SOC: 0.5912 Cumulative_SOC_deviation: 17.5513 Fuel Consumption: 107.3754
maximum steps, simulation is done ... 
elapsed_time: 47.171
Episode: 187 Exploration P: 0.0158 Total reward: -271.0170744315684 SOC: 0.5909 Cumulative_SOC_deviation: 16.4204 Fuel Consumption: 106.

maximum steps, simulation is done ... 
elapsed_time: 46.631
Episode: 10 Exploration P: 0.7623 Total reward: -4244.468057303334 SOC: 1.0000 Cumulative_SOC_deviation: 408.4517 Fuel Consumption: 159.9514
maximum steps, simulation is done ... 
elapsed_time: 46.446
Episode: 11 Exploration P: 0.7419 Total reward: -4185.845449360911 SOC: 0.9994 Cumulative_SOC_deviation: 402.7317 Fuel Consumption: 158.5289
maximum steps, simulation is done ... 
elapsed_time: 46.764
Episode: 12 Exploration P: 0.7221 Total reward: -4012.900536142244 SOC: 0.9988 Cumulative_SOC_deviation: 385.5856 Fuel Consumption: 157.0446
maximum steps, simulation is done ... 
elapsed_time: 46.693
Episode: 13 Exploration P: 0.7028 Total reward: -4160.364172069229 SOC: 0.9963 Cumulative_SOC_deviation: 400.3691 Fuel Consumption: 156.6735
maximum steps, simulation is done ... 
elapsed_time: 46.715
Episode: 14 Exploration P: 0.6840 Total reward: -3473.071993022573 SOC: 0.9954 Cumulative_SOC_deviation: 331.7646 Fuel Consumption: 155.

maximum steps, simulation is done ... 
elapsed_time: 47.263
Episode: 51 Exploration P: 0.2540 Total reward: -262.95618142469584 SOC: 0.5920 Cumulative_SOC_deviation: 14.4048 Fuel Consumption: 118.9087
maximum steps, simulation is done ... 
elapsed_time: 47.313
Episode: 52 Exploration P: 0.2474 Total reward: -239.05271250568788 SOC: 0.5887 Cumulative_SOC_deviation: 12.3258 Fuel Consumption: 115.7946
maximum steps, simulation is done ... 
elapsed_time: 46.994
Episode: 53 Exploration P: 0.2410 Total reward: -266.81773193822346 SOC: 0.5905 Cumulative_SOC_deviation: 15.1820 Fuel Consumption: 114.9979
maximum steps, simulation is done ... 
elapsed_time: 47.506
Episode: 54 Exploration P: 0.2347 Total reward: -249.06098142777668 SOC: 0.5921 Cumulative_SOC_deviation: 13.1892 Fuel Consumption: 117.1693
maximum steps, simulation is done ... 
elapsed_time: 47.399
Episode: 55 Exploration P: 0.2286 Total reward: -261.0562305216368 SOC: 0.5956 Cumulative_SOC_deviation: 14.5237 Fuel Consumption: 115.8

maximum steps, simulation is done ... 
elapsed_time: 47.242
Episode: 92 Exploration P: 0.0892 Total reward: -367.77607922179504 SOC: 0.5837 Cumulative_SOC_deviation: 25.6234 Fuel Consumption: 111.5416
maximum steps, simulation is done ... 
elapsed_time: 47.449
Episode: 93 Exploration P: 0.0870 Total reward: -331.7369565256399 SOC: 0.5893 Cumulative_SOC_deviation: 21.9415 Fuel Consumption: 112.3224
maximum steps, simulation is done ... 
elapsed_time: 47.448
Episode: 94 Exploration P: 0.0849 Total reward: -321.1838833674546 SOC: 0.5917 Cumulative_SOC_deviation: 20.8944 Fuel Consumption: 112.2398
maximum steps, simulation is done ... 
elapsed_time: 47.444
Episode: 95 Exploration P: 0.0829 Total reward: -374.56309188437825 SOC: 0.5891 Cumulative_SOC_deviation: 26.2487 Fuel Consumption: 112.0756
maximum steps, simulation is done ... 
elapsed_time: 47.288
Episode: 96 Exploration P: 0.0809 Total reward: -390.3848312329148 SOC: 0.5882 Cumulative_SOC_deviation: 27.8601 Fuel Consumption: 111.783

maximum steps, simulation is done ... 
elapsed_time: 47.359
Episode: 133 Exploration P: 0.0357 Total reward: -249.4055376050516 SOC: 0.5961 Cumulative_SOC_deviation: 13.9749 Fuel Consumption: 109.6566
maximum steps, simulation is done ... 
elapsed_time: 47.543
Episode: 134 Exploration P: 0.0350 Total reward: -207.61580595936312 SOC: 0.5990 Cumulative_SOC_deviation: 9.7962 Fuel Consumption: 109.6542
maximum steps, simulation is done ... 
elapsed_time: 47.262
Episode: 135 Exploration P: 0.0343 Total reward: -211.53224295489372 SOC: 0.5889 Cumulative_SOC_deviation: 10.3277 Fuel Consumption: 108.2553
maximum steps, simulation is done ... 
elapsed_time: 47.554
Episode: 136 Exploration P: 0.0336 Total reward: -243.55767619834106 SOC: 0.5922 Cumulative_SOC_deviation: 13.3920 Fuel Consumption: 109.6381
maximum steps, simulation is done ... 
elapsed_time: 47.787
Episode: 137 Exploration P: 0.0330 Total reward: -194.53176729757138 SOC: 0.5842 Cumulative_SOC_deviation: 8.6850 Fuel Consumption: 10

maximum steps, simulation is done ... 
elapsed_time: 47.768
Episode: 174 Exploration P: 0.0183 Total reward: -254.90705215923225 SOC: 0.5960 Cumulative_SOC_deviation: 14.6093 Fuel Consumption: 108.8136
maximum steps, simulation is done ... 
elapsed_time: 47.244
Episode: 175 Exploration P: 0.0181 Total reward: -304.76164148897385 SOC: 0.5986 Cumulative_SOC_deviation: 19.5806 Fuel Consumption: 108.9554
maximum steps, simulation is done ... 
elapsed_time: 47.529
Episode: 176 Exploration P: 0.0179 Total reward: -257.87413075446744 SOC: 0.5982 Cumulative_SOC_deviation: 14.9734 Fuel Consumption: 108.1405
maximum steps, simulation is done ... 
elapsed_time: 47.622
Episode: 177 Exploration P: 0.0177 Total reward: -288.0613748277757 SOC: 0.5922 Cumulative_SOC_deviation: 18.0406 Fuel Consumption: 107.6558
maximum steps, simulation is done ... 
elapsed_time: 47.459
Episode: 178 Exploration P: 0.0175 Total reward: -314.94236374883235 SOC: 0.5959 Cumulative_SOC_deviation: 20.6684 Fuel Consumption: 

In [14]:
# Persist the per-trial histories collected in `results_dict` to disk.
with open("DDPG2_modelChange.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)