In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 

from vehicle_model_DDPG1 import Environment 
from cell_model import CellModel 

# Hide every CUDA device from this process (intended to force CPU execution).
# NOTE(review): this runs after `import tensorflow`; presumably it still takes
# effect because no TF op has executed yet -- confirm, or move it above the import.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Input data files (.mat) for the simulation; all paths are relative to the
# notebook's working directory.
# NOTE(review): `drving_cycle` is a typo for "driving_cycle", but the name is
# referenced by `initialization`, so it is kept as-is here.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_e-4wd_Battery.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_id_75_110_Westinghouse.mat"
cell_model = CellModel()
# The last positional argument is the reward factor (the same slot that
# `initialization` fills with its `reward_factor` parameter). This instance is
# replaced by a fresh one at the start of every trial.
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)

# Length of the state vector; sizes the replay-buffer rows and the network inputs.
num_states = 4

In [3]:
class OUActionNoise:
    """Discretised Ornstein-Uhlenbeck process used as exploration noise.

    Each call advances the process by one step of size ``dt``:

        x <- x + theta * (mean - x) * dt + std_dev * sqrt(dt) * N(0, 1)

    and returns the new value, so successive samples are correlated.

    Args:
        mean: Value the process is pulled back towards.
        std_deviation: Scale of the random increment.
        theta: Strength of the pull towards ``mean``.
        dt: Step size.
        x_initial: Starting value; when ``None`` the process starts at 0.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.theta, self.mean, self.std_dev = theta, mean, std_deviation
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Rewind the process to ``x_initial`` (or 0 when none was given)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance one step and return the new process value."""
        pull_to_mean = self.theta * (self.mean - self.x_prev) * self.dt
        random_kick = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        x_next = self.x_prev + pull_to_mean + random_kick
        self.x_prev = x_next
        return x_next

In [4]:
class Buffer:
    """Ring buffer of (state, action, reward, next_state) transitions plus the DDPG update.

    Storage is float32 so sampled batches match the dtype of the Keras layers;
    float64 batches made Keras emit an autocast warning on every fresh model.

    ``learn`` reads the module-level names ``actor_model``, ``critic_model``,
    ``target_actor``, ``target_critic``, ``actor_optimizer``,
    ``critic_optimizer`` and ``gamma``; they must be defined before it is called.

    Args:
        buffer_capacity: Maximum number of stored transitions; once full, the
            oldest entries are overwritten.
        batch_size: Number of transitions sampled per ``learn`` call.
        state_dim: Length of a state vector. Defaults to the module-level
            ``num_states`` when omitted.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64, state_dim=None):
        if state_dim is None:
            state_dim = num_states

        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of record() calls so far (not capped at the capacity).
        self.buffer_counter = 0

        self.state_buffer = np.zeros((self.buffer_capacity, state_dim), dtype=np.float32)
        self.action_buffer = np.zeros((self.buffer_capacity, 1), dtype=np.float32)
        self.reward_buffer = np.zeros((self.buffer_capacity, 1), dtype=np.float32)
        self.next_state_buffer = np.zeros((self.buffer_capacity, state_dim), dtype=np.float32)

    def record(self, obs_tuple):
        """Store one transition given as ``(state, action, reward, next_state)``."""
        # Wrap around so new data replaces the oldest slot once the buffer is full.
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    def learn(self):
        """Run one critic update and one actor update on a random minibatch.

        Does nothing when no transition has been recorded yet. Indices are
        drawn with replacement from the filled part of the buffer.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            # Nothing to sample from; np.random.choice would raise ValueError.
            return
        batch_indices = np.random.choice(record_range, self.batch_size)

        state_batch = tf.convert_to_tensor(self.state_buffer[batch_indices], dtype=tf.float32)
        action_batch = tf.convert_to_tensor(self.action_buffer[batch_indices], dtype=tf.float32)
        reward_batch = tf.convert_to_tensor(self.reward_buffer[batch_indices], dtype=tf.float32)
        next_state_batch = tf.convert_to_tensor(
            self.next_state_buffer[batch_indices], dtype=tf.float32
        )

        # The TD target depends only on the target networks, so it is computed
        # outside the tape; the gradient w.r.t. the online critic is unchanged.
        # NOTE(review): no terminal flag is stored, so the target bootstraps
        # from next_state even for the last transition of an episode.
        target_actions = target_actor(next_state_batch)
        y = reward_batch + gamma * target_critic([next_state_batch, target_actions])

        # Critic: minimise the mean squared TD error.
        with tf.GradientTape() as tape:
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor: maximise the critic's value of the actor's own actions.
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Soft-update both target networks towards their online counterparts.

    For every weight: ``target <- (1 - tau) * target + tau * online``.
    The critic pair is updated first, then the actor pair. The four models are
    read from the module-level names ``critic_model``, ``target_critic``,
    ``actor_model`` and ``target_actor``.

    Args:
        tau: Interpolation factor; 0 leaves the targets unchanged, 1 copies
            the online weights.
    """
    network_pairs = (
        (critic_model, target_critic),
        (actor_model, target_actor),
    )
    for online_net, target_net in network_pairs:
        blended = [
            slow * (1 - tau) + tau * fast
            for slow, fast in zip(target_net.weights, online_net.weights)
        ]
        target_net.set_weights(blended)
    

In [6]:
def get_actor():
    """Build the actor network: state vector -> one action fraction in (0, 1).

    Two 512-unit ReLU layers feed a single sigmoid unit. The output layer's
    kernel is initialised uniformly in [-0.003, 0.003].

    Returns:
        An uncompiled ``tf.keras.Model`` taking a ``(batch, num_states)`` input
        and producing a ``(batch, 1)`` output.
    """
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` must be a tuple; `(num_states)` without the trailing comma is
    # just the int, which only some Keras versions accept.
    inputs = layers.Input(shape=(num_states,))

    out = layers.Dense(512, activation="relu")(inputs)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic():
    """Build the critic network: (state, action) -> scalar value estimate.

    The state passes through Dense(16) and Dense(32), the action through
    Dense(32); the two branches are concatenated and followed by two 512-unit
    ReLU layers and a linear single-unit output.

    Returns:
        An uncompiled ``tf.keras.Model`` whose inputs are
        ``[state (batch, num_states), action (batch, 1)]`` and whose output is
        ``(batch, 1)``.
    """
    # `shape` must be a tuple; `(num_states)` / `(1)` without the trailing
    # comma are plain ints, which only some Keras versions accept.
    state_input = layers.Input(shape=(num_states,))

    state_out = layers.Dense(16, activation="relu")(state_input)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    model = tf.keras.Model([state_input, action_input], outputs)
    return model
    

In [8]:
def policy(state, noise_object):
    """Choose an action from the actor's output plus exploration noise.

    The actor output and the noise sample are summed, scaled by the upper
    action bound and clipped to ``[j_min, j_max]``.

    Args:
        state: Batched state tensor of shape ``(1, num_states)``. Its last two
            entries are used as the lower and upper action bounds, indexed
            from the end as in ``policy_epsilon_greedy``.
        noise_object: Zero-argument callable returning the noise sample.

    Returns:
        The clipped action.
    """
    j_min = state[0][-2].numpy()
    j_max = state[0][-1].numpy()

    # Query the actor before drawing the noise sample.
    actor_output = tf.squeeze(actor_model(state)).numpy()
    noisy_output = actor_output + noise_object()

    legal_action = np.clip(noisy_output * j_max, j_min, j_max)
    return legal_action
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Epsilon-greedy action selection over the current action bounds.

    With probability ``eps`` one of 10 evenly spaced values between the lower
    and upper bound is returned; otherwise the actor output is scaled by the
    upper bound and clipped to the bounds.

    Args:
        state: Batched state tensor of shape ``(1, num_states)``; its last two
            entries are read as the lower and upper action bounds.
        eps: Exploration probability.

    Returns:
        The selected action.
    """
    lower = state[0][-2].numpy()
    upper = state[0][-1].numpy()

    exploring = random.random() < eps
    if not exploring:
        # Greedy branch: actor output scaled to the upper bound, then clipped.
        scaled = tf.squeeze(actor_model(state)).numpy() * upper
        return np.clip(scaled, lower, upper)

    # Exploration branch: uniform pick from a 10-point grid over the bounds.
    grid_index = random.randint(0, 9)
    return np.linspace(lower, upper, 10)[grid_index]

In [10]:
# Ornstein-Uhlenbeck exploration noise consumed by `policy`. The scale is
# defined once and passed through instead of repeating the literal.
std_dev = 0.2
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# Separate Adam optimizers for critic and actor.
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 200  # episodes per trial
gamma = 0.95          # discount factor used in the critic's TD target
tau = 0.001           # soft-update rate passed to update_target

# Epsilon-greedy schedule: eps = MIN + (MAX - MIN) * exp(-DECAY_RATE * steps)
MAX_EPSILON = 1
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
BATCH_SIZE = 32        # minibatch size handed to Buffer
DELAY_TRAINING = 3000  # environment steps collected before learning starts

In [11]:
def initialization(reward_factor):
    """Create fresh networks, replay buffer and environment for one trial.

    Args:
        reward_factor: Passed as the last argument to ``Environment``.

    Returns:
        Tuple ``(actor_model, critic_model, target_actor, target_critic,
        buffer, env)``. Both target networks start with the same weights as
        their online counterparts; the buffer holds up to 500000 transitions
        and samples ``BATCH_SIZE`` of them per update.
    """
    online_actor = get_actor()
    online_critic = get_critic()

    # Targets are built afterwards and then overwritten with the online weights.
    frozen_actor = get_actor()
    frozen_critic = get_critic()
    for frozen, online in ((frozen_actor, online_actor), (frozen_critic, online_critic)):
        frozen.set_weights(online.get_weights())

    replay = Buffer(500000, BATCH_SIZE)
    trial_env = Environment(cell_model, drving_cycle, battery_path, motor_path, reward_factor)
    return online_actor, online_critic, frozen_actor, frozen_critic, replay, trial_env

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Write the weights of all four networks under ``./<root>/``.

    Each model's ``save_weights`` is called with its own checkpoint prefix, in
    the order actor, critic, target actor, target critic, after which a
    confirmation line is printed.

    Args:
        actor_model: Online actor network.
        critic_model: Online critic network.
        target_actor: Target actor network.
        target_critic: Target critic network.
        root: Directory name (relative to the working directory) to save into.
    """
    checkpoints = (
        (actor_model, "./{}/actor_model_checkpoint"),
        (critic_model, "./{}/critic_model_checkpoint"),
        (target_actor, "./{}/target_actor_checkpoint"),
        (target_critic, "./{}/target_critic_checkpoint"),
    )
    for network, path_template in checkpoints:
        network.save_weights(path_template.format(root))
    print("model is saved..")

In [13]:
# Train `num_trials` independent DDPG agents and collect per-episode metrics.
# `env` on the next line is still the instance created in the configuration cell.
print(env.version)

num_trials = 3
reward_factor = 10
# Maps the 1-based trial number to that trial's reward / SOC / fuel histories.
results_dict = {} 
for trial in range(num_trials): 
    print()
    # Printed 0-based, whereas results_dict below is keyed 1-based.
    print("Trial {}".format(trial))
    
    # These names are rebound at module scope on purpose: Buffer.learn and
    # update_target look up actor_model, critic_model, target_actor and
    # target_critic as globals, so every trial trains the freshly built networks.
    actor_model, critic_model, target_actor, target_critic, buffer, env = initialization(
        reward_factor
    )
    
    eps = MAX_EPSILON 
    # Environment steps taken in this trial, accumulated across episodes.
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    for ep in range(total_episodes): 
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            # Add a leading batch axis so the actor receives a (1, num_states) input.
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
            next_state, reward, done = env.step(action)
            # The terminal next state is stored as an all-zero vector.
            # NOTE(review): the buffer keeps no `done` flag, so the critic
            # target still bootstraps from this zero state -- confirm intended.
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            # Pure data collection for the first DELAY_TRAINING steps; afterwards
            # do one gradient update and one soft target update per step, and
            # decay eps as a function of the trial's total step count.
            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * steps)

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption) 

        # Sum over the episode of |SOC - 0.6|.
        # NOTE(review): 0.6 is presumably the reference SOC -- confirm against the environment.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
        )
    
    # Checkpointing is currently disabled; uncomment to save each trial's networks.
#     root = "DDPG1_trial{}".format(trial+1)
#     save_weights(actor_model, critic_model, target_actor, target_critic, root)
    
    results_dict[trial + 1] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs 
    }

1

Trial 0
Available condition is not avail... SOC: 1
elapsed_time: 20.235
Episode: 1 Exploration P: 1.0000 Total reward: -3271.227313010566 SOC: 1.0000 Cumulative_SOC_deviation: 319.4656 Fuel Consumption: 76.5715
Available condition is not avail... SOC: 1
elapsed_time: 22.549
Episode: 2 Exploration P: 1.0000 Total reward: -3251.587593133043 SOC: 1.0000 Cumulative_SOC_deviation: 317.4934 Fuel Consumption: 76.6540


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base L

maximum steps, simulation is done ... 
elapsed_time: 85.159
Episode: 28 Exploration P: 0.4720 Total reward: -573.6742460196425 SOC: 0.7215 Cumulative_SOC_deviation: 52.1515 Fuel Consumption: 52.1593
maximum steps, simulation is done ... 
elapsed_time: 85.567
Episode: 29 Exploration P: 0.4595 Total reward: -432.50221287011357 SOC: 0.6500 Cumulative_SOC_deviation: 38.5605 Fuel Consumption: 46.8971
maximum steps, simulation is done ... 
elapsed_time: 85.172
Episode: 30 Exploration P: 0.4473 Total reward: -522.546509391421 SOC: 0.6742 Cumulative_SOC_deviation: 47.3988 Fuel Consumption: 48.5583
maximum steps, simulation is done ... 
elapsed_time: 85.237
Episode: 31 Exploration P: 0.4355 Total reward: -573.3129831955196 SOC: 0.6781 Cumulative_SOC_deviation: 52.4156 Fuel Consumption: 49.1568
maximum steps, simulation is done ... 
elapsed_time: 85.327
Episode: 32 Exploration P: 0.4240 Total reward: -475.0668559868232 SOC: 0.6613 Cumulative_SOC_deviation: 42.7097 Fuel Consumption: 47.9702
maxim

maximum steps, simulation is done ... 
elapsed_time: 86.352
Episode: 69 Exploration P: 0.1599 Total reward: -3433.982647052714 SOC: 0.2450 Cumulative_SOC_deviation: 341.4573 Fuel Consumption: 19.4096
Available condition is not avail... SOC: 1
elapsed_time: 81.919
Episode: 70 Exploration P: 0.1560 Total reward: -2947.5508008653296 SOC: 1.0000 Cumulative_SOC_deviation: 285.3986 Fuel Consumption: 93.5652
Available condition is not avail... SOC: 1
elapsed_time: 82.080
Episode: 71 Exploration P: 0.1522 Total reward: -4635.782122571404 SOC: 1.0000 Cumulative_SOC_deviation: 451.3285 Fuel Consumption: 122.4974
maximum steps, simulation is done ... 
elapsed_time: 85.614
Episode: 72 Exploration P: 0.1484 Total reward: -3798.7123390075976 SOC: 0.8264 Cumulative_SOC_deviation: 373.8801 Fuel Consumption: 59.9116
maximum steps, simulation is done ... 
elapsed_time: 85.646
Episode: 73 Exploration P: 0.1446 Total reward: -3591.1457260921443 SOC: 0.2330 Cumulative_SOC_deviation: 357.2603 Fuel Consumpti

  2 * r_dis)) * (v_dis - (v_dis ** 2 - 4 * r_dis * p_bat) ** (0.5)) * (p_bat >= 0)
  2 * r_dis)) * (v_dis - (v_dis ** 2 - 4 * r_dis * p_bat) ** (0.5)) * (p_bat >= 0)


SOC is nan...
elapsed_time: 82.449
Episode: 109 Exploration P: 0.0607 Total reward: -5145.929338279072 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 8.8948
maximum steps, simulation is done ... 
elapsed_time: 87.200
Episode: 110 Exploration P: 0.0593 Total reward: -4083.775549039465 SOC: 0.4272 Cumulative_SOC_deviation: 404.9707 Fuel Consumption: 34.0681
maximum steps, simulation is done ... 
elapsed_time: 87.499
Episode: 111 Exploration P: 0.0580 Total reward: -4930.358143564502 SOC: 0.9865 Cumulative_SOC_deviation: 481.3764 Fuel Consumption: 116.5945
Available condition is not avail... SOC: 1
elapsed_time: 83.250
Episode: 112 Exploration P: 0.0567 Total reward: -4127.256259724867 SOC: 1.0000 Cumulative_SOC_deviation: 400.7793 Fuel Consumption: 119.4633
maximum steps, simulation is done ... 
elapsed_time: 86.845
Episode: 113 Exploration P: 0.0555 Total reward: -1030.241971940938 SOC: 0.9298 Cumulative_SOC_deviation: 96.0060 Fuel Consumption: 70.1820
Available condition is n

Available condition is not avail... SOC: 1
elapsed_time: 82.292
Episode: 152 Exploration P: 0.0269 Total reward: -4691.674835076311 SOC: 1.0000 Cumulative_SOC_deviation: 456.3224 Fuel Consumption: 128.4509
Available condition is not avail... SOC: 1
elapsed_time: 81.881
Episode: 153 Exploration P: 0.0264 Total reward: -4706.075533167137 SOC: 1.0000 Cumulative_SOC_deviation: 457.7160 Fuel Consumption: 128.9153
Available condition is not avail... SOC: 1
elapsed_time: 82.319
Episode: 154 Exploration P: 0.0260 Total reward: -4706.322364106391 SOC: 1.0000 Cumulative_SOC_deviation: 457.7521 Fuel Consumption: 128.8017
Available condition is not avail... SOC: 1
elapsed_time: 83.233
Episode: 155 Exploration P: 0.0256 Total reward: -4702.687200953368 SOC: 1.0000 Cumulative_SOC_deviation: 457.3320 Fuel Consumption: 129.3676
Available condition is not avail... SOC: 1
elapsed_time: 83.150
Episode: 156 Exploration P: 0.0252 Total reward: -4694.672010614948 SOC: 1.0000 Cumulative_SOC_deviation: 456.60

maximum steps, simulation is done ... 
elapsed_time: 86.425
Episode: 196 Exploration P: 0.0158 Total reward: -3298.847048116787 SOC: 0.9848 Cumulative_SOC_deviation: 322.0821 Fuel Consumption: 78.0256
SOC is nan...
elapsed_time: 73.684
Episode: 197 Exploration P: 0.0156 Total reward: -4734.762855848432 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 5.4294
SOC is nan...
elapsed_time: 66.513
Episode: 198 Exploration P: 0.0155 Total reward: -4171.477028270707 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 3.3807
maximum steps, simulation is done ... 
elapsed_time: 84.209
Episode: 199 Exploration P: 0.0154 Total reward: -3061.7169165653672 SOC: 0.9263 Cumulative_SOC_deviation: 299.0593 Fuel Consumption: 71.1234
Available condition is not avail... SOC: 1
elapsed_time: 85.611
Episode: 200 Exploration P: 0.0152 Total reward: -4667.342771169096 SOC: 1.0000 Cumulative_SOC_deviation: 454.3205 Fuel Consumption: 124.1380

Trial 1
Available condition is not avail... SOC: 1
elaps

maximum steps, simulation is done ... 
elapsed_time: 86.529
Episode: 23 Exploration P: 0.5458 Total reward: -1110.6879707849857 SOC: 0.8069 Cumulative_SOC_deviation: 105.2369 Fuel Consumption: 58.3193
maximum steps, simulation is done ... 
elapsed_time: 86.076
Episode: 24 Exploration P: 0.5313 Total reward: -789.3929486672702 SOC: 0.7549 Cumulative_SOC_deviation: 73.5087 Fuel Consumption: 54.3055
maximum steps, simulation is done ... 
elapsed_time: 87.372
Episode: 25 Exploration P: 0.5172 Total reward: -819.0434515546923 SOC: 0.7483 Cumulative_SOC_deviation: 76.5047 Fuel Consumption: 53.9960
maximum steps, simulation is done ... 
elapsed_time: 86.471
Episode: 26 Exploration P: 0.5035 Total reward: -587.5269201372367 SOC: 0.7078 Cumulative_SOC_deviation: 53.6234 Fuel Consumption: 51.2926
maximum steps, simulation is done ... 
elapsed_time: 86.917
Episode: 27 Exploration P: 0.4901 Total reward: -696.121358381542 SOC: 0.7376 Cumulative_SOC_deviation: 64.2961 Fuel Consumption: 53.1602
maxi

maximum steps, simulation is done ... 
elapsed_time: 84.377
Episode: 64 Exploration P: 0.1838 Total reward: -2762.2026034635455 SOC: 0.3341 Cumulative_SOC_deviation: 273.7077 Fuel Consumption: 25.1258
maximum steps, simulation is done ... 
elapsed_time: 86.591
Episode: 65 Exploration P: 0.1791 Total reward: -3025.2290019765987 SOC: 0.2995 Cumulative_SOC_deviation: 300.2435 Fuel Consumption: 22.7939
maximum steps, simulation is done ... 
elapsed_time: 86.768
Episode: 66 Exploration P: 0.1745 Total reward: -2672.720793992342 SOC: 0.3206 Cumulative_SOC_deviation: 264.8678 Fuel Consumption: 24.0424
maximum steps, simulation is done ... 
elapsed_time: 86.521
Episode: 67 Exploration P: 0.1701 Total reward: -2621.1290676241733 SOC: 0.3484 Cumulative_SOC_deviation: 259.4992 Fuel Consumption: 26.1370
maximum steps, simulation is done ... 
elapsed_time: 86.812
Episode: 68 Exploration P: 0.1657 Total reward: -3183.4647172241007 SOC: 0.2779 Cumulative_SOC_deviation: 316.2064 Fuel Consumption: 21.4

maximum steps, simulation is done ... 
elapsed_time: 86.717
Episode: 105 Exploration P: 0.0666 Total reward: -3203.0436409483004 SOC: 0.3149 Cumulative_SOC_deviation: 317.9079 Fuel Consumption: 23.9646
maximum steps, simulation is done ... 
elapsed_time: 86.665
Episode: 106 Exploration P: 0.0651 Total reward: -3406.3749629924973 SOC: 0.2963 Cumulative_SOC_deviation: 338.3656 Fuel Consumption: 22.7190
maximum steps, simulation is done ... 
elapsed_time: 86.739
Episode: 107 Exploration P: 0.0636 Total reward: -3363.520622498408 SOC: 0.3236 Cumulative_SOC_deviation: 333.8831 Fuel Consumption: 24.6897
maximum steps, simulation is done ... 
elapsed_time: 86.875
Episode: 108 Exploration P: 0.0621 Total reward: -4669.514188001211 SOC: 0.9726 Cumulative_SOC_deviation: 457.2223 Fuel Consumption: 97.2912
maximum steps, simulation is done ... 
elapsed_time: 86.874
Episode: 109 Exploration P: 0.0607 Total reward: -3802.060085908361 SOC: 0.2039 Cumulative_SOC_deviation: 378.5671 Fuel Consumption: 1

maximum steps, simulation is done ... 
elapsed_time: 87.687
Episode: 146 Exploration P: 0.0286 Total reward: -4058.2639854720505 SOC: 0.1736 Cumulative_SOC_deviation: 404.3938 Fuel Consumption: 14.3257
maximum steps, simulation is done ... 
elapsed_time: 87.437
Episode: 147 Exploration P: 0.0281 Total reward: -4073.584026146801 SOC: 0.1847 Cumulative_SOC_deviation: 405.8319 Fuel Consumption: 15.2653
maximum steps, simulation is done ... 
elapsed_time: 87.150
Episode: 148 Exploration P: 0.0276 Total reward: -4215.499259326293 SOC: 0.1629 Cumulative_SOC_deviation: 420.1678 Fuel Consumption: 13.8210
maximum steps, simulation is done ... 
elapsed_time: 87.082
Episode: 149 Exploration P: 0.0271 Total reward: -4104.150025059773 SOC: 0.1702 Cumulative_SOC_deviation: 408.9986 Fuel Consumption: 14.1641
maximum steps, simulation is done ... 
elapsed_time: 87.006
Episode: 150 Exploration P: 0.0267 Total reward: -4141.8162725603925 SOC: 0.1588 Cumulative_SOC_deviation: 412.8409 Fuel Consumption: 1

maximum steps, simulation is done ... 
elapsed_time: 78.357
Episode: 188 Exploration P: 0.0159 Total reward: -3954.7518131738393 SOC: 0.2286 Cumulative_SOC_deviation: 393.6440 Fuel Consumption: 18.3117
maximum steps, simulation is done ... 
elapsed_time: 75.706
Episode: 189 Exploration P: 0.0158 Total reward: -3823.077166621346 SOC: 0.2445 Cumulative_SOC_deviation: 380.3671 Fuel Consumption: 19.4066
maximum steps, simulation is done ... 
elapsed_time: 76.742
Episode: 190 Exploration P: 0.0156 Total reward: -3927.0398171295715 SOC: 0.2151 Cumulative_SOC_deviation: 390.9762 Fuel Consumption: 17.2775
maximum steps, simulation is done ... 
elapsed_time: 79.220
Episode: 191 Exploration P: 0.0154 Total reward: -3962.480278758012 SOC: 0.2194 Cumulative_SOC_deviation: 394.4738 Fuel Consumption: 17.7420
maximum steps, simulation is done ... 
elapsed_time: 78.868
Episode: 192 Exploration P: 0.0153 Total reward: -3893.874087620104 SOC: 0.2168 Cumulative_SOC_deviation: 387.6453 Fuel Consumption: 1

Available condition is not avail... SOC: 1
elapsed_time: 73.242
Episode: 14 Exploration P: 0.6970 Total reward: -4023.0149844317098 SOC: 1.0000 Cumulative_SOC_deviation: 393.0781 Fuel Consumption: 92.2344
Available condition is not avail... SOC: 1
elapsed_time: 73.022
Episode: 15 Exploration P: 0.6793 Total reward: -4123.151413687472 SOC: 1.0000 Cumulative_SOC_deviation: 402.9266 Fuel Consumption: 93.8853
Available condition is not avail... SOC: 1
elapsed_time: 73.246
Episode: 16 Exploration P: 0.6621 Total reward: -4090.7399871090047 SOC: 1.0000 Cumulative_SOC_deviation: 399.6442 Fuel Consumption: 94.2981
Available condition is not avail... SOC: 1
elapsed_time: 74.138
Episode: 17 Exploration P: 0.6453 Total reward: -4103.377028343149 SOC: 1.0000 Cumulative_SOC_deviation: 401.0730 Fuel Consumption: 92.6472
Available condition is not avail... SOC: 1
elapsed_time: 73.161
Episode: 18 Exploration P: 0.6289 Total reward: -4156.803696412388 SOC: 1.0000 Cumulative_SOC_deviation: 406.1144 Fuel

Available condition is not avail... SOC: 1
elapsed_time: 74.892
Episode: 54 Exploration P: 0.2519 Total reward: -4565.186607356683 SOC: 1.0000 Cumulative_SOC_deviation: 444.9458 Fuel Consumption: 115.7288
Available condition is not avail... SOC: 1
elapsed_time: 74.785
Episode: 55 Exploration P: 0.2456 Total reward: -4565.237978782547 SOC: 1.0000 Cumulative_SOC_deviation: 444.7363 Fuel Consumption: 117.8750
Available condition is not avail... SOC: 1
elapsed_time: 74.436
Episode: 56 Exploration P: 0.2396 Total reward: -4554.489310565856 SOC: 1.0000 Cumulative_SOC_deviation: 443.8337 Fuel Consumption: 116.1519
Available condition is not avail... SOC: 1
elapsed_time: 74.641
Episode: 57 Exploration P: 0.2336 Total reward: -4585.635333975136 SOC: 1.0000 Cumulative_SOC_deviation: 446.9153 Fuel Consumption: 116.4821
Available condition is not avail... SOC: 1
elapsed_time: 74.869
Episode: 58 Exploration P: 0.2279 Total reward: -4611.319850557636 SOC: 1.0000 Cumulative_SOC_deviation: 449.3342 Fu

Available condition is not avail... SOC: 1
elapsed_time: 75.390
Episode: 94 Exploration P: 0.0951 Total reward: -4676.904337097648 SOC: 1.0000 Cumulative_SOC_deviation: 455.1280 Fuel Consumption: 125.6239
Available condition is not avail... SOC: 1
elapsed_time: 74.709
Episode: 95 Exploration P: 0.0929 Total reward: -4668.152077093455 SOC: 1.0000 Cumulative_SOC_deviation: 454.2033 Fuel Consumption: 126.1192
Available condition is not avail... SOC: 1
elapsed_time: 73.945
Episode: 96 Exploration P: 0.0908 Total reward: -4683.244974231901 SOC: 1.0000 Cumulative_SOC_deviation: 455.6919 Fuel Consumption: 126.3256
Available condition is not avail... SOC: 1
elapsed_time: 73.918
Episode: 97 Exploration P: 0.0887 Total reward: -4697.792306465649 SOC: 1.0000 Cumulative_SOC_deviation: 457.1838 Fuel Consumption: 125.9541
Available condition is not avail... SOC: 1
elapsed_time: 73.844
Episode: 98 Exploration P: 0.0867 Total reward: -4688.273871078523 SOC: 1.0000 Cumulative_SOC_deviation: 456.1587 Fu

Available condition is not avail... SOC: 1
elapsed_time: 74.367
Episode: 134 Exploration P: 0.0400 Total reward: -4686.332910358741 SOC: 1.0000 Cumulative_SOC_deviation: 455.8728 Fuel Consumption: 127.6050
Available condition is not avail... SOC: 1
elapsed_time: 74.461
Episode: 135 Exploration P: 0.0392 Total reward: -4700.074343832959 SOC: 1.0000 Cumulative_SOC_deviation: 457.2119 Fuel Consumption: 127.9558
Available condition is not avail... SOC: 1
elapsed_time: 74.729
Episode: 136 Exploration P: 0.0384 Total reward: -4705.910785053895 SOC: 1.0000 Cumulative_SOC_deviation: 457.6603 Fuel Consumption: 129.3075
Available condition is not avail... SOC: 1
elapsed_time: 74.598
Episode: 137 Exploration P: 0.0377 Total reward: -4706.816892144858 SOC: 1.0000 Cumulative_SOC_deviation: 457.8190 Fuel Consumption: 128.6265
Available condition is not avail... SOC: 1
elapsed_time: 74.349
Episode: 138 Exploration P: 0.0370 Total reward: -4694.434906123758 SOC: 1.0000 Cumulative_SOC_deviation: 456.63

Available condition is not avail... SOC: 1
elapsed_time: 74.175
Episode: 174 Exploration P: 0.0206 Total reward: -4709.03475193079 SOC: 1.0000 Cumulative_SOC_deviation: 457.9531 Fuel Consumption: 129.5035
Available condition is not avail... SOC: 1
elapsed_time: 74.116
Episode: 175 Exploration P: 0.0203 Total reward: -4697.83844567095 SOC: 1.0000 Cumulative_SOC_deviation: 456.9212 Fuel Consumption: 128.6265
Available condition is not avail... SOC: 1
elapsed_time: 74.720
Episode: 176 Exploration P: 0.0200 Total reward: -4712.322869587726 SOC: 1.0000 Cumulative_SOC_deviation: 458.2923 Fuel Consumption: 129.4004
Available condition is not avail... SOC: 1
elapsed_time: 74.780
Episode: 177 Exploration P: 0.0198 Total reward: -4709.560447221898 SOC: 1.0000 Cumulative_SOC_deviation: 458.0088 Fuel Consumption: 129.4726
Available condition is not avail... SOC: 1
elapsed_time: 74.446
Episode: 178 Exploration P: 0.0195 Total reward: -4704.938422670841 SOC: 1.0000 Cumulative_SOC_deviation: 457.5920

In [14]:
# Persist the per-trial reward / SOC / fuel-consumption histories.
with open("DDPG1_400.pkl", mode="wb") as results_file: 
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)