In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 
import scipy.io as sio

from vehicle_model_variant import Environment 
from cell_model import CellModel 
from driver_MDP import Driver_MDP 

# Hide every CUDA device from this process so the notebook runs on CPU only.
# NOTE(review): this is set after `import tensorflow`; the usual convention is
# to set it before the import -- confirm it actually takes effect here.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Paths to the MATLAB (.mat) data files handed to the vehicle Environment.
# NOTE(review): `drving_cycle` (sic) is only referenced by the commented-out
# Environment construction below in the visible notebook.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# Shared cell model instance; passed to every Environment created by initialization_env.
cell_model = CellModel()
# env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)
# Only used by commented-out `driver.get_cycle()` calls in the visible notebook.
driver = Driver_MDP(0.02)

# Length of the state vector: sizes the replay buffers and the network inputs.
# The policies read the last two entries of the state as the action bounds.
num_states = 4

In [3]:
class OUActionNoise:
    """Temporally correlated exploration noise (discretised Ornstein-Uhlenbeck step).

    Each call moves the internal value towards ``mean`` at rate ``theta`` and
    adds a Gaussian perturbation scaled by ``std_deviation * sqrt(dt)``.

    Args:
        mean: Value the process is pulled towards.
        std_deviation: Scale of the random perturbation.
        theta: Strength of the pull towards ``mean``.
        dt: Time step of the discretisation.
        x_initial: Starting value; when ``None`` the process starts at 0.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.theta, self.mean = theta, mean
        self.std_dev, self.dt = std_deviation, dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Restart the process from ``x_initial`` (or from 0 when it is ``None``)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process by one step and return the new value."""
        drift = self.theta * (self.mean - self.x_prev) * self.dt
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        self.x_prev = self.x_prev + drift + diffusion
        return self.x_prev

In [4]:
class Buffer:
    """Circular experience-replay buffer that also performs the DDPG update.

    Transitions are stored as ``(state, action, reward, next_state)`` rows in
    pre-allocated NumPy arrays; once ``buffer_capacity`` rows have been written
    the oldest rows are overwritten.

    ``learn`` relies on module-level globals defined elsewhere in the notebook:
    ``actor_model``, ``critic_model``, ``target_actor``, ``target_critic``,
    ``actor_optimizer``, ``critic_optimizer``, ``gamma`` and ``num_states``.

    Args:
        buffer_capacity: Maximum number of transitions kept.
        batch_size: Number of transitions sampled per ``learn`` call.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of record() calls so far (keeps growing past capacity).
        self.buffer_counter = 0

        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))

    def record(self, obs_tuple):
        """Store one transition ``(state, action, reward, next_state)``.

        The write position wraps around, so the oldest entry is replaced once
        the buffer is full.
        """
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    @staticmethod
    def _as_float32(array):
        """Convert a NumPy batch to a float32 tensor.

        The buffers are float64; casting here avoids the per-layer autocast
        (and its warning) that Keras otherwise applies to float64 inputs.
        """
        return tf.cast(tf.convert_to_tensor(array), dtype=tf.float32)

    def learn(self):
        """Sample a random mini-batch and apply one critic and one actor update.

        Does nothing when no transition has been recorded yet.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            # Nothing to sample from; np.random.choice(0, n) would raise.
            return

        # Sampling is with replacement (np.random.choice default).
        batch_indices = np.random.choice(record_range, self.batch_size)

        state_batch = self._as_float32(self.state_buffer[batch_indices])
        action_batch = self._as_float32(self.action_buffer[batch_indices])
        reward_batch = self._as_float32(self.reward_buffer[batch_indices])
        next_state_batch = self._as_float32(self.next_state_buffer[batch_indices])

        # NOTE(review): there is no terminal flag, so the bootstrap term is
        # always added, including for the final transition of an episode.
        # NOTE(review): the models are called without `training=True`, so the
        # BatchNormalization layers presumably use their moving statistics
        # here -- confirm this is intended.
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            # Mean squared TD error.
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            # Maximise the critic's value of the actor's actions.
            actor_loss = -tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Soft-update both target networks towards their online counterparts.

    Every target weight becomes ``target * (1 - tau) + tau * online``.
    Operates on the module-level ``critic_model``/``target_critic`` and
    ``actor_model``/``target_actor``.

    Args:
        tau: Interpolation factor; larger values track the online network faster.
    """

    def blended(target_net, online_net):
        # Pair weights positionally; both nets come from the same builder.
        return [
            target_w * (1 - tau) + tau * online_w
            for target_w, online_w in zip(target_net.weights, online_net.weights)
        ]

    target_critic.set_weights(blended(target_critic, critic_model))
    target_actor.set_weights(blended(target_actor, actor_model))
    

In [6]:
def get_actor():
    """Build the actor network.

    Maps a state vector of length ``num_states`` to a single sigmoid output,
    i.e. a value in (0, 1) that the policies later scale by the upper action
    bound.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    # Small uniform initialisation for the output layer's kernel.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` must be a tuple: `(num_states)` is just the int `num_states`.
    inputs = layers.Input(shape=(num_states,))
    inputs_batchnorm = layers.BatchNormalization()(inputs)

    out = layers.Dense(512, activation="relu")(inputs_batchnorm)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic():
    """Build the critic (Q) network.

    Takes ``[state, action]`` as inputs -- a state vector of length
    ``num_states`` and a single action value -- processes each through its own
    small branch, concatenates the branches and outputs one unbounded scalar.

    Returns:
        An uncompiled ``tf.keras.Model`` with inputs ``[state_input, action_input]``.
    """
    # `shape` must be a tuple: `(num_states)` / `(1)` are plain ints.
    state_input = layers.Input(shape=(num_states,))
    state_input_batchnorm = layers.BatchNormalization()(state_input)

    # State branch.
    state_out = layers.Dense(16, activation="relu")(state_input_batchnorm)
    state_out = layers.Dense(32, activation="relu")(state_out)

    # Action branch.
    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    # Linear output: the value estimate is not squashed.
    outputs = layers.Dense(1)(out)

    model = tf.keras.Model([state_input, action_input], outputs)
    return model
    

In [8]:
def policy(state, noise_object):
    """Return a noisy action from the global actor, clipped to the legal range.

    Args:
        state: Batched state tensor (one row); entries 2 and 3 of the row are
            read as the lower and upper action bounds.
        noise_object: Zero-argument callable returning the exploration noise.

    Returns:
        ``(actor_output + noise) * upper_bound`` clipped to ``[lower, upper]``.
    """
    lower_bound = state[0][2].numpy()
    upper_bound = state[0][3].numpy()

    actor_output = tf.squeeze(actor_model(state))
    noise = noise_object()
    perturbed = actor_output.numpy() + noise

    # Scale the perturbed output by the upper bound, then enforce the bounds.
    return np.clip(perturbed * upper_bound, lower_bound, upper_bound)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Pick an action epsilon-greedily using the global actor.

    With probability ``eps`` one of 10 evenly spaced actions between the
    bounds is chosen at random; otherwise the actor output is scaled by the
    upper bound and clipped to the bounds.

    Args:
        state: Batched state tensor (one row); its last two entries are read
            as the lower and upper action bounds.
        eps: Exploration probability. A negative value always takes the
            greedy branch.

    Returns:
        The selected action as a NumPy scalar.
    """
    lower_bound = state[0][-2].numpy()
    upper_bound = state[0][-1].numpy()

    exploring = random.random() < eps
    if not exploring:
        actor_output = tf.squeeze(actor_model(state)).numpy()
        return np.clip(actor_output * upper_bound, lower_bound, upper_bound)

    # Uniform choice on a 10-point grid spanning the legal range.
    grid_index = random.randint(0, 9)
    return np.linspace(lower_bound, upper_bound, 10)[grid_index]

In [10]:
# --- Exploration noise (used by `policy`) ---
std_dev = 0.2
# Use the constant above instead of repeating the literal.
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# --- Optimisers ---
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

# --- Training schedule ---
total_episodes = 200  # episodes per reward factor
gamma = 0.95          # discount factor in the critic target
tau = 0.001           # soft-update rate for the target networks

# --- Epsilon-greedy exploration ---
MAX_EPSILON = 1.0      # exploration probability before learning starts
MIN_EPSILON = 0.01     # value the decaying epsilon approaches
DECAY_RATE = 0.00002   # exponential decay rate per environment step
BATCH_SIZE = 32        # mini-batch size passed to Buffer
DELAY_TRAINING = 5000  # steps collected before learning and decay begin

In [11]:
def initialization():
    """Create fresh online/target networks and an empty replay buffer.

    The target networks start as exact copies of the online networks.

    Returns:
        Tuple ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    online_actor, online_critic = get_actor(), get_critic()
    frozen_actor, frozen_critic = get_actor(), get_critic()

    # Synchronise each target with its online counterpart.
    for target_net, online_net in (
        (frozen_actor, online_actor),
        (frozen_critic, online_critic),
    ):
        target_net.set_weights(online_net.get_weights())

    # Warm-starting from checkpoints under "./DDPG1_trial1/" (via
    # `load_weights` on each of the four models) is currently disabled.

    replay = Buffer(500000, BATCH_SIZE)
    return online_actor, online_critic, frozen_actor, frozen_critic, replay

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Save the weights of all four networks as ``.h5`` files under ``root``.

    Args:
        actor_model: Online actor network.
        critic_model: Online critic network.
        target_actor: Target actor network.
        target_critic: Target critic network.
        root: Output directory (relative or absolute); created if missing.
    """
    # exist_ok avoids the check-then-create race and makes re-saving idempotent.
    os.makedirs(root, exist_ok=True)

    checkpoints = (
        ("actor_model.h5", actor_model),
        ("critic_model.h5", critic_model),
        ("target_actor.h5", target_actor),
        ("target_critic.h5", target_critic),
    )
    for filename, model in checkpoints:
        # Join instead of "./{}/..." so an absolute `root` is honoured too.
        model.save_weights(os.path.join(root, filename))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor):
    """Construct a new ``Environment`` for one episode.

    Args:
        driving_path: Driving-cycle argument forwarded to ``Environment``
            (the visible callers pass a loaded cycle array, not a path).
        reward_factor: Reward weighting forwarded to ``Environment``.

    Returns:
        The freshly built ``Environment``, using the module-level
        ``cell_model``, ``battery_path`` and ``motor_path``.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [14]:
def test_agent(actor_model, reward_factor):
    """Run one greedy (no exploration) episode and report/plot the result.

    Unlike the previous version, the action is computed from the
    ``actor_model`` argument rather than from the global actor, so the model
    passed in is the one actually evaluated.

    Args:
        actor_model: Actor network to evaluate.
        reward_factor: Reward weighting forwarded to the environment.

    Returns:
        ``env.history`` of the evaluation episode.
    """
    test_cycle_path = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
    # assumes column 1 of "sch_cycle" holds the cycle values -- TODO confirm
    test_cycle = sio.loadmat(test_cycle_path)["sch_cycle"][:, 1]
    env = initialization_env(test_cycle, reward_factor)

    total_reward = 0
    state = env.reset()
    done = False
    while not done:
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
        # Greedy action, same rule as the greedy branch of
        # policy_epsilon_greedy: scale by the upper bound, clip to the bounds
        # (the last two state entries).
        j_min = tf_state[0][-2].numpy()
        j_max = tf_state[0][-1].numpy()
        raw_action = tf.squeeze(actor_model(tf_state)).numpy()
        action = np.clip(raw_action * j_max, j_min, j_max)

        state, reward, done = env.step(action)
        total_reward += reward

    # Sum of absolute deviations of the recorded SOC from 0.6.
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))

    print("******************* Test is start *****************")
    print('Total reward: {}'.format(total_reward),
          "SOC: {:.4f}".format(env.SOC),
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption))
    print("******************* Test is done *****************")
    print("")
    # Left: the driving cycle; right: the actions taken during the episode.
    plt.subplot(1, 2, 1)
    plt.plot(test_cycle)
    plt.subplot(1, 2, 2)
    plt.plot(env.history["Action"])
    plt.show()
    return env.history
    

In [15]:
# Main experiment: train one independent DDPG agent per reward factor on a
# single driving cycle and collect per-episode statistics in `results_dict`.
# print(env.version)

# num_trials = 1
reward_factors = [1, 2, 3]
results_dict = {} 
driving_cycle_path = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
driving_cycle = sio.loadmat(driving_cycle_path)
# assumes column 1 of "sch_cycle" holds the cycle values -- TODO confirm
driving_cycle = driving_cycle["sch_cycle"][:, 1]

for reward_factor in reward_factors: 
    print("")
    print("Trial {}".format(reward_factor))
    print("")
    
    # Fresh networks and replay buffer for every reward factor. These names are
    # module-level globals that Buffer.learn, update_target and the policies read.
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()
    
    eps = MAX_EPSILON 
    # Environment steps taken across all episodes of this reward factor.
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    episode_test_history = [] 
    episode_num_test = [] 
    for ep in range(total_episodes): 
#         driving_cycle = driver.get_cycle() 
        # A new Environment is built for every episode.
        env = initialization_env(driving_cycle, reward_factor)
        
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            # The terminal transition is stored with an all-zero next state.
            # NOTE(review): Buffer.learn has no terminal flag, so it still
            # bootstraps from this zero state.
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            # Learning and epsilon decay start only after DELAY_TRAINING steps
            # of pure exploration (eps stays at MAX_EPSILON until then).
            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                # Exponential decay from MAX_EPSILON towards MIN_EPSILON.
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps
                                                                        -DELAY_TRAINING))

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption) 

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        # Sum of absolute deviations of the recorded SOC from 0.6.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
        )
        print("")
        
        # Every 10th episode keep the full history. With test_agent disabled
        # this is the (exploring) training episode's history, not a greedy test.
        if (ep + 1) % 10 == 0: 
#             history = test_agent(actor_model, reward_factor)
            history = env.history 
            episode_test_history.append(history) 
            episode_num_test.append(ep + 1)
            
#         if (ep + 1) % 200 == 0:             
    # Weights are saved once per reward factor, after its last episode.
    root = "DDPG_cycleOne_reward_factor{}".format(reward_factor)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)
            
    
    results_dict[reward_factor] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs, 
        "test_history": episode_test_history, 
        "test_episode_num": episode_num_test, 
    }


Trial 1

maximum steps, simulation is done ... 
elapsed_time: 9.183
Episode: 1 Exploration P: 1.0000 Total reward: -669.6301542086956 SOC: 1.0000 Cumulative_SOC_deviation: 481.0124 Fuel Consumption: 188.6177

maximum steps, simulation is done ... 
elapsed_time: 9.215
Episode: 2 Exploration P: 1.0000 Total reward: -676.4870294699607 SOC: 1.0000 Cumulative_SOC_deviation: 485.6016 Fuel Consumption: 190.8855

maximum steps, simulation is done ... 
elapsed_time: 9.735
Episode: 3 Exploration P: 1.0000 Total reward: -679.2840680106785 SOC: 1.0000 Cumulative_SOC_deviation: 486.4813 Fuel Consumption: 192.8027



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_float

maximum steps, simulation is done ... 
elapsed_time: 85.941
Episode: 28 Exploration P: 0.5172 Total reward: -252.02888495345096 SOC: 0.7988 Cumulative_SOC_deviation: 110.7555 Fuel Consumption: 141.2734

maximum steps, simulation is done ... 
elapsed_time: 88.245
Episode: 29 Exploration P: 0.5034 Total reward: -228.27196514526017 SOC: 0.8119 Cumulative_SOC_deviation: 85.7822 Fuel Consumption: 142.4898

maximum steps, simulation is done ... 
elapsed_time: 86.816
Episode: 30 Exploration P: 0.4901 Total reward: -207.32358707736768 SOC: 0.7098 Cumulative_SOC_deviation: 72.5441 Fuel Consumption: 134.7794

maximum steps, simulation is done ... 
elapsed_time: 88.213
Episode: 31 Exploration P: 0.4771 Total reward: -208.99212990416424 SOC: 0.7334 Cumulative_SOC_deviation: 72.1923 Fuel Consumption: 136.7998

maximum steps, simulation is done ... 
elapsed_time: 86.177
Episode: 32 Exploration P: 0.4644 Total reward: -197.18436552198955 SOC: 0.5708 Cumulative_SOC_deviation: 72.9912 Fuel Consumption:

  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (


maximum steps, simulation is done ... 
elapsed_time: 79.754
Episode: 44 Exploration P: 0.3368 Total reward: -593.2858489213535 SOC: 0.0289 Cumulative_SOC_deviation: 505.9538 Fuel Consumption: 87.3321

maximum steps, simulation is done ... 
elapsed_time: 80.379
Episode: 45 Exploration P: 0.3280 Total reward: -447.37486371392083 SOC: 0.1661 Cumulative_SOC_deviation: 352.2500 Fuel Consumption: 95.1249

maximum steps, simulation is done ... 
elapsed_time: 79.680
Episode: 46 Exploration P: 0.3194 Total reward: -560.9083022402615 SOC: 0.0608 Cumulative_SOC_deviation: 472.0404 Fuel Consumption: 88.8680

maximum steps, simulation is done ... 
elapsed_time: 79.935
Episode: 47 Exploration P: 0.3110 Total reward: -559.2155137571144 SOC: 0.0409 Cumulative_SOC_deviation: 471.7597 Fuel Consumption: 87.4558

battery power is 7946.590307366354(+) but condition is not avail
elapsed_time: 65.825
Episode: 48 Exploration P: 0.3043 Total reward: -1466.5895096886507 SOC: -0.0014 Cumulative_SOC_deviation: 39

  2 * r_dis)) * (v_dis - (v_dis ** 2 - 4 * r_dis * p_bat) ** (0.5)) * (p_bat >= 0)


battery power is 6435.155063651266(+) but condition is not avail
elapsed_time: 45.941
Episode: 58 Exploration P: 0.2455 Total reward: -1302.9170900773852 SOC: -0.0004 Cumulative_SOC_deviation: 265.4635 Fuel Consumption: 38.0549

maximum steps, simulation is done ... 
elapsed_time: 79.803
Episode: 59 Exploration P: 0.2391 Total reward: -422.8540661967853 SOC: 0.4991 Cumulative_SOC_deviation: 301.9368 Fuel Consumption: 120.9173

maximum steps, simulation is done ... 
elapsed_time: 80.610
Episode: 60 Exploration P: 0.2329 Total reward: -202.46087921589722 SOC: 0.5036 Cumulative_SOC_deviation: 87.3758 Fuel Consumption: 115.0851

maximum steps, simulation is done ... 
elapsed_time: 79.985
Episode: 61 Exploration P: 0.2269 Total reward: -333.87300838985215 SOC: 0.4933 Cumulative_SOC_deviation: 217.5524 Fuel Consumption: 116.3206

maximum steps, simulation is done ... 
elapsed_time: 80.022
Episode: 62 Exploration P: 0.2210 Total reward: -399.2780202631183 SOC: 0.5090 Cumulative_SOC_deviation:

maximum steps, simulation is done ... 
elapsed_time: 106.764
Episode: 98 Exploration P: 0.0911 Total reward: -508.49370616348045 SOC: 0.1829 Cumulative_SOC_deviation: 424.5609 Fuel Consumption: 83.9328

maximum steps, simulation is done ... 
elapsed_time: 107.010
Episode: 99 Exploration P: 0.0889 Total reward: -561.1405204879708 SOC: 0.0852 Cumulative_SOC_deviation: 483.7018 Fuel Consumption: 77.4387

maximum steps, simulation is done ... 
elapsed_time: 107.162
Episode: 100 Exploration P: 0.0867 Total reward: -507.12102428212546 SOC: 0.1334 Cumulative_SOC_deviation: 425.5219 Fuel Consumption: 81.5991

maximum steps, simulation is done ... 
elapsed_time: 109.152
Episode: 101 Exploration P: 0.0847 Total reward: -689.3773937698656 SOC: 0.0816 Cumulative_SOC_deviation: 606.0542 Fuel Consumption: 83.3232

maximum steps, simulation is done ... 
elapsed_time: 108.024
Episode: 102 Exploration P: 0.0826 Total reward: -674.2066580488305 SOC: 0.0755 Cumulative_SOC_deviation: 585.3556 Fuel Consump

battery power is 663.7725566044937(+) but condition is not avail
elapsed_time: 100.902
Episode: 135 Exploration P: 0.0504 Total reward: -1951.698890698868 SOC: -0.0002 Cumulative_SOC_deviation: 615.1885 Fuel Consumption: 337.2041

battery power is 773.9037145420134(+) but condition is not avail
elapsed_time: 55.127
Episode: 136 Exploration P: 0.0499 Total reward: -1387.5342577606707 SOC: -0.0009 Cumulative_SOC_deviation: 278.1432 Fuel Consumption: 110.0855

battery power is 651.8567835680633(+) but condition is not avail
elapsed_time: 48.144
Episode: 137 Exploration P: 0.0494 Total reward: -1268.9753464297708 SOC: -0.0003 Cumulative_SOC_deviation: 229.2923 Fuel Consumption: 40.2841

battery power is 4556.04963138524(+) but condition is not avail
elapsed_time: 81.540
Episode: 138 Exploration P: 0.0486 Total reward: -1520.4546644039779 SOC: -0.0005 Cumulative_SOC_deviation: 455.7570 Fuel Consumption: 65.2990

battery power is 4556.04963138524(+) but condition is not avail
elapsed_time: 9

battery power is 4455.100664393433(+) but condition is not avail
elapsed_time: 63.054
Episode: 172 Exploration P: 0.0289 Total reward: -1511.7114890742714 SOC: -0.0001 Cumulative_SOC_deviation: 342.2331 Fuel Consumption: 170.1720

battery power is 8639.66101848357(+) but condition is not avail
elapsed_time: 101.891
Episode: 173 Exploration P: 0.0284 Total reward: -1838.994373717253 SOC: -0.0006 Cumulative_SOC_deviation: 584.7270 Fuel Consumption: 254.8687

maximum steps, simulation is done ... 
elapsed_time: 107.585
Episode: 174 Exploration P: 0.0279 Total reward: -679.047454772292 SOC: 0.0207 Cumulative_SOC_deviation: 595.1194 Fuel Consumption: 83.9280

battery power is 9344.08275176345(+) but condition is not avail
elapsed_time: 88.896
Episode: 175 Exploration P: 0.0275 Total reward: -1994.926126939591 SOC: -0.0013 Cumulative_SOC_deviation: 540.9212 Fuel Consumption: 454.6071

maximum steps, simulation is done ... 
elapsed_time: 107.096
Episode: 176 Exploration P: 0.0271 Total reward



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

maximum steps, simulation is done ... 
elapsed_time: 37.519
Episode: 4 Exploration P: 0.9903 Total reward: -1160.5950497418526 SOC: 1.0000 Cumulative_SOC_deviation: 483.4426 Fuel Consumption: 193.7098

maximum steps, simulation is done ... 
elapsed_time: 79.891
Episode: 5 Exploration P: 0.9638 Total reward: -1119.8001429426122 SOC: 0.9990 Cumulative_SOC_deviation: 467.0034 Fuel Consumption: 185.

maximum steps, simulation is done ... 
elapsed_time: 80.666
Episode: 38 Exploration P: 0.3954 Total reward: -499.80149871503767 SOC: 0.4386 Cumulative_SOC_deviation: 192.5355 Fuel Consumption: 114.7305

maximum steps, simulation is done ... 
elapsed_time: 80.502
Episode: 39 Exploration P: 0.3849 Total reward: -765.1684500489621 SOC: 0.2734 Cumulative_SOC_deviation: 331.0326 Fuel Consumption: 103.1032

maximum steps, simulation is done ... 
elapsed_time: 80.689
Episode: 40 Exploration P: 0.3748 Total reward: -790.9262885558237 SOC: 0.2158 Cumulative_SOC_deviation: 346.2721 Fuel Consumption: 98.3822

maximum steps, simulation is done ... 
elapsed_time: 80.805
Episode: 41 Exploration P: 0.3649 Total reward: -856.512385708299 SOC: 0.1709 Cumulative_SOC_deviation: 380.4361 Fuel Consumption: 95.6403

maximum steps, simulation is done ... 
elapsed_time: 81.164
Episode: 42 Exploration P: 0.3553 Total reward: -959.8877920084235 SOC: 0.1039 Cumulative_SOC_deviation: 434.5616 Fuel Consumption: 90

maximum steps, simulation is done ... 
elapsed_time: 86.817
Episode: 78 Exploration P: 0.1485 Total reward: -941.7136202096867 SOC: 0.2110 Cumulative_SOC_deviation: 421.7625 Fuel Consumption: 98.1887

maximum steps, simulation is done ... 
elapsed_time: 108.792
Episode: 79 Exploration P: 0.1448 Total reward: -973.2998180796768 SOC: 0.2081 Cumulative_SOC_deviation: 437.5159 Fuel Consumption: 98.2681

maximum steps, simulation is done ... 
elapsed_time: 136.782
Episode: 80 Exploration P: 0.1411 Total reward: -973.139017654732 SOC: 0.1564 Cumulative_SOC_deviation: 438.6951 Fuel Consumption: 95.7488

maximum steps, simulation is done ... 
elapsed_time: 124.034
Episode: 81 Exploration P: 0.1376 Total reward: -1029.789309500579 SOC: 0.2253 Cumulative_SOC_deviation: 464.3522 Fuel Consumption: 101.0848

maximum steps, simulation is done ... 
elapsed_time: 86.513
Episode: 82 Exploration P: 0.1341 Total reward: -1000.8706300486216 SOC: 0.2716 Cumulative_SOC_deviation: 448.5680 Fuel Consumption: 

maximum steps, simulation is done ... 
elapsed_time: 85.293
Episode: 119 Exploration P: 0.0550 Total reward: -1182.2848193124103 SOC: 0.1330 Cumulative_SOC_deviation: 543.3992 Fuel Consumption: 95.4865

maximum steps, simulation is done ... 
elapsed_time: 83.712
Episode: 120 Exploration P: 0.0538 Total reward: -1223.1687592013961 SOC: 0.0728 Cumulative_SOC_deviation: 565.5738 Fuel Consumption: 92.0212

maximum steps, simulation is done ... 
elapsed_time: 82.966
Episode: 121 Exploration P: 0.0526 Total reward: -1334.4995709502728 SOC: 0.1208 Cumulative_SOC_deviation: 619.3945 Fuel Consumption: 95.7105

maximum steps, simulation is done ... 
elapsed_time: 83.038
Episode: 122 Exploration P: 0.0514 Total reward: -1298.7702097237297 SOC: 0.1184 Cumulative_SOC_deviation: 602.1293 Fuel Consumption: 94.5117

maximum steps, simulation is done ... 
elapsed_time: 83.071
Episode: 123 Exploration P: 0.0503 Total reward: -1360.105159365311 SOC: 0.0976 Cumulative_SOC_deviation: 633.2596 Fuel Consumpt

battery power is 3284.290206654804(+) but condition is not avail
elapsed_time: 55.347
Episode: 157 Exploration P: 0.0320 Total reward: -2202.23678608871 SOC: -0.0002 Cumulative_SOC_deviation: 418.0063 Fuel Consumption: 367.5181

battery power is 7119.728871212785(+) but condition is not avail
elapsed_time: 67.968
Episode: 158 Exploration P: 0.0316 Total reward: -2284.0684784766045 SOC: -0.0002 Cumulative_SOC_deviation: 512.9758 Fuel Consumption: 259.3181

battery power is 7805.227240959868(+) but condition is not avail
elapsed_time: 34.054
Episode: 159 Exploration P: 0.0313 Total reward: -1460.8215000033206 SOC: -0.0012 Cumulative_SOC_deviation: 208.7559 Fuel Consumption: 44.5129

battery power is 13412.245636872867(+) but condition is not avail
elapsed_time: 80.517
Episode: 160 Exploration P: 0.0307 Total reward: -2577.5219199143057 SOC: -0.0002 Cumulative_SOC_deviation: 636.6471 Fuel Consumption: 305.4288

battery power is 5336.237460118426(+) but condition is not avail
elapsed_time:

battery power is 1477.0631565025187(+) but condition is not avail
elapsed_time: 53.438
Episode: 193 Exploration P: 0.0220 Total reward: -1825.4559451908685 SOC: -0.0019 Cumulative_SOC_deviation: 300.1808 Fuel Consumption: 226.3918

battery power is 4556.04963138524(+) but condition is not avail
elapsed_time: 77.350
Episode: 194 Exploration P: 0.0217 Total reward: -2307.792614008112 SOC: -0.0008 Cumulative_SOC_deviation: 482.8481 Fuel Consumption: 343.2988

battery power is 2617.0395492591833(+) but condition is not avail
elapsed_time: 79.850
Episode: 195 Exploration P: 0.0215 Total reward: -2514.526920539327 SOC: -0.0008 Cumulative_SOC_deviation: 505.0455 Fuel Consumption: 505.7311

battery power is 8370.966351628711(+) but condition is not avail
elapsed_time: 97.425
Episode: 196 Exploration P: 0.0212 Total reward: -2924.9227915731544 SOC: -0.0011 Cumulative_SOC_deviation: 639.4595 Fuel Consumption: 647.2069

maximum steps, simulation is done ... 
elapsed_time: 82.318
Episode: 197 Expl

maximum steps, simulation is done ... 
elapsed_time: 83.540
Episode: 19 Exploration P: 0.6594 Total reward: -1133.202968875912 SOC: 0.9837 Cumulative_SOC_deviation: 327.0732 Fuel Consumption: 151.9834

maximum steps, simulation is done ... 
elapsed_time: 83.357
Episode: 20 Exploration P: 0.6418 Total reward: -1131.7996355666503 SOC: 0.9995 Cumulative_SOC_deviation: 325.3513 Fuel Consumption: 155.7458

maximum steps, simulation is done ... 
elapsed_time: 83.489
Episode: 21 Exploration P: 0.6247 Total reward: -1212.2083063504758 SOC: 0.9939 Cumulative_SOC_deviation: 353.5389 Fuel Consumption: 151.5917

maximum steps, simulation is done ... 
elapsed_time: 83.521
Episode: 22 Exploration P: 0.6080 Total reward: -904.5444357247202 SOC: 0.9858 Cumulative_SOC_deviation: 250.4448 Fuel Consumption: 153.2100

maximum steps, simulation is done ... 
elapsed_time: 83.640
Episode: 23 Exploration P: 0.5918 Total reward: -926.5329887585237 SOC: 0.9737 Cumulative_SOC_deviation: 258.2004 Fuel Consumption

battery power is 11598.447586310172(+) but condition is not avail
elapsed_time: 79.345
Episode: 59 Exploration P: 0.2472 Total reward: -2458.8340884053005 SOC: -0.0006 Cumulative_SOC_deviation: 461.0007 Fuel Consumption: 77.6345

maximum steps, simulation is done ... 
elapsed_time: 85.505
Episode: 60 Exploration P: 0.2408 Total reward: -1133.3108825057423 SOC: 0.5598 Cumulative_SOC_deviation: 335.5717 Fuel Consumption: 126.5959

maximum steps, simulation is done ... 
elapsed_time: 85.454
Episode: 61 Exploration P: 0.2345 Total reward: -314.9756045485569 SOC: 0.5244 Cumulative_SOC_deviation: 67.5972 Fuel Consumption: 112.1839

maximum steps, simulation is done ... 
elapsed_time: 85.726
Episode: 62 Exploration P: 0.2284 Total reward: -390.2480702275489 SOC: 0.5155 Cumulative_SOC_deviation: 91.9882 Fuel Consumption: 114.2835

maximum steps, simulation is done ... 
elapsed_time: 85.455
Episode: 63 Exploration P: 0.2225 Total reward: -563.2901131748979 SOC: 0.5108 Cumulative_SOC_deviation: 

battery power is 6345.029147455788(+) but condition is not avail
elapsed_time: 57.707
Episode: 99 Exploration P: 0.0934 Total reward: -1900.217844921307 SOC: -0.0011 Cumulative_SOC_deviation: 282.8135 Fuel Consumption: 53.5816

battery power is 7957.703445167204(+) but condition is not avail
elapsed_time: 56.551
Episode: 100 Exploration P: 0.0919 Total reward: -2030.3451697482465 SOC: -0.0002 Cumulative_SOC_deviation: 321.0167 Fuel Consumption: 69.0964

battery power is 5157.377688440195(+) but condition is not avail
elapsed_time: 52.020
Episode: 101 Exploration P: 0.0905 Total reward: -2167.660678700922 SOC: -0.0001 Cumulative_SOC_deviation: 345.3480 Fuel Consumption: 133.5106

battery power is 4556.04963138524(+) but condition is not avail
elapsed_time: 63.827
Episode: 102 Exploration P: 0.0889 Total reward: -2257.1571034563085 SOC: -0.0002 Cumulative_SOC_deviation: 400.8236 Fuel Consumption: 56.4879

battery power is 10880.247543512363(+) but condition is not avail
elapsed_time: 45.

maximum steps, simulation is done ... 
elapsed_time: 85.091
Episode: 136 Exploration P: 0.0488 Total reward: -1750.0215604879945 SOC: 0.2003 Cumulative_SOC_deviation: 551.7881 Fuel Consumption: 94.6574

maximum steps, simulation is done ... 
elapsed_time: 85.088
Episode: 137 Exploration P: 0.0478 Total reward: -1154.1286350623254 SOC: 0.1934 Cumulative_SOC_deviation: 355.6129 Fuel Consumption: 87.2899

maximum steps, simulation is done ... 
elapsed_time: 84.466
Episode: 138 Exploration P: 0.0468 Total reward: -1217.0541835653262 SOC: 0.3509 Cumulative_SOC_deviation: 372.7651 Fuel Consumption: 98.7590

maximum steps, simulation is done ... 
elapsed_time: 84.326
Episode: 139 Exploration P: 0.0458 Total reward: -894.2997956627557 SOC: 0.4041 Cumulative_SOC_deviation: 264.7323 Fuel Consumption: 100.1030

maximum steps, simulation is done ... 
elapsed_time: 84.362
Episode: 140 Exploration P: 0.0448 Total reward: -861.7396369459348 SOC: 0.3704 Cumulative_SOC_deviation: 254.7663 Fuel Consumpt

maximum steps, simulation is done ... 
elapsed_time: 85.266
Episode: 176 Exploration P: 0.0242 Total reward: -1019.3583632189956 SOC: 0.3774 Cumulative_SOC_deviation: 305.4539 Fuel Consumption: 102.9968

maximum steps, simulation is done ... 
elapsed_time: 84.979
Episode: 177 Exploration P: 0.0239 Total reward: -1461.0677624353839 SOC: 0.2679 Cumulative_SOC_deviation: 455.3662 Fuel Consumption: 94.9691

maximum steps, simulation is done ... 
elapsed_time: 85.332
Episode: 178 Exploration P: 0.0235 Total reward: -1231.9035820133827 SOC: 0.3457 Cumulative_SOC_deviation: 376.6801 Fuel Consumption: 101.8633

maximum steps, simulation is done ... 
elapsed_time: 85.074
Episode: 179 Exploration P: 0.0231 Total reward: -1213.110175225524 SOC: 0.2742 Cumulative_SOC_deviation: 374.2194 Fuel Consumption: 90.4520

maximum steps, simulation is done ... 
elapsed_time: 85.223
Episode: 180 Exploration P: 0.0228 Total reward: -923.2535899601295 SOC: 0.3998 Cumulative_SOC_deviation: 274.9729 Fuel Consump

In [16]:
# Persist the per-reward-factor training results for later analysis.
with open("DDPG_cycleOne_1to3.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [17]:
# results_dict