In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 
import scipy.io as sio

from vehicle_model_variant import Environment 
from cell_model import CellModel 
from driver_MDP import Driver_MDP 

# Hide every CUDA device from this process so TensorFlow runs on the CPU.
# NOTE(review): this is set after `import tensorflow`; it presumably still takes
# effect because no TensorFlow op has run yet at this point -- confirm on the
# target machine.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Paths into the OC_SIM_DB data set, relative to the notebook's working directory.
# `drving_cycle` (sic) is only referenced by the commented-out Environment call below.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# Single CellModel instance that `initialization_env` passes to every Environment.
cell_model = CellModel()
# env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)
# Only used by the commented-out `driver.get_cycle()` calls in the cells shown here.
driver = Driver_MDP(0.02)

# Length of the state vector. The policy helpers read its last two entries
# (indices 2 and 3) as the lower / upper action bounds j_min and j_max.
num_states = 4

In [3]:
class OUActionNoise:
    """Temporally correlated exploration noise (discretised Ornstein-Uhlenbeck process).

    Each call advances the process by one step of size ``dt`` and returns the
    new value. The process is pulled towards ``mean`` with strength ``theta``
    and perturbed by Gaussian noise scaled by ``std_deviation * sqrt(dt)``.

    Args:
        mean: Value the process reverts to.
        std_deviation: Scale of the random perturbation.
        theta: Mean-reversion rate.
        dt: Step size of the discretisation.
        x_initial: Starting value; ``None`` means the process starts at 0.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.theta, self.mean, self.std_dev = theta, mean, std_deviation
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Restart the process from ``x_initial`` (or 0 when none was given)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process one step and return its new value."""
        # Deterministic pull towards the mean.
        drift = self.theta * (self.mean - self.x_prev) * self.dt
        # Random kick; a single standard-normal draw per call.
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        self.x_prev = self.x_prev + drift + diffusion
        return self.x_prev

In [4]:
class Buffer:
    """Circular replay buffer that also performs one DDPG gradient step.

    Transitions ``(state, action, reward, next_state)`` are stored in
    pre-allocated NumPy arrays; once ``buffer_capacity`` entries have been
    written the oldest ones are overwritten.

    ``learn`` reads the module-level names ``actor_model``, ``critic_model``,
    ``target_actor``, ``target_critic``, ``actor_optimizer``,
    ``critic_optimizer`` and ``gamma``; they must be bound before it is called.

    Args:
        buffer_capacity: Maximum number of transitions kept.
        batch_size: Number of transitions sampled per ``learn`` call.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of `record` calls so far (not capped at the capacity).
        self.buffer_counter = 0

        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))

    def record(self, obs_tuple):
        """Store one transition ``(state, action, reward, next_state)``.

        The write position wraps around, so the oldest entry is replaced once
        the buffer is full.
        """
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    @staticmethod
    def _to_float32(array):
        """Convert a NumPy batch to a float32 tensor.

        The buffers are float64 while the Keras models compute in float32;
        casting here avoids the per-layer autocast (and its warning).
        """
        return tf.cast(tf.convert_to_tensor(array), dtype=tf.float32)

    def learn(self):
        """Sample a mini-batch and apply one critic update and one actor update.

        Does nothing when no transition has been recorded yet. Indices are
        drawn with replacement from the filled part of the buffer.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            # Nothing to sample from; np.random.choice(0, ...) would raise.
            return
        batch_indices = np.random.choice(record_range, self.batch_size)

        state_batch = self._to_float32(self.state_buffer[batch_indices])
        action_batch = self._to_float32(self.action_buffer[batch_indices])
        reward_batch = self._to_float32(self.reward_buffer[batch_indices])
        next_state_batch = self._to_float32(self.next_state_buffer[batch_indices])

        # Critic step: regress Q(s, a) onto the one-step TD target.
        # NOTE: `record` stores no episode-termination flag, so the target
        # bootstraps from next_state for every sampled transition.
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor step: maximise the critic's value of the actor's own actions
        # (implemented as minimising the negated mean value).
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Soft-update both target networks towards their online counterparts.

    For every weight: ``target <- (1 - tau) * target + tau * online``. The
    critic pair is updated first, then the actor pair. Reads the module-level
    names ``target_critic``, ``critic_model``, ``target_actor`` and
    ``actor_model``.

    Args:
        tau: Interpolation factor; the share of the online weights mixed in.
    """
    network_pairs = (
        (target_critic, critic_model),
        (target_actor, actor_model),
    )
    for target_net, online_net in network_pairs:
        blended = [
            slow * (1 - tau) + tau * fast
            for slow, fast in zip(target_net.weights, online_net.weights)
        ]
        target_net.set_weights(blended)
    

In [6]:
def get_actor():
    """Build the actor network mapping a state to one action value in (0, 1).

    Architecture: BatchNormalization on the input, two 512-unit ReLU layers
    and a single sigmoid output unit. The policy helpers multiply this output
    by the upper action bound before clipping.

    NOTE(review): every call site in this file invokes the model without
    ``training=True``, so the BatchNormalization layer presumably always runs
    in inference mode -- confirm that this is intended.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    # Tiny final-layer weights keep the initial sigmoid outputs close to 0.5.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` must be a tuple; `(num_states)` without the comma is a bare int.
    inputs = layers.Input(shape=(num_states,))
    inputs_batchnorm = layers.BatchNormalization()(inputs)

    out = layers.Dense(512, activation="relu")(inputs_batchnorm)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic():
    """Build the critic network estimating Q(state, action).

    The state passes through BatchNormalization and two ReLU layers (16 and
    32 units); the action passes through one 32-unit ReLU layer. Both
    branches are concatenated and fed through two 512-unit ReLU layers into a
    single linear output.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[state, action]`` as input.
    """
    # `shape` must be a tuple; `(num_states)` / `(1)` without the comma are bare ints.
    state_input = layers.Input(shape=(num_states,))
    state_input_batchnorm = layers.BatchNormalization()(state_input)

    state_out = layers.Dense(16, activation="relu")(state_input_batchnorm)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    model = tf.keras.Model([state_input, action_input], outputs)
    return model
    

In [8]:
def policy(state, noise_object):
    """Return the actor's action with additive exploration noise.

    The noise is added to the raw actor output, the sum is scaled by the
    upper bound and the result is clipped to ``[j_min, j_max]``. Reads the
    module-level ``actor_model``.

    Args:
        state: Batched state tensor of batch size 1; entries 2 and 3 of the
            first row are used as the lower / upper action bounds.
        noise_object: Zero-argument callable returning the noise sample.

    Returns:
        The clipped action as a NumPy value.
    """
    lower_bound = state[0][2].numpy()
    upper_bound = state[0][3].numpy()

    actor_output = tf.squeeze(actor_model(state)).numpy()
    noisy_output = actor_output + noise_object()

    return np.clip(noisy_output * upper_bound, lower_bound, upper_bound)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Pick an action epsilon-greedily within the state's action bounds.

    With probability ``eps`` one of 10 evenly spaced values between the lower
    and upper bound is chosen uniformly at random; otherwise the output of
    the module-level ``actor_model`` is scaled by the upper bound and clipped
    to the bounds.

    Args:
        state: Batched state tensor of batch size 1; the last two entries of
            the first row are the lower / upper action bounds.
        eps: Exploration probability. A value <= 0 always yields the actor's
            action.

    Returns:
        The selected action as a NumPy value.
    """
    lower_bound = state[0][-2].numpy()
    upper_bound = state[0][-1].numpy()

    explore = random.random() < eps
    if not explore:
        actor_output = tf.squeeze(actor_model(state)).numpy()
        return np.clip(actor_output * upper_bound, lower_bound, upper_bound)

    grid_index = random.randint(0, 9)
    candidates = np.linspace(lower_bound, upper_bound, 10)
    return candidates[grid_index]

In [10]:
# Exploration noise for the `policy` helper. In the cells shown here the
# training loop uses `policy_epsilon_greedy` instead, so `ou_noise` is unused.
std_dev = 0.2
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# Adam learning rates for the critic and the actor.
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 200  # episodes trained per reward factor
gamma = 0.95          # discount factor in the critic's TD target
tau = 0.001           # soft-update rate passed to `update_target`

# Epsilon-greedy schedule used by the training loop:
#   eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * exp(-DECAY_RATE * (steps - DELAY_TRAINING))
MAX_EPSILON = 1.0
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
BATCH_SIZE = 32        # mini-batch size of the replay buffer
DELAY_TRAINING = 5000  # environment steps collected before learning starts

In [11]:
def initialization():
    """Create fresh online/target networks and an empty replay buffer.

    The target networks start as exact copies of the online networks. To
    warm-start instead, load checkpoint weights into the four models before
    returning them.

    Returns:
        Tuple ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    online_actor, online_critic = get_actor(), get_critic()
    frozen_actor, frozen_critic = get_actor(), get_critic()

    # Synchronise each target network with its online counterpart.
    for frozen, online in ((frozen_actor, online_actor), (frozen_critic, online_critic)):
        frozen.set_weights(online.get_weights())

    replay = Buffer(500000, BATCH_SIZE)
    return online_actor, online_critic, frozen_actor, frozen_critic, replay

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Save the weights of all four networks as ``.h5`` files under ``./<root>/``.

    The directory is created if it does not exist yet. Files written:
    ``actor_model.h5``, ``critic_model.h5``, ``target_actor.h5`` and
    ``target_critic.h5``.

    Args:
        actor_model: Online actor; must provide ``save_weights(path)``.
        critic_model: Online critic.
        target_actor: Target actor.
        target_critic: Target critic.
        root: Output directory, relative to the current working directory.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(root, exist_ok=True)

    networks = (
        ("actor_model", actor_model),
        ("critic_model", critic_model),
        ("target_actor", target_actor),
        ("target_critic", target_critic),
    )
    for stem, network in networks:
        network.save_weights("./{}/{}.h5".format(root, stem))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor):
    """Build a new ``Environment`` for one driving cycle and reward factor.

    The module-level ``cell_model``, ``battery_path`` and ``motor_path`` are
    passed through unchanged.

    Args:
        driving_path: Forwarded as the Environment's driving-cycle argument.
            Callers in this file pass an already loaded cycle array rather
            than a path.
        reward_factor: Forwarded as the Environment's last argument.

    Returns:
        The newly constructed ``Environment``.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [14]:
def test_agent(actor_model, reward_factor):
    """Run one exploration-free episode with ``actor_model`` and report the result.

    The cycle is loaded from a fixed ``.mat`` file (column 1 of its
    ``"sch_cycle"`` entry), a fresh environment is built for it and the
    episode is rolled out until the environment reports ``done``. A summary
    is printed and the cycle and the action history are plotted side by side.

    The greedy action is computed from the ``actor_model`` argument itself.
    Routing it through ``policy_epsilon_greedy`` would silently evaluate the
    module-level ``actor_model`` instead of the model passed in.

    Args:
        actor_model: Actor network to evaluate.
        reward_factor: Reward factor forwarded to the environment.

    Returns:
        ``env.history`` of the evaluation episode.
    """
    test_cycle_path = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
    test_cycle = sio.loadmat(test_cycle_path)["sch_cycle"][:, 1]
    env = initialization_env(test_cycle, reward_factor)

    total_reward = 0
    state = env.reset()
    done = False
    while not done:
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)

        # Same greedy rule as policy_epsilon_greedy: scale the actor output by
        # the upper bound, then clip to the bounds held in the last two state entries.
        j_min = tf_state[0][-2].numpy()
        j_max = tf_state[0][-1].numpy()
        raw_action = tf.squeeze(actor_model(tf_state)).numpy()
        action = np.clip(raw_action * j_max, j_min, j_max)

        state, reward, done = env.step(action)
        total_reward += reward

    # Sum of absolute deviations of the logged SOC from 0.6
    # (presumably the SOC reference level -- confirm against Environment).
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))

    print("******************* Test is start *****************")
    print('Total reward: {}'.format(total_reward),
          "SOC: {:.4f}".format(env.SOC),
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption))
    print("******************* Test is done *****************")
    print("")
    plt.subplot(1, 2, 1)
    plt.plot(test_cycle)
    plt.subplot(1, 2, 2)
    plt.plot(env.history["Action"])
    plt.show()
    return env.history
    

In [15]:
# Train one independent DDPG agent per reward factor on a single driving cycle
# and collect the per-episode metrics of every run in `results_dict`.
# print(env.version)

# num_trials = 1
reward_factors = [1, 2, 3]
results_dict = {} 
driving_cycle_path = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
driving_cycle = sio.loadmat(driving_cycle_path)
# Keep column 1 of the "sch_cycle" entry only; the same array is reused for every episode.
driving_cycle = driving_cycle["sch_cycle"][:, 1]

for reward_factor in reward_factors: 
    print("")
    # The "Trial" label printed here is the reward factor itself.
    print("Trial {}".format(reward_factor))
    print("")
    
    # These names are (re)bound at module level on purpose: Buffer.learn,
    # update_target and policy_epsilon_greedy read them as globals.
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()
    
    # Exploration rate and global step counter restart for every reward factor.
    eps = MAX_EPSILON 
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    episode_test_history = [] 
    episode_num_test = [] 
    for ep in range(total_episodes): 
#         driving_cycle = driver.get_cycle() 
        # A new Environment is built for every episode.
        env = initialization_env(driving_cycle, reward_factor)
        
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            # Add a leading batch dimension of 1 for the actor network.
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            # Terminal transitions are stored with an all-zero next state;
            # no separate done flag is recorded in the buffer.
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            # Learning and epsilon decay only start after DELAY_TRAINING steps;
            # until then eps stays at MAX_EPSILON and the buffer just fills up.
            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                # Exponential decay from MAX_EPSILON towards MIN_EPSILON.
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps
                                                                        -DELAY_TRAINING))

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption) 

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        # Sum of absolute deviations of the logged SOC from 0.6
        # (presumably the SOC reference level -- confirm against Environment).
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
        )
        print("")
        
        # Every 10th episode, keep the history of this *training* episode
        # (the dedicated exploration-free test run is commented out).
        if (ep + 1) % 10 == 0: 
#             history = test_agent(actor_model, reward_factor)
            history = env.history 
            episode_test_history.append(history) 
            episode_num_test.append(ep + 1)
            
#         if (ep + 1) % 200 == 0:             
    # Weights are saved once per reward factor, after its last episode.
    root = "DDPG_cycleOne_reward_factor{}".format(reward_factor)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)
            
    
    # Everything recorded for this reward factor, keyed by the factor.
    results_dict[reward_factor] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs, 
        "test_history": episode_test_history, 
        "test_episode_num": episode_num_test, 
    }


Trial 1

maximum steps, simulation is done ... 
elapsed_time: 14.995
Episode: 1 Exploration P: 1.0000 Total reward: -654.067536675542 SOC: 1.0000 Cumulative_SOC_deviation: 500.7182 Fuel Consumption: 153.3493

maximum steps, simulation is done ... 
elapsed_time: 13.954
Episode: 2 Exploration P: 1.0000 Total reward: -645.3713033938169 SOC: 1.0000 Cumulative_SOC_deviation: 492.8165 Fuel Consumption: 152.5548

maximum steps, simulation is done ... 
elapsed_time: 14.269
Episode: 3 Exploration P: 1.0000 Total reward: -645.9060113530802 SOC: 1.0000 Cumulative_SOC_deviation: 497.0967 Fuel Consumption: 148.8094



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_flo

maximum steps, simulation is done ... 
elapsed_time: 90.357
Episode: 28 Exploration P: 0.5172 Total reward: -452.5001815595476 SOC: 1.0000 Cumulative_SOC_deviation: 355.4982 Fuel Consumption: 97.0020

maximum steps, simulation is done ... 
elapsed_time: 90.575
Episode: 29 Exploration P: 0.5034 Total reward: -461.61917747246326 SOC: 1.0000 Cumulative_SOC_deviation: 367.6301 Fuel Consumption: 93.9891

maximum steps, simulation is done ... 
elapsed_time: 90.064
Episode: 30 Exploration P: 0.4901 Total reward: -448.49482446506477 SOC: 1.0000 Cumulative_SOC_deviation: 354.5264 Fuel Consumption: 93.9684

maximum steps, simulation is done ... 
elapsed_time: 91.050
Episode: 31 Exploration P: 0.4771 Total reward: -455.07937756891 SOC: 1.0000 Cumulative_SOC_deviation: 362.9372 Fuel Consumption: 92.1421

maximum steps, simulation is done ... 
elapsed_time: 89.823
Episode: 32 Exploration P: 0.4644 Total reward: -384.30729192097743 SOC: 1.0000 Cumulative_SOC_deviation: 297.2417 Fuel Consumption: 87.

maximum steps, simulation is done ... 
elapsed_time: 91.771
Episode: 69 Exploration P: 0.1745 Total reward: -339.0889219233242 SOC: 0.3406 Cumulative_SOC_deviation: 303.2116 Fuel Consumption: 35.8773

maximum steps, simulation is done ... 
elapsed_time: 90.400
Episode: 70 Exploration P: 0.1701 Total reward: -350.8245401728578 SOC: 0.3684 Cumulative_SOC_deviation: 312.3883 Fuel Consumption: 38.4362

maximum steps, simulation is done ... 
elapsed_time: 91.310
Episode: 71 Exploration P: 0.1657 Total reward: -361.74351380809355 SOC: 0.2820 Cumulative_SOC_deviation: 330.5197 Fuel Consumption: 31.2238

maximum steps, simulation is done ... 
elapsed_time: 91.649
Episode: 72 Exploration P: 0.1615 Total reward: -375.0597432194836 SOC: 0.2610 Cumulative_SOC_deviation: 344.6923 Fuel Consumption: 30.3674

maximum steps, simulation is done ... 
elapsed_time: 91.419
Episode: 73 Exploration P: 0.1574 Total reward: -351.6975150670546 SOC: 0.3293 Cumulative_SOC_deviation: 316.5837 Fuel Consumption: 35.

maximum steps, simulation is done ... 
elapsed_time: 92.261
Episode: 110 Exploration P: 0.0634 Total reward: -530.6093231988131 SOC: 0.0170 Cumulative_SOC_deviation: 516.8747 Fuel Consumption: 13.7346

maximum steps, simulation is done ... 
elapsed_time: 91.585
Episode: 111 Exploration P: 0.0620 Total reward: -535.1037961566425 SOC: 0.0139 Cumulative_SOC_deviation: 521.8232 Fuel Consumption: 13.2806

battery power is 7139.380133788138(+) but condition is not avail
elapsed_time: 90.321
Episode: 112 Exploration P: 0.0606 Total reward: -1542.0689579718978 SOC: -0.0001 Cumulative_SOC_deviation: 529.3061 Fuel Consumption: 13.3630

battery power is 5967.407130477593(+) but condition is not avail
elapsed_time: 85.092
Episode: 113 Exploration P: 0.0593 Total reward: -1498.4579397952903 SOC: -0.0001 Cumulative_SOC_deviation: 488.1824 Fuel Consumption: 10.8757

maximum steps, simulation is done ... 
elapsed_time: 92.258
Episode: 114 Exploration P: 0.0580 Total reward: -532.8582485047749 SOC: 0.0

maximum steps, simulation is done ... 
elapsed_time: 91.918
Episode: 147 Exploration P: 0.0316 Total reward: -428.6135056983125 SOC: 0.2559 Cumulative_SOC_deviation: 401.5473 Fuel Consumption: 27.0662

maximum steps, simulation is done ... 
elapsed_time: 91.716
Episode: 148 Exploration P: 0.0310 Total reward: -428.9358232371271 SOC: 0.2312 Cumulative_SOC_deviation: 403.0457 Fuel Consumption: 25.8901

maximum steps, simulation is done ... 
elapsed_time: 91.170
Episode: 149 Exploration P: 0.0304 Total reward: -446.4501045727574 SOC: 0.2241 Cumulative_SOC_deviation: 420.0961 Fuel Consumption: 26.3540

maximum steps, simulation is done ... 
elapsed_time: 91.188
Episode: 150 Exploration P: 0.0299 Total reward: -442.220091582241 SOC: 0.2110 Cumulative_SOC_deviation: 417.5212 Fuel Consumption: 24.6989

maximum steps, simulation is done ... 
elapsed_time: 92.076
Episode: 151 Exploration P: 0.0293 Total reward: -484.273704315926 SOC: 0.1653 Cumulative_SOC_deviation: 462.6853 Fuel Consumption: 2

maximum steps, simulation is done ... 
elapsed_time: 92.068
Episode: 188 Exploration P: 0.0170 Total reward: -352.58330987036516 SOC: 0.3750 Cumulative_SOC_deviation: 316.6912 Fuel Consumption: 35.8921

maximum steps, simulation is done ... 
elapsed_time: 91.916
Episode: 189 Exploration P: 0.0168 Total reward: -464.2074004804843 SOC: 0.2226 Cumulative_SOC_deviation: 438.4781 Fuel Consumption: 25.7293

maximum steps, simulation is done ... 
elapsed_time: 92.022
Episode: 190 Exploration P: 0.0166 Total reward: -438.42071485751995 SOC: 0.1575 Cumulative_SOC_deviation: 416.5535 Fuel Consumption: 21.8672

maximum steps, simulation is done ... 
elapsed_time: 92.234
Episode: 191 Exploration P: 0.0164 Total reward: -475.8289333716386 SOC: 0.2766 Cumulative_SOC_deviation: 445.3534 Fuel Consumption: 30.4756

maximum steps, simulation is done ... 
elapsed_time: 91.693
Episode: 192 Exploration P: 0.0163 Total reward: -442.4676300908389 SOC: 0.2471 Cumulative_SOC_deviation: 413.7622 Fuel Consumptio

maximum steps, simulation is done ... 
elapsed_time: 90.917
Episode: 15 Exploration P: 0.7347 Total reward: -1014.1695272404218 SOC: 1.0000 Cumulative_SOC_deviation: 447.2183 Fuel Consumption: 119.7328

maximum steps, simulation is done ... 
elapsed_time: 90.656
Episode: 16 Exploration P: 0.7151 Total reward: -1050.253434637234 SOC: 1.0000 Cumulative_SOC_deviation: 465.9258 Fuel Consumption: 118.4018

maximum steps, simulation is done ... 
elapsed_time: 90.905
Episode: 17 Exploration P: 0.6960 Total reward: -1008.4723460061131 SOC: 1.0000 Cumulative_SOC_deviation: 445.1333 Fuel Consumption: 118.2058

maximum steps, simulation is done ... 
elapsed_time: 90.980
Episode: 18 Exploration P: 0.6774 Total reward: -1020.2843361708899 SOC: 1.0000 Cumulative_SOC_deviation: 453.7581 Fuel Consumption: 112.7681

maximum steps, simulation is done ... 
elapsed_time: 89.528
Episode: 19 Exploration P: 0.6594 Total reward: -982.3058165886298 SOC: 1.0000 Cumulative_SOC_deviation: 435.4395 Fuel Consumptio

maximum steps, simulation is done ... 
elapsed_time: 81.978
Episode: 56 Exploration P: 0.2451 Total reward: -247.93114224639834 SOC: 0.5942 Cumulative_SOC_deviation: 97.2049 Fuel Consumption: 53.5213

maximum steps, simulation is done ... 
elapsed_time: 82.642
Episode: 57 Exploration P: 0.2387 Total reward: -205.28651320809587 SOC: 0.6107 Cumulative_SOC_deviation: 75.1294 Fuel Consumption: 55.0278

maximum steps, simulation is done ... 
elapsed_time: 82.476
Episode: 58 Exploration P: 0.2325 Total reward: -347.2694749806034 SOC: 0.5316 Cumulative_SOC_deviation: 148.7932 Fuel Consumption: 49.6830

maximum steps, simulation is done ... 
elapsed_time: 82.304
Episode: 59 Exploration P: 0.2265 Total reward: -351.1969314016972 SOC: 0.5242 Cumulative_SOC_deviation: 151.3606 Fuel Consumption: 48.4758

maximum steps, simulation is done ... 
elapsed_time: 82.446
Episode: 60 Exploration P: 0.2206 Total reward: -303.88771314974804 SOC: 0.5676 Cumulative_SOC_deviation: 125.6681 Fuel Consumption: 52.

maximum steps, simulation is done ... 
elapsed_time: 83.141
Episode: 97 Exploration P: 0.0863 Total reward: -973.8374224118223 SOC: 0.0808 Cumulative_SOC_deviation: 477.8227 Fuel Consumption: 18.1920

maximum steps, simulation is done ... 
elapsed_time: 82.052
Episode: 98 Exploration P: 0.0842 Total reward: -888.0919776232247 SOC: 0.1115 Cumulative_SOC_deviation: 433.9233 Fuel Consumption: 20.2453

maximum steps, simulation is done ... 
elapsed_time: 82.553
Episode: 99 Exploration P: 0.0822 Total reward: -1046.5855393424504 SOC: 0.0550 Cumulative_SOC_deviation: 515.0635 Fuel Consumption: 16.4586

maximum steps, simulation is done ... 
elapsed_time: 83.166
Episode: 100 Exploration P: 0.0802 Total reward: -968.4340230107385 SOC: 0.1059 Cumulative_SOC_deviation: 474.1769 Fuel Consumption: 20.0803

maximum steps, simulation is done ... 
elapsed_time: 82.632
Episode: 101 Exploration P: 0.0783 Total reward: -884.9491654359588 SOC: 0.1397 Cumulative_SOC_deviation: 431.3614 Fuel Consumption: 2

battery power is 3023.017414106708(+) but condition is not avail
elapsed_time: 68.249
Episode: 136 Exploration P: 0.0384 Total reward: -1821.5619380139212 SOC: -0.0007 Cumulative_SOC_deviation: 407.4716 Fuel Consumption: 7.8201

battery power is 5727.992250018656(+) but condition is not avail
elapsed_time: 67.758
Episode: 137 Exploration P: 0.0378 Total reward: -1820.3449844857219 SOC: -0.0002 Cumulative_SOC_deviation: 406.7440 Fuel Consumption: 8.0575

battery power is 8593.399190081254(+) but condition is not avail
elapsed_time: 64.918
Episode: 138 Exploration P: 0.0372 Total reward: -1786.1354934000992 SOC: -0.0001 Cumulative_SOC_deviation: 390.1862 Fuel Consumption: 6.9633

maximum steps, simulation is done ... 
elapsed_time: 83.521
Episode: 139 Exploration P: 0.0365 Total reward: -680.9479152780656 SOC: 0.4458 Cumulative_SOC_deviation: 320.2579 Fuel Consumption: 40.4322

maximum steps, simulation is done ... 
elapsed_time: 83.463
Episode: 140 Exploration P: 0.0357 Total reward: -4

maximum steps, simulation is done ... 
elapsed_time: 83.937
Episode: 177 Exploration P: 0.0193 Total reward: -1080.2031320153935 SOC: 0.1327 Cumulative_SOC_deviation: 530.1923 Fuel Consumption: 19.8185

maximum steps, simulation is done ... 
elapsed_time: 83.677
Episode: 178 Exploration P: 0.0191 Total reward: -1051.64020264841 SOC: 0.0887 Cumulative_SOC_deviation: 517.7751 Fuel Consumption: 16.0901

maximum steps, simulation is done ... 
elapsed_time: 83.942
Episode: 179 Exploration P: 0.0188 Total reward: -1070.9764560932674 SOC: 0.1249 Cumulative_SOC_deviation: 525.7363 Fuel Consumption: 19.5038

maximum steps, simulation is done ... 
elapsed_time: 83.965
Episode: 180 Exploration P: 0.0186 Total reward: -1117.6383584127263 SOC: 0.1558 Cumulative_SOC_deviation: 547.9492 Fuel Consumption: 21.7400

maximum steps, simulation is done ... 
elapsed_time: 82.584
Episode: 181 Exploration P: 0.0183 Total reward: -1071.3538932733277 SOC: 0.1107 Cumulative_SOC_deviation: 526.0819 Fuel Consumpti

maximum steps, simulation is done ... 
elapsed_time: 40.265
Episode: 4 Exploration P: 0.9903 Total reward: -1645.8120453949514 SOC: 1.0000 Cumulative_SOC_deviation: 498.8427 Fuel Consumption: 149.2840

maximum steps, simulation is done ... 
elapsed_time: 82.177
Episode: 5 Exploration P: 0.9638 Total reward: -1624.6289328205758 SOC: 1.0000 Cumulative_SOC_deviation: 491.7232 Fuel Consumption: 149.4594

maximum steps, simulation is done ... 
elapsed_time: 82.588
Episode: 6 Exploration P: 0.9379 Total reward: -1638.3077405948281 SOC: 1.0000 Cumulative_SOC_deviation: 498.1263 Fuel Consumption: 143.9289

maximum steps, simulation is done ... 
elapsed_time: 82.842
Episode: 7 Exploration P: 0.9128 Total reward: -1613.371922134036 SOC: 1.0000 Cumulative_SOC_deviation: 490.2064 Fuel Consumption: 142.7526

maximum steps, simulation is done ... 
elapsed_time: 80.568
Episode: 8 Exploration P: 0.8883 Total reward: -1586.6748923501566 SOC: 1.0000 Cumulative_SOC_deviation: 482.2429 Fuel Consumption: 1

maximum steps, simulation is done ... 
elapsed_time: 83.924
Episode: 45 Exploration P: 0.3280 Total reward: -504.4627865444278 SOC: 0.8968 Cumulative_SOC_deviation: 142.6130 Fuel Consumption: 76.6237

maximum steps, simulation is done ... 
elapsed_time: 84.209
Episode: 46 Exploration P: 0.3194 Total reward: -323.2365562671972 SOC: 0.7605 Cumulative_SOC_deviation: 85.5508 Fuel Consumption: 66.5841

maximum steps, simulation is done ... 
elapsed_time: 84.365
Episode: 47 Exploration P: 0.3110 Total reward: -280.4623061257783 SOC: 0.7430 Cumulative_SOC_deviation: 71.8774 Fuel Consumption: 64.8300

maximum steps, simulation is done ... 
elapsed_time: 84.746
Episode: 48 Exploration P: 0.3028 Total reward: -351.58030471274134 SOC: 0.8301 Cumulative_SOC_deviation: 93.1931 Fuel Consumption: 72.0011

maximum steps, simulation is done ... 
elapsed_time: 84.212
Episode: 49 Exploration P: 0.2949 Total reward: -284.23082931678834 SOC: 0.7318 Cumulative_SOC_deviation: 73.1852 Fuel Consumption: 64.675

maximum steps, simulation is done ... 
elapsed_time: 85.128
Episode: 86 Exploration P: 0.1131 Total reward: -1258.5924740812836 SOC: 0.1819 Cumulative_SOC_deviation: 411.1040 Fuel Consumption: 25.2806

maximum steps, simulation is done ... 
elapsed_time: 84.724
Episode: 87 Exploration P: 0.1104 Total reward: -1357.042389825524 SOC: 0.1316 Cumulative_SOC_deviation: 445.0659 Fuel Consumption: 21.8447

maximum steps, simulation is done ... 
elapsed_time: 84.897
Episode: 88 Exploration P: 0.1076 Total reward: -1298.2341904470968 SOC: 0.1330 Cumulative_SOC_deviation: 425.6592 Fuel Consumption: 21.2565

maximum steps, simulation is done ... 
elapsed_time: 84.904
Episode: 89 Exploration P: 0.1050 Total reward: -1329.562550196723 SOC: 0.1694 Cumulative_SOC_deviation: 435.0289 Fuel Consumption: 24.4758

maximum steps, simulation is done ... 
elapsed_time: 84.909
Episode: 90 Exploration P: 0.1024 Total reward: -1323.126426012379 SOC: 0.1340 Cumulative_SOC_deviation: 433.8225 Fuel Consumption: 21

battery power is 7909.022371714286(+) but condition is not avail
elapsed_time: 82.790
Episode: 126 Exploration P: 0.0453 Total reward: -2600.7760654769445 SOC: -0.0007 Cumulative_SOC_deviation: 529.9723 Fuel Consumption: 12.6613

battery power is 13127.960802209038(+) but condition is not avail
elapsed_time: 80.787
Episode: 127 Exploration P: 0.0444 Total reward: -2479.626433390107 SOC: -0.0003 Cumulative_SOC_deviation: 489.7470 Fuel Consumption: 12.1863

battery power is 7575.488724445846(+) but condition is not avail
elapsed_time: 75.474
Episode: 128 Exploration P: 0.0436 Total reward: -2386.388592215527 SOC: -0.0006 Cumulative_SOC_deviation: 459.4319 Fuel Consumption: 9.8949

battery power is 6921.582308501251(+) but condition is not avail
elapsed_time: 69.853
Episode: 129 Exploration P: 0.0428 Total reward: -2227.7623052691542 SOC: -0.0001 Cumulative_SOC_deviation: 407.1614 Fuel Consumption: 8.0782

battery power is 3262.287183941105(+) but condition is not avail
elapsed_time: 69.1

maximum steps, simulation is done ... 
elapsed_time: 85.754
Episode: 166 Exploration P: 0.0222 Total reward: -509.41862829957404 SOC: 0.4813 Cumulative_SOC_deviation: 156.0275 Fuel Consumption: 41.3360

maximum steps, simulation is done ... 
elapsed_time: 85.772
Episode: 167 Exploration P: 0.0219 Total reward: -339.23225828832176 SOC: 0.5305 Cumulative_SOC_deviation: 97.3677 Fuel Consumption: 47.1292

maximum steps, simulation is done ... 
elapsed_time: 85.425
Episode: 168 Exploration P: 0.0215 Total reward: -523.8138929274267 SOC: 0.4243 Cumulative_SOC_deviation: 161.0507 Fuel Consumption: 40.6618

maximum steps, simulation is done ... 
elapsed_time: 85.995
Episode: 169 Exploration P: 0.0212 Total reward: -609.3819156218705 SOC: 0.4110 Cumulative_SOC_deviation: 190.3457 Fuel Consumption: 38.3447

maximum steps, simulation is done ... 
elapsed_time: 85.742
Episode: 170 Exploration P: 0.0209 Total reward: -603.9831643062676 SOC: 0.4681 Cumulative_SOC_deviation: 187.3433 Fuel Consumption

In [16]:
# Persist the metrics of all reward-factor runs for later analysis.
with open("DDPG_cycleOne_1to3.pkl", "wb") as results_file: 
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [17]:
# results_dict