In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 
import scipy.io as sio

from vehicle_model_variant import Environment 
from cell_model import CellModel 
from driver_MDP import Driver_MDP 

# Set CUDA_VISIBLE_DEVICES to '-1' for this process.
# NOTE(review): presumably meant to hide all GPUs so TensorFlow runs on CPU;
# this assignment happens after `import tensorflow` above - confirm it still
# takes effect in the TensorFlow version in use.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Relative paths to the .mat data files (driving cycle, battery, motor).
# NOTE(review): `drving_cycle` is misspelled and, in the visible notebook, is
# only referenced by the commented-out Environment(...) call below.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# Shared CellModel instance; it is passed to every Environment built by
# initialization_env().
cell_model = CellModel()
# env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)
# Only used by commented-out `driver.get_cycle()` calls in the visible cells.
# NOTE(review): the meaning of the 0.02 argument is defined in driver_MDP - confirm.
driver = Driver_MDP(0.02)

# Width of a state vector: sizes the replay-buffer arrays and the network inputs.
num_states = 4

In [3]:
class OUActionNoise:
    """Temporally correlated noise generator (discretised Ornstein-Uhlenbeck update).

    Each call advances the internal value by one step of size ``dt``:
    a pull towards ``mean`` scaled by ``theta`` plus a Gaussian term scaled by
    ``std_deviation * sqrt(dt)``.

    Args:
        mean: Value the process is pulled towards.
        std_deviation: Scale of the Gaussian perturbation.
        theta: Strength of the pull towards ``mean``.
        dt: Step size used in the update.
        x_initial: Starting value; when ``None`` the process starts at 0.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.theta = theta
        self.mean = mean
        self.std_dev = std_deviation
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Restart the process from ``x_initial`` (or 0 when none was given)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process by one step and return the new value."""
        drift = self.theta * (self.mean - self.x_prev) * self.dt
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        self.x_prev = self.x_prev + drift + diffusion
        return self.x_prev

In [4]:
class Buffer:
    """Circular replay buffer that also runs one DDPG actor/critic update.

    Transitions are stored as ``(state, action, reward, next_state)`` in
    pre-allocated NumPy arrays; once ``buffer_capacity`` entries have been
    written the oldest entries are overwritten.

    ``learn`` relies on names defined at notebook level: ``actor_model``,
    ``critic_model``, ``target_actor``, ``target_critic``, ``actor_optimizer``,
    ``critic_optimizer`` and ``gamma``. ``__init__`` reads ``num_states``.

    Args:
        buffer_capacity: Maximum number of stored transitions.
        batch_size: Number of transitions sampled per call to ``learn``.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of record() calls so far (not capped at the capacity).
        self.buffer_counter = 0

        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))

    def record(self, obs_tuple):
        """Store one transition, overwriting the oldest slot when full.

        Args:
            obs_tuple: Sequence ``(state, action, reward, next_state)``.
        """
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    def learn(self):
        """Sample a batch (with replacement) and update the critic, then the actor.

        Returns without doing anything when no transition has been recorded
        yet, instead of failing inside ``np.random.choice``.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            return
        batch_indices = np.random.choice(record_range, self.batch_size)

        # The buffers are float64 NumPy arrays while the Keras models compute in
        # float32; casting here avoids the per-layer autocast warning.
        state_batch = tf.cast(
            tf.convert_to_tensor(self.state_buffer[batch_indices]), dtype=tf.float32
        )
        action_batch = tf.cast(
            tf.convert_to_tensor(self.action_buffer[batch_indices]), dtype=tf.float32
        )
        reward_batch = tf.cast(
            tf.convert_to_tensor(self.reward_buffer[batch_indices]), dtype=tf.float32
        )
        next_state_batch = tf.cast(
            tf.convert_to_tensor(self.next_state_buffer[batch_indices]), dtype=tf.float32
        )

        # NOTE(review): no terminal flag is stored, so the bootstrap term below
        # is applied to every sampled transition, including episode-final ones.
        # NOTE(review): the models are called without `training=True`;
        # presumably the BatchNormalization layers therefore run in inference
        # mode during updates - confirm this is intended.

        # Critic update: regress Q(s, a) onto r + gamma * Q'(s', mu'(s')).
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor update: maximise the critic's value of the actor's own actions
        # (implemented as minimising the negated mean).
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Soft-update both target networks towards their online counterparts.

    Every weight of ``target_critic`` / ``target_actor`` becomes
    ``(1 - tau) * target + tau * online``, using the notebook-level
    ``critic_model`` / ``actor_model`` as the online networks. The critic is
    updated first, then the actor.

    Args:
        tau: Interpolation factor applied to the online weights.
    """
    network_pairs = (
        (target_critic, critic_model),
        (target_actor, actor_model),
    )
    for target_net, online_net in network_pairs:
        current_target = target_net.weights
        blended = [
            current_target[idx] * (1 - tau) + tau * online_weight
            for idx, online_weight in enumerate(online_net.weights)
        ]
        target_net.set_weights(blended)
    

In [6]:
def get_actor():
    """Build the actor network mapping a state to one action in (0, 1).

    Architecture: BatchNormalization on the raw input, two 512-unit ReLU
    layers, and a single sigmoid output whose kernel is initialised uniformly
    in [-0.003, 0.003]. Only the inputs are batch-normalised.

    Returns:
        tf.keras.Model taking a ``(batch, num_states)`` input and producing a
        ``(batch, 1)`` output.
    """
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` must be a tuple; `(num_states)` without the trailing comma is a
    # bare int.
    inputs = layers.Input(shape=(num_states,))
    inputs_batchnorm = layers.BatchNormalization()(inputs)

    out = layers.Dense(512, activation="relu")(inputs_batchnorm)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic():
    """Build the critic network estimating a scalar value for (state, action).

    The state branch (BatchNormalization -> Dense 16 -> Dense 32) and the
    action branch (Dense 32) are concatenated and passed through two 512-unit
    ReLU layers to a single linear output. Only the state input is
    batch-normalised.

    Returns:
        tf.keras.Model taking ``[state, action]`` inputs of shapes
        ``(batch, num_states)`` and ``(batch, 1)`` and producing a
        ``(batch, 1)`` output.
    """
    # `shape` must be a tuple; `(num_states)` / `(1)` are bare ints.
    state_input = layers.Input(shape=(num_states,))
    state_input_batchnorm = layers.BatchNormalization()(state_input)

    state_out = layers.Dense(16, activation="relu")(state_input_batchnorm)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    model = tf.keras.Model([state_input, action_input], outputs)
    return model
    

In [8]:
def policy(state, noise_object):
    """Return the actor's action with additive noise, clipped to the legal range.

    The bounds are read from the state itself: element 2 of the first row is
    the lower bound and element 3 the upper bound. The notebook-level
    ``actor_model`` output plus one noise sample is multiplied by the upper
    bound and then clipped to ``[lower, upper]``.

    Args:
        state: Batched state tensor (only row 0 is used for the bounds).
        noise_object: Zero-argument callable returning a noise sample.

    Returns:
        The clipped action.
    """
    lower_bound = state[0][2].numpy()
    upper_bound = state[0][3].numpy()

    actor_output = tf.squeeze(actor_model(state))
    perturbation = noise_object()
    noisy_output = actor_output.numpy() + perturbation

    return np.clip(noisy_output * upper_bound, lower_bound, upper_bound)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Pick an action epsilon-greedily within the bounds carried by the state.

    The last two elements of the first state row are the lower and upper
    action bounds. With probability ``eps`` one of 10 evenly spaced values
    between the bounds is returned; otherwise the notebook-level
    ``actor_model`` output is multiplied by the upper bound and clipped to
    ``[lower, upper]``.

    Args:
        state: Batched state tensor (only row 0 is used for the bounds).
        eps: Exploration probability; a negative value always takes the
            greedy branch.

    Returns:
        The selected action.
    """
    lower_bound = state[0][-2].numpy()
    upper_bound = state[0][-1].numpy()

    explore = random.random() < eps
    if not explore:
        greedy_output = tf.squeeze(actor_model(state)).numpy()
        return np.clip(greedy_output * upper_bound, lower_bound, upper_bound)

    # Exploration: uniform choice among 10 grid points spanning the bounds.
    grid_index = random.randint(0, 9)
    candidates = np.linspace(lower_bound, upper_bound, 10)
    return candidates[grid_index]

In [10]:
# Exploration-noise objects. NOTE(review): neither `std_dev` nor `ou_noise` is
# used by the visible training loop, which explores with epsilon-greedy instead.
std_dev = 0.2 
ou_noise = OUActionNoise(mean=0, std_deviation=0.2)

# Learning rates and optimizers; Buffer.learn() reads the optimizers as globals.
critic_lr = 0.0005 
actor_lr = 0.00025 
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 200  # episodes run per reward factor
gamma = 0.95  # discount factor used for the critic target in Buffer.learn()
tau = 0.001  # soft-update factor passed to update_target()

# Epsilon-greedy schedule: epsilon decays exponentially from MAX_EPSILON towards
# MIN_EPSILON as a function of the steps taken beyond DELAY_TRAINING.
MAX_EPSILON = 1.0 
MIN_EPSILON = 0.01 
DECAY_RATE = 0.00002
BATCH_SIZE = 32  # replay batch size handed to Buffer
DELAY_TRAINING = 5000  # learning starts once the step counter exceeds this value

In [11]:
def initialization():
    """Create fresh online/target networks and an empty replay buffer.

    The target networks start as exact weight copies of the online networks.
    Networks are built in the order: actor, critic, target actor, target critic.

    Returns:
        Tuple ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    online_actor = get_actor()
    online_critic = get_critic()
    actor_copy = get_actor()
    critic_copy = get_critic()

    # Synchronise each target network with its online counterpart.
    for copy_net, source_net in ((actor_copy, online_actor), (critic_copy, online_critic)):
        copy_net.set_weights(source_net.get_weights())

    # To resume from saved checkpoints instead of starting fresh:
    #     online_actor.load_weights("./DDPG1_trial1/actor_model_checkpoint")
    #     online_critic.load_weights("./DDPG1_trial1/critic_model_checkpoint")
    #     actor_copy.load_weights("./DDPG1_trial1/target_actor_checkpoint")
    #     critic_copy.load_weights("./DDPG1_trial1/target_critic_checkpoint")

    replay = Buffer(500000, BATCH_SIZE)
    return online_actor, online_critic, actor_copy, critic_copy, replay

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Write the weights of all four networks as ``.h5`` files under ``root``.

    The directory is created if needed. File paths are built with
    ``os.path.join`` so they always land inside the directory that was
    created, including when ``root`` is an absolute path (prefixing ``"./"``
    to an absolute ``root`` pointed somewhere else).

    Args:
        actor_model: Online actor; saved as ``actor_model.h5``.
        critic_model: Online critic; saved as ``critic_model.h5``.
        target_actor: Target actor; saved as ``target_actor.h5``.
        target_critic: Target critic; saved as ``target_critic.h5``.
        root: Output directory (relative or absolute).
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(root, exist_ok=True)

    actor_model.save_weights(os.path.join(root, "actor_model.h5"))
    critic_model.save_weights(os.path.join(root, "critic_model.h5"))
    target_actor.save_weights(os.path.join(root, "target_actor.h5"))
    target_critic.save_weights(os.path.join(root, "target_critic.h5"))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor):
    """Build a new ``Environment`` for one episode.

    Uses the notebook-level ``cell_model``, ``battery_path`` and ``motor_path``.

    Args:
        driving_path: Driving-cycle argument forwarded to ``Environment``
            (callers in this notebook pass the cycle array loaded from the
            ``.mat`` file rather than a path).
        reward_factor: Reward weighting forwarded to ``Environment``.

    Returns:
        The newly constructed ``Environment``.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [14]:
def _greedy_action(actor, tf_state):
    """Return ``actor``'s action for ``tf_state``, scaled and clipped to the legal range.

    Mirrors the greedy branch of ``policy_epsilon_greedy`` but uses the model
    passed in rather than the notebook-level ``actor_model``.
    """
    j_min = tf_state[0][-2].numpy()
    j_max = tf_state[0][-1].numpy()
    sampled_action = tf.squeeze(actor(tf_state)).numpy()
    return np.clip(sampled_action * j_max, j_min, j_max)


def test_agent(actor_model, reward_factor):
    """Run one greedy (no exploration) episode on the fixed test cycle.

    Previously the ``actor_model`` argument was ignored: actions came from
    ``policy_epsilon_greedy``, which reads the notebook-level ``actor_model``.
    The passed-in model is now the one being evaluated.

    Prints the total reward, final SOC, cumulative absolute SOC deviation
    from 0.6 and fuel consumption, and plots the cycle next to the action
    history.

    Args:
        actor_model: Actor network to evaluate.
        reward_factor: Reward weighting forwarded to the environment.

    Returns:
        ``env.history`` of the evaluation episode.
    """
#     test_cycle = driver.get_cycle() 
    test_cycle_path = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
    test_cycle = sio.loadmat(test_cycle_path)
    test_cycle = test_cycle["sch_cycle"][:, 1]
    env = initialization_env(test_cycle, reward_factor)

    total_reward = 0
    state = env.reset()
    while True:
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
        action = _greedy_action(actor_model, tf_state)
        next_state, reward, done = env.step(action)

        state = next_state
        total_reward += reward

        if done:
            break

    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))

    print("******************* Test is start *****************")
#     print(test_cycle)
    print('Total reward: {}'.format(total_reward),
          "SOC: {:.4f}".format(env.SOC),
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption))
    print("******************* Test is done *****************")
    print("")
    plt.subplot(1, 2, 1)
    plt.plot(test_cycle)
    plt.subplot(1, 2, 2)
    plt.plot(env.history["Action"])
    plt.show()
    return env.history
    

In [15]:
# print(env.version)

# num_trials = 1
# Train one DDPG agent per reward factor on a single driving cycle and collect
# per-episode metrics in `results_dict`, keyed by reward factor.
reward_factors = [4, 5, 6]
results_dict = {} 
driving_cycle_path = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
driving_cycle = sio.loadmat(driving_cycle_path)
# Keep only column 1 of the "sch_cycle" array.
# NOTE(review): presumably the speed trace - confirm against the .mat layout.
driving_cycle = driving_cycle["sch_cycle"][:, 1]

for reward_factor in reward_factors: 
    print("")
    print("Trial {}".format(reward_factor))
    print("")
    
    # These names are rebound at notebook level on purpose:
    # policy_epsilon_greedy(), Buffer.learn() and update_target() read
    # actor_model / critic_model / target_actor / target_critic as globals.
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()
    
    eps = MAX_EPSILON 
    # Environment steps taken so far for this reward factor (across episodes).
    steps = 0
    
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    episode_test_history = [] 
    episode_num_test = [] 
    for ep in range(total_episodes): 
#         driving_cycle = driver.get_cycle() 
        # A fresh environment is built for every episode.
        env = initialization_env(driving_cycle, reward_factor)
        
        start = time.time() 
        state = env.reset() 
        episodic_reward = 0 

        while True: 
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
    #         print(action)
            next_state, reward, done = env.step(action)
            # On the final step the stored next state is replaced by zeros;
            # the buffer keeps no separate terminal flag.
            if done: 
                next_state = [0] * num_states 

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward 

            # Learning, target updates and epsilon decay only begin once more
            # than DELAY_TRAINING steps have been taken.
            if steps > DELAY_TRAINING: 
                buffer.learn() 
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps
                                                                        -DELAY_TRAINING))

            steps += 1

            if done: 
                break 

            state = next_state 

        elapsed_time = time.time() - start 
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward) 
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption) 

    #     print("Episode * {} * Avg Reward is ==> {}".format(ep, avg_reward))
        # Sum of |SOC - 0.6| over the episode's recorded SOC history.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward), 
              "SOC: {:.4f}".format(env.SOC), 
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
        )
        print("")
        
        # Every 10th episode the *training* episode's history is stored under
        # "test_history"; the separate greedy evaluation via test_agent() is
        # commented out.
        if (ep + 1) % 10 == 0: 
#             history = test_agent(actor_model, reward_factor)
            history = env.history 
            episode_test_history.append(history) 
            episode_num_test.append(ep + 1)
            
#         if (ep + 1) % 200 == 0:             
    # Weights are saved once per reward factor, after all episodes have run.
    root = "DDPG_cycleOne_reward_factor{}".format(reward_factor)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)
            
    
    results_dict[reward_factor] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs, 
        "test_history": episode_test_history, 
        "test_episode_num": episode_num_test, 
    }


Trial 4

maximum steps, simulation is done ... 
elapsed_time: 9.347
Episode: 1 Exploration P: 1.0000 Total reward: -2137.9022247450516 SOC: 0.9996 Cumulative_SOC_deviation: 485.5456 Fuel Consumption: 195.7199

maximum steps, simulation is done ... 
elapsed_time: 9.184
Episode: 2 Exploration P: 1.0000 Total reward: -2128.0913946949604 SOC: 1.0000 Cumulative_SOC_deviation: 483.9072 Fuel Consumption: 192.4626

maximum steps, simulation is done ... 
elapsed_time: 9.720
Episode: 3 Exploration P: 1.0000 Total reward: -2045.229986222075 SOC: 0.9994 Cumulative_SOC_deviation: 464.7664 Fuel Consumption: 186.1644



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_flo

maximum steps, simulation is done ... 
elapsed_time: 85.816
Episode: 28 Exploration P: 0.5172 Total reward: -479.9163216701081 SOC: 0.7898 Cumulative_SOC_deviation: 85.1040 Fuel Consumption: 139.5005

maximum steps, simulation is done ... 
elapsed_time: 88.089
Episode: 29 Exploration P: 0.5034 Total reward: -521.1393039519452 SOC: 0.8215 Cumulative_SOC_deviation: 94.5773 Fuel Consumption: 142.8299

maximum steps, simulation is done ... 
elapsed_time: 86.892
Episode: 30 Exploration P: 0.4901 Total reward: -353.8176414565428 SOC: 0.6256 Cumulative_SOC_deviation: 56.5995 Fuel Consumption: 127.4196

maximum steps, simulation is done ... 
elapsed_time: 87.857
Episode: 31 Exploration P: 0.4771 Total reward: -621.7246640657397 SOC: 0.8026 Cumulative_SOC_deviation: 119.8731 Fuel Consumption: 142.2321

maximum steps, simulation is done ... 
elapsed_time: 86.002
Episode: 32 Exploration P: 0.4644 Total reward: -866.5475790810062 SOC: 0.4637 Cumulative_SOC_deviation: 187.6064 Fuel Consumption: 116

  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (


maximum steps, simulation is done ... 
elapsed_time: 79.735
Episode: 44 Exploration P: 0.3368 Total reward: -2213.478872887127 SOC: 0.0311 Cumulative_SOC_deviation: 531.3512 Fuel Consumption: 88.0742

maximum steps, simulation is done ... 
elapsed_time: 80.131
Episode: 45 Exploration P: 0.3280 Total reward: -1759.3641294877586 SOC: 0.0991 Cumulative_SOC_deviation: 417.0520 Fuel Consumption: 91.1563

maximum steps, simulation is done ... 
elapsed_time: 79.459
Episode: 46 Exploration P: 0.3194 Total reward: -1722.1019561592154 SOC: 0.0924 Cumulative_SOC_deviation: 407.8833 Fuel Consumption: 90.5688

maximum steps, simulation is done ... 
elapsed_time: 79.698
Episode: 47 Exploration P: 0.3110 Total reward: -2126.997224687977 SOC: 0.0109 Cumulative_SOC_deviation: 510.2281 Fuel Consumption: 86.0848

battery power is 15515.997062127959(+) but condition is not avail
elapsed_time: 75.405
Episode: 48 Exploration P: 0.3033 Total reward: -2947.414711969484 SOC: -0.0001 Cumulative_SOC_deviation: 4

  2 * r_dis)) * (v_dis - (v_dis ** 2 - 4 * r_dis * p_bat) ** (0.5)) * (p_bat >= 0)


maximum steps, simulation is done ... 
elapsed_time: 79.506
Episode: 55 Exploration P: 0.2586 Total reward: -872.758434156487 SOC: 0.5934 Cumulative_SOC_deviation: 186.8362 Fuel Consumption: 125.4135

maximum steps, simulation is done ... 
elapsed_time: 79.703
Episode: 56 Exploration P: 0.2519 Total reward: -287.3128365219051 SOC: 0.5762 Cumulative_SOC_deviation: 42.2426 Fuel Consumption: 118.3426

maximum steps, simulation is done ... 
elapsed_time: 79.885
Episode: 57 Exploration P: 0.2453 Total reward: -330.2741553329674 SOC: 0.5404 Cumulative_SOC_deviation: 54.1854 Fuel Consumption: 113.5325

maximum steps, simulation is done ... 
elapsed_time: 79.754
Episode: 58 Exploration P: 0.2390 Total reward: -739.0322257585187 SOC: 0.4084 Cumulative_SOC_deviation: 158.8671 Fuel Consumption: 103.5638

maximum steps, simulation is done ... 
elapsed_time: 80.387
Episode: 59 Exploration P: 0.2328 Total reward: -590.0392138243818 SOC: 0.5574 Cumulative_SOC_deviation: 117.8659 Fuel Consumption: 118

maximum steps, simulation is done ... 
elapsed_time: 106.703
Episode: 96 Exploration P: 0.0926 Total reward: -2166.3194442864415 SOC: 0.1121 Cumulative_SOC_deviation: 521.5685 Fuel Consumption: 80.0456

maximum steps, simulation is done ... 
elapsed_time: 106.741
Episode: 97 Exploration P: 0.0903 Total reward: -742.2013283233824 SOC: 0.3316 Cumulative_SOC_deviation: 162.5552 Fuel Consumption: 91.9803

maximum steps, simulation is done ... 
elapsed_time: 107.000
Episode: 98 Exploration P: 0.0882 Total reward: -2374.9289356631425 SOC: 0.0752 Cumulative_SOC_deviation: 573.4911 Fuel Consumption: 80.9645

maximum steps, simulation is done ... 
elapsed_time: 107.315
Episode: 99 Exploration P: 0.0860 Total reward: -675.7967992368035 SOC: 0.2957 Cumulative_SOC_deviation: 146.6546 Fuel Consumption: 89.1783

maximum steps, simulation is done ... 
elapsed_time: 109.135
Episode: 100 Exploration P: 0.0840 Total reward: -1631.8428197563483 SOC: 0.1294 Cumulative_SOC_deviation: 387.9740 Fuel Consumpt

maximum steps, simulation is done ... 
elapsed_time: 106.631
Episode: 137 Exploration P: 0.0368 Total reward: -1123.9836419639944 SOC: 0.4077 Cumulative_SOC_deviation: 256.8639 Fuel Consumption: 96.5280

maximum steps, simulation is done ... 
elapsed_time: 106.967
Episode: 138 Exploration P: 0.0361 Total reward: -1128.7519957408504 SOC: 0.3973 Cumulative_SOC_deviation: 258.2925 Fuel Consumption: 95.5819

maximum steps, simulation is done ... 
elapsed_time: 106.971
Episode: 139 Exploration P: 0.0354 Total reward: -1259.789287644548 SOC: 0.4189 Cumulative_SOC_deviation: 290.7946 Fuel Consumption: 96.6108

maximum steps, simulation is done ... 
elapsed_time: 107.856
Episode: 140 Exploration P: 0.0347 Total reward: -1429.1834989539982 SOC: 0.3474 Cumulative_SOC_deviation: 333.3641 Fuel Consumption: 95.7273

maximum steps, simulation is done ... 
elapsed_time: 107.678
Episode: 141 Exploration P: 0.0340 Total reward: -1194.477599185368 SOC: 0.4046 Cumulative_SOC_deviation: 273.8626 Fuel Cons

maximum steps, simulation is done ... 
elapsed_time: 82.439
Episode: 177 Exploration P: 0.0191 Total reward: -1385.8501262000193 SOC: 0.2909 Cumulative_SOC_deviation: 324.7889 Fuel Consumption: 86.6945

maximum steps, simulation is done ... 
elapsed_time: 81.269
Episode: 178 Exploration P: 0.0189 Total reward: -687.4505397580614 SOC: 0.3432 Cumulative_SOC_deviation: 149.4088 Fuel Consumption: 89.8153

maximum steps, simulation is done ... 
elapsed_time: 83.030
Episode: 179 Exploration P: 0.0187 Total reward: -741.947462310248 SOC: 1.0000 Cumulative_SOC_deviation: 145.9468 Fuel Consumption: 158.1603

maximum steps, simulation is done ... 
elapsed_time: 82.349
Episode: 180 Exploration P: 0.0184 Total reward: -2406.1201084693457 SOC: 1.0000 Cumulative_SOC_deviation: 520.8584 Fuel Consumption: 322.6863

maximum steps, simulation is done ... 
elapsed_time: 82.783
Episode: 181 Exploration P: 0.0182 Total reward: -2418.584393666141 SOC: 1.0000 Cumulative_SOC_deviation: 523.3212 Fuel Consumpti

maximum steps, simulation is done ... 
elapsed_time: 38.943
Episode: 4 Exploration P: 0.9903 Total reward: -2598.3966623033407 SOC: 1.0000 Cumulative_SOC_deviation: 481.3043 Fuel Consumption: 191.8750

maximum steps, simulation is done ... 
elapsed_time: 78.924
Episode: 5 Exploration P: 0.9638 Total reward: -2604.141512435289 SOC: 0.9993 Cumulative_SOC_deviation: 483.6263 Fuel Consumption: 186.0098

maximum steps, simulation is done ... 
elapsed_time: 87.059
Episode: 6 Exploration P: 0.9379 Total reward: -2508.7858595644516 SOC: 1.0000 Cumulative_SOC_deviation: 465.6932 Fuel Consumption: 180.3198

maximum steps, simulation is done ... 
elapsed_time: 106.361
Episode: 7 Exploration P: 0.9128 Total reward: -2590.820558425093 SOC: 1.0000 Cumulative_SOC_deviation: 481.3044 Fuel Consumption: 184.2987

maximum steps, simulation is done ... 
elapsed_time: 92.224
Episode: 8 Exploration P: 0.8883 Total reward: -2507.244215771914 SOC: 0.9984 Cumulative_SOC_deviation: 466.9496 Fuel Consumption: 17

maximum steps, simulation is done ... 
elapsed_time: 83.669
Episode: 45 Exploration P: 0.3280 Total reward: -2317.776886362148 SOC: 0.0736 Cumulative_SOC_deviation: 445.7034 Fuel Consumption: 89.2597

maximum steps, simulation is done ... 
elapsed_time: 82.269
Episode: 46 Exploration P: 0.3194 Total reward: -2455.637462121732 SOC: 0.0499 Cumulative_SOC_deviation: 473.3147 Fuel Consumption: 89.0638

maximum steps, simulation is done ... 
elapsed_time: 82.507
Episode: 47 Exploration P: 0.3110 Total reward: -2667.5578940239766 SOC: 0.0612 Cumulative_SOC_deviation: 515.6679 Fuel Consumption: 89.2184

battery power is 4306.664468358059(+) but condition is not avail
elapsed_time: 79.573
Episode: 48 Exploration P: 0.3030 Total reward: -3525.3324484926543 SOC: -0.0003 Cumulative_SOC_deviation: 488.8492 Fuel Consumption: 84.0890

battery power is 7412.919352546885(+) but condition is not avail
elapsed_time: 70.165
Episode: 49 Exploration P: 0.2960 Total reward: -3311.6494015709504 SOC: -0.0008 

maximum steps, simulation is done ... 
elapsed_time: 103.678
Episode: 86 Exploration P: 0.1148 Total reward: -1443.769353758394 SOC: 0.3734 Cumulative_SOC_deviation: 268.4396 Fuel Consumption: 101.5716

maximum steps, simulation is done ... 
elapsed_time: 104.744
Episode: 87 Exploration P: 0.1120 Total reward: -1656.9467052726454 SOC: 0.3185 Cumulative_SOC_deviation: 311.6035 Fuel Consumption: 98.9294

maximum steps, simulation is done ... 
elapsed_time: 103.558
Episode: 88 Exploration P: 0.1092 Total reward: -1612.8869626548096 SOC: 0.3537 Cumulative_SOC_deviation: 301.5042 Fuel Consumption: 105.3659

maximum steps, simulation is done ... 
elapsed_time: 104.060
Episode: 89 Exploration P: 0.1065 Total reward: -1754.3387373705577 SOC: 0.2990 Cumulative_SOC_deviation: 330.3890 Fuel Consumption: 102.3936

maximum steps, simulation is done ... 
elapsed_time: 95.584
Episode: 90 Exploration P: 0.1039 Total reward: -2160.5476062209596 SOC: 0.3042 Cumulative_SOC_deviation: 410.8985 Fuel Consum

maximum steps, simulation is done ... 
elapsed_time: 111.694
Episode: 127 Exploration P: 0.0440 Total reward: -863.2217444474921 SOC: 0.4450 Cumulative_SOC_deviation: 149.3774 Fuel Consumption: 116.3346

maximum steps, simulation is done ... 
elapsed_time: 100.157
Episode: 128 Exploration P: 0.0431 Total reward: -926.7550692264302 SOC: 0.4877 Cumulative_SOC_deviation: 161.4170 Fuel Consumption: 119.6700

maximum steps, simulation is done ... 
elapsed_time: 79.512
Episode: 129 Exploration P: 0.0422 Total reward: -889.5543793372663 SOC: 0.4745 Cumulative_SOC_deviation: 154.2621 Fuel Consumption: 118.2437

maximum steps, simulation is done ... 
elapsed_time: 86.703
Episode: 130 Exploration P: 0.0413 Total reward: -1017.8193218649276 SOC: 0.4621 Cumulative_SOC_deviation: 180.3903 Fuel Consumption: 115.8679

maximum steps, simulation is done ... 
elapsed_time: 85.433
Episode: 131 Exploration P: 0.0405 Total reward: -1086.654764471562 SOC: 0.4513 Cumulative_SOC_deviation: 194.3778 Fuel Consu

maximum steps, simulation is done ... 
elapsed_time: 83.007
Episode: 168 Exploration P: 0.0210 Total reward: -915.8419370149378 SOC: 0.4346 Cumulative_SOC_deviation: 159.8601 Fuel Consumption: 116.5414

maximum steps, simulation is done ... 
elapsed_time: 83.692
Episode: 169 Exploration P: 0.0207 Total reward: -1214.77064596854 SOC: 0.3980 Cumulative_SOC_deviation: 220.6758 Fuel Consumption: 111.3919

maximum steps, simulation is done ... 
elapsed_time: 82.540
Episode: 170 Exploration P: 0.0204 Total reward: -1238.4577096848336 SOC: 0.4186 Cumulative_SOC_deviation: 224.8220 Fuel Consumption: 114.3478

maximum steps, simulation is done ... 
elapsed_time: 82.103
Episode: 171 Exploration P: 0.0202 Total reward: -1115.3437444062224 SOC: 0.4178 Cumulative_SOC_deviation: 200.0417 Fuel Consumption: 115.1354

maximum steps, simulation is done ... 
elapsed_time: 82.466
Episode: 172 Exploration P: 0.0199 Total reward: -1161.9577165064231 SOC: 0.4343 Cumulative_SOC_deviation: 209.0889 Fuel Consum



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

maximum steps, simulation is done ... 
elapsed_time: 37.359
Episode: 4 Exploration P: 0.9903 Tota

maximum steps, simulation is done ... 
elapsed_time: 82.346
Episode: 34 Exploration P: 0.4401 Total reward: -1059.8300388496016 SOC: 0.4866 Cumulative_SOC_deviation: 156.9067 Fuel Consumption: 118.3898

maximum steps, simulation is done ... 
elapsed_time: 82.440
Episode: 35 Exploration P: 0.4285 Total reward: -1258.306168099596 SOC: 0.4282 Cumulative_SOC_deviation: 190.8124 Fuel Consumption: 113.4317

maximum steps, simulation is done ... 
elapsed_time: 82.794
Episode: 36 Exploration P: 0.4171 Total reward: -818.1601488038069 SOC: 0.5025 Cumulative_SOC_deviation: 116.5803 Fuel Consumption: 118.6785

maximum steps, simulation is done ... 
elapsed_time: 83.100
Episode: 37 Exploration P: 0.4061 Total reward: -1180.6680129610231 SOC: 0.4555 Cumulative_SOC_deviation: 177.5274 Fuel Consumption: 115.5036

maximum steps, simulation is done ... 
elapsed_time: 82.943
Episode: 38 Exploration P: 0.3954 Total reward: -1681.4446804667648 SOC: 0.2954 Cumulative_SOC_deviation: 262.9796 Fuel Consumptio

maximum steps, simulation is done ... 
elapsed_time: 83.845
Episode: 75 Exploration P: 0.1519 Total reward: -651.4614299017019 SOC: 0.5777 Cumulative_SOC_deviation: 89.3479 Fuel Consumption: 115.3739

maximum steps, simulation is done ... 
elapsed_time: 83.630
Episode: 76 Exploration P: 0.1480 Total reward: -332.78539060050446 SOC: 0.5793 Cumulative_SOC_deviation: 36.9047 Fuel Consumption: 111.3573

maximum steps, simulation is done ... 
elapsed_time: 83.708
Episode: 77 Exploration P: 0.1443 Total reward: -632.1598343251085 SOC: 0.3237 Cumulative_SOC_deviation: 89.8235 Fuel Consumption: 93.2187

maximum steps, simulation is done ... 
elapsed_time: 82.963
Episode: 78 Exploration P: 0.1407 Total reward: -400.7847733590967 SOC: 0.5669 Cumulative_SOC_deviation: 48.7221 Fuel Consumption: 108.4524

maximum steps, simulation is done ... 
elapsed_time: 83.061
Episode: 79 Exploration P: 0.1371 Total reward: -270.88658925635593 SOC: 0.5923 Cumulative_SOC_deviation: 26.6793 Fuel Consumption: 110.

maximum steps, simulation is done ... 
elapsed_time: 83.831
Episode: 116 Exploration P: 0.0567 Total reward: -544.1912326653828 SOC: 0.5539 Cumulative_SOC_deviation: 72.9787 Fuel Consumption: 106.3192

maximum steps, simulation is done ... 
elapsed_time: 83.914
Episode: 117 Exploration P: 0.0555 Total reward: -389.57305568743544 SOC: 0.5849 Cumulative_SOC_deviation: 46.8409 Fuel Consumption: 108.5279

maximum steps, simulation is done ... 
elapsed_time: 83.931
Episode: 118 Exploration P: 0.0542 Total reward: -283.93906555149573 SOC: 0.5486 Cumulative_SOC_deviation: 29.7690 Fuel Consumption: 105.3253

maximum steps, simulation is done ... 
elapsed_time: 83.581
Episode: 119 Exploration P: 0.0530 Total reward: -326.09752014486185 SOC: 0.5695 Cumulative_SOC_deviation: 36.3125 Fuel Consumption: 108.2224

maximum steps, simulation is done ... 
elapsed_time: 83.930
Episode: 120 Exploration P: 0.0519 Total reward: -380.8122350264945 SOC: 0.5654 Cumulative_SOC_deviation: 45.7987 Fuel Consumptio

maximum steps, simulation is done ... 
elapsed_time: 74.814
Episode: 157 Exploration P: 0.0252 Total reward: -400.42140582358473 SOC: 0.5653 Cumulative_SOC_deviation: 48.2901 Fuel Consumption: 110.6809

maximum steps, simulation is done ... 
elapsed_time: 74.496
Episode: 158 Exploration P: 0.0247 Total reward: -839.8651542683363 SOC: 0.3381 Cumulative_SOC_deviation: 124.6157 Fuel Consumption: 92.1708

maximum steps, simulation is done ... 
elapsed_time: 74.680
Episode: 159 Exploration P: 0.0243 Total reward: -476.78587029214515 SOC: 0.5705 Cumulative_SOC_deviation: 61.5528 Fuel Consumption: 107.4688

maximum steps, simulation is done ... 
elapsed_time: 74.787
Episode: 160 Exploration P: 0.0240 Total reward: -447.54217421722177 SOC: 0.5425 Cumulative_SOC_deviation: 56.9604 Fuel Consumption: 105.7800

maximum steps, simulation is done ... 
elapsed_time: 74.851
Episode: 161 Exploration P: 0.0236 Total reward: -2333.7475359340733 SOC: 0.1314 Cumulative_SOC_deviation: 374.8223 Fuel Consumpt

maximum steps, simulation is done ... 
elapsed_time: 78.075
Episode: 198 Exploration P: 0.0149 Total reward: -251.69998076707736 SOC: 0.5905 Cumulative_SOC_deviation: 23.8851 Fuel Consumption: 108.3897

maximum steps, simulation is done ... 
elapsed_time: 78.050
Episode: 199 Exploration P: 0.0148 Total reward: -310.9226802977732 SOC: 0.5328 Cumulative_SOC_deviation: 34.3291 Fuel Consumption: 104.9480

maximum steps, simulation is done ... 
elapsed_time: 77.898
Episode: 200 Exploration P: 0.0147 Total reward: -507.32121686137003 SOC: 0.5692 Cumulative_SOC_deviation: 66.7676 Fuel Consumption: 106.7155

model is saved..


In [16]:
# Persist the collected metrics for all reward factors in the working
# directory. Only load this pickle from a trusted source: unpickling executes
# arbitrary code.
with open("DDPG_cycleOne_4to6.pkl", "wb") as f: 
    pickle.dump(results_dict, f, pickle.HIGHEST_PROTOCOL)

In [17]:
# results_dict