In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 
import scipy.io as sio

from vehicle_model_variant import Environment 
from cell_model import CellModel 
from driver_MDP import Driver_MDP 

# Hide every CUDA device from this process so TensorFlow runs on the CPU.
# NOTE(review): this is assigned after `import tensorflow`; the variable is
# conventionally set before the import — confirm it takes effect here.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Locations of the simulation database files, relative to the notebook directory.
# NOTE: `drving_cycle` (sic) is only referenced by the commented-out line below;
# the training and test cells load the same cycle file through their own path variables.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# Single CellModel instance passed to every Environment built by initialization_env().
cell_model = CellModel()
# env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)
# Only referenced from commented-out `driver.get_cycle()` calls in this notebook.
driver = Driver_MDP(0.02)

# Width of the state vector: sizes the replay buffer and the network inputs.
# The policies read the action bounds from the last two state entries.
num_states = 4

In [3]:
class OUActionNoise:
    """Temporally correlated exploration noise (discretised Ornstein-Uhlenbeck process).

    Each call advances the process by one step of size ``dt`` and returns the
    new value, which is also kept as the starting point for the next call.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        """Store the process parameters and move the process to its start value.

        Args:
            mean: Value the process is pulled toward.
            std_deviation: Scale of the random term.
            theta: Strength of the pull toward ``mean``.
            dt: Step size used by every call.
            x_initial: Start value; ``None`` means start from 0.
        """
        self.theta = theta
        self.mean = mean
        self.std_dev = std_deviation
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Rewind the process to ``x_initial`` (or to 0 when none was given)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance one step and return the new noise value."""
        # Deterministic pull toward the mean ...
        drift = self.theta * (self.mean - self.x_prev) * self.dt
        # ... plus one draw from NumPy's global random state.
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        x = self.x_prev + drift + diffusion
        self.x_prev = x
        return x

In [4]:
class Buffer:
    """Circular replay buffer that also runs one DDPG update per `learn()` call.

    Transitions are kept in pre-allocated float32 arrays (the dtype of the
    Keras models), so sampled batches reach the networks without an implicit
    float64 -> float32 cast.

    `learn()` reads these module-level names at call time: `actor_model`,
    `critic_model`, `target_actor`, `target_critic`, `actor_optimizer`,
    `critic_optimizer` and `gamma`.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64, state_dim=None):
        """Allocate storage for `buffer_capacity` transitions.

        Args:
            buffer_capacity: Maximum number of stored transitions; once full,
                the oldest entries are overwritten.
            batch_size: Number of transitions sampled by each `learn()` call.
            state_dim: Length of a state vector. Defaults to the module-level
                `num_states`.
        """
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        self.buffer_counter = 0

        if state_dim is None:
            state_dim = num_states

        self.state_buffer = np.zeros((self.buffer_capacity, state_dim), dtype=np.float32)
        self.action_buffer = np.zeros((self.buffer_capacity, 1), dtype=np.float32)
        self.reward_buffer = np.zeros((self.buffer_capacity, 1), dtype=np.float32)
        self.next_state_buffer = np.zeros((self.buffer_capacity, state_dim), dtype=np.float32)

    def record(self, obs_tuple):
        """Store one transition `(state, action, reward, next_state)`.

        Only the first four entries of `obs_tuple` are read.
        """
        # Wrap around so the newest transition replaces the oldest one.
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        self.buffer_counter += 1

    def learn(self):
        """Sample a mini-batch and apply one critic step followed by one actor step.

        Returns without doing anything while the buffer is still empty.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            # Nothing recorded yet: np.random.choice(0, ...) would raise.
            return
        # Uniform sampling with replacement (np.random.choice default).
        batch_indices = np.random.choice(record_range, self.batch_size)

        state_batch = tf.convert_to_tensor(self.state_buffer[batch_indices], dtype=tf.float32)
        action_batch = tf.convert_to_tensor(self.action_buffer[batch_indices], dtype=tf.float32)
        reward_batch = tf.convert_to_tensor(self.reward_buffer[batch_indices], dtype=tf.float32)
        next_state_batch = tf.convert_to_tensor(
            self.next_state_buffer[batch_indices], dtype=tf.float32
        )

        # Critic step: regress Q(s, a) onto the one-step TD target.
        # NOTE(review): no done flag is stored, so the target bootstraps from
        # `next_state` for every transition, including the final step of an
        # episode (for which the training loop stores an all-zero next state).
        with tf.GradientTape() as tape:
            target_actions = target_actor(next_state_batch)
            y = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor step: maximise the critic's value of the actor's own actions
        # (implemented as minimising the negated mean).
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = - tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Move both target networks a fraction `tau` toward their online networks.

    Every weight of `target_critic` / `target_actor` becomes
    ``target * (1 - tau) + tau * online``, using the module-level
    `critic_model` / `actor_model` as the online networks. The critic is
    updated first, then the actor.
    """

    def _blend_into(target_net, online_net):
        # Same per-weight blend for either network pair.
        target_ws = target_net.weights
        online_ws = online_net.weights
        blended = [
            target_ws[k] * (1 - tau) + tau * online_ws[k]
            for k in range(len(online_ws))
        ]
        target_net.set_weights(blended)

    _blend_into(target_critic, critic_model)
    _blend_into(target_actor, actor_model)
    

In [6]:
def get_actor():
    """Build the actor network: state vector -> one action fraction in (0, 1).

    The state is batch-normalised, passed through two 512-unit ReLU layers and
    squashed by a sigmoid. The policies multiply that output by the upper
    action bound taken from the state.

    Returns:
        An uncompiled `tf.keras.Model`.
    """
    # Small uniform init for the output layer so its pre-activation starts
    # near 0, i.e. the sigmoid output starts near 0.5.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` must be a tuple: `(num_states)` is just the int `num_states`.
    inputs = layers.Input(shape=(num_states,))
    inputs_batchnorm = layers.BatchNormalization()(inputs)

    out = layers.Dense(512, activation="relu")(inputs_batchnorm)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic():
    """Build the critic network: (state, action) -> scalar Q-value.

    The state branch (batch norm, Dense 16, Dense 32) and the action branch
    (Dense 32) are concatenated and passed through two 512-unit ReLU layers
    and a linear output unit.

    Returns:
        An uncompiled `tf.keras.Model` taking `[state, action]` as input.
    """
    # `shape` must be a tuple: `(num_states)` / `(1)` are plain ints.
    state_input = layers.Input(shape=(num_states,))
    state_input_batchnorm = layers.BatchNormalization()(state_input)

    state_out = layers.Dense(16, activation="relu")(state_input_batchnorm)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    # No activation on the output layer.
    outputs = layers.Dense(1)(out)

    model = tf.keras.Model([state_input, action_input], outputs)
    return model
    

In [8]:
def policy(state, noise_object):
    """Return the actor's action with additive exploration noise.

    Args:
        state: Batched state tensor; `state[0][2]` and `state[0][3]` are used
            as the lower and upper action bounds.
        noise_object: Zero-argument callable returning the noise sample.

    Returns:
        The noisy actor output scaled by the upper bound and clipped to
        `[lower, upper]`.
    """
    lower_bound = state[0][2].numpy()
    upper_bound = state[0][3].numpy()
    # Noise is added to the actor output before it is scaled.
    noisy_fraction = tf.squeeze(actor_model(state)).numpy() + noise_object()
    return np.clip(noisy_fraction * upper_bound, lower_bound, upper_bound)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Pick an action epsilon-greedily within the bounds carried by the state.

    Args:
        state: Batched state tensor; its last two entries are the lower and
            upper action bounds.
        eps: Exploration probability. Any value <= 0 always takes the greedy
            branch.

    Returns:
        With probability `eps`, one of 10 evenly spaced values between the
        bounds; otherwise the actor output scaled by the upper bound and
        clipped to the bounds.
    """
    lower_bound = state[0][-2].numpy()
    upper_bound = state[0][-1].numpy()

    explore = random.random() < eps
    if not explore:
        # Greedy branch: query the module-level actor network.
        actor_fraction = tf.squeeze(actor_model(state)).numpy()
        return np.clip(actor_fraction * upper_bound, lower_bound, upper_bound)

    # Exploration branch: uniform choice on a 10-point grid over the bounds.
    grid_index = random.randint(0, 9)
    return np.linspace(lower_bound, upper_bound, 10)[grid_index]

In [10]:
# Exploration noise for `policy()`; the training loop in this notebook uses
# epsilon-greedy exploration instead, so `ou_noise` is not consumed there.
std_dev = 0.2
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# Learning rates and the optimizers read by Buffer.learn().
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 200  # episodes per reward factor
gamma = 0.95          # discount factor in the critic's TD target
tau = 0.001           # soft-update rate passed to update_target()

# Epsilon-greedy schedule: exponential decay from MAX_EPSILON toward
# MIN_EPSILON, counted in environment steps after DELAY_TRAINING.
MAX_EPSILON = 1.0
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
BATCH_SIZE = 32        # mini-batch size of the replay buffer
DELAY_TRAINING = 5000  # steps collected before learning starts

In [11]:
def initialization():
    """Create a fresh agent: online networks, synced target networks, replay buffer.

    Returns:
        Tuple `(actor_model, critic_model, target_actor, target_critic, buffer)`.
        The target networks start as exact weight copies of the online ones;
        the buffer holds up to 500000 transitions and samples `BATCH_SIZE`
        of them per update.
    """
    # Built in the order actor, critic, target actor, target critic.
    online_actor, online_critic = get_actor(), get_critic()
    frozen_actor, frozen_critic = get_actor(), get_critic()

    # Start the targets from the online weights.
    frozen_actor.set_weights(online_actor.get_weights())
    frozen_critic.set_weights(online_critic.get_weights())

    replay = Buffer(500000, BATCH_SIZE)
    return online_actor, online_critic, frozen_actor, frozen_critic, replay

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Write the weights of all four networks into directory `root`.

    The directory (including missing parents) is created if needed. The files
    are `actor_model.h5`, `critic_model.h5`, `target_actor.h5` and
    `target_critic.h5`.

    Args:
        actor_model, critic_model, target_actor, target_critic: Objects
            exposing `save_weights(path)`.
        root: Target directory, relative or absolute.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(root, exist_ok=True)

    checkpoints = (
        ("actor_model.h5", actor_model),
        ("critic_model.h5", critic_model),
        ("target_actor.h5", target_actor),
        ("target_critic.h5", target_critic),
    )
    for filename, model in checkpoints:
        # os.path.join keeps the files inside `root` even when `root` is
        # absolute; "./{}/...".format(root) pointed somewhere else in that case.
        model.save_weights(os.path.join(root, filename))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor):
    """Build an `Environment` for one driving cycle and reward factor.

    `driving_path` is forwarded unchanged as the Environment's second
    argument; the callers in this notebook pass the already-loaded cycle
    array rather than a file path. The shared `cell_model`, `battery_path`
    and `motor_path` come from module scope.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [14]:
def test_agent(actor_model, reward_factor):
    """Run one greedy (no exploration) episode on the FTP-72 cycle and report it.

    Args:
        actor_model: Actor network to evaluate. It is queried directly, so
            the result does not depend on which model is currently bound to
            the module-level `actor_model`.
        reward_factor: Reward factor passed to the environment.

    Returns:
        The environment's `history` after the episode.
    """
    test_cycle_path = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
    test_cycle = sio.loadmat(test_cycle_path)
    test_cycle = test_cycle["sch_cycle"][:, 1]
    env = initialization_env(test_cycle, reward_factor)

    total_reward = 0
    state = env.reset()
    while True:
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
        # Greedy action computed from the model that was passed in, with the
        # same scaling and clipping as policy_epsilon_greedy's greedy branch:
        # the last two state entries are the action bounds.
        j_min = tf_state[0][-2].numpy()
        j_max = tf_state[0][-1].numpy()
        raw_action = tf.squeeze(actor_model(tf_state)).numpy()
        action = np.clip(raw_action * j_max, j_min, j_max)
        next_state, reward, done = env.step(action)

        state = next_state
        total_reward += reward

        if done:
            break

    # Sum of |SOC - 0.6| over the recorded SOC history.
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))

    print("******************* Test is start *****************")
    print('Total reward: {}'.format(total_reward),
          "SOC: {:.4f}".format(env.SOC),
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption))
    print("******************* Test is done *****************")
    print("")
    # Left panel: driving cycle; right panel: actions taken by the agent.
    plt.subplot(1, 2, 1)
    plt.plot(test_cycle)
    plt.subplot(1, 2, 2)
    plt.plot(env.history["Action"])
    plt.show()
    return env.history
    

In [15]:
# print(env.version)

# num_trials = 1
# Train one independent DDPG agent per reward factor on the FTP-72 cycle and
# collect per-episode statistics in `results_dict[reward_factor]`.
reward_factors = [4, 5, 6]
results_dict = {}
driving_cycle_path = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
driving_cycle = sio.loadmat(driving_cycle_path)
driving_cycle = driving_cycle["sch_cycle"][:, 1]

for reward_factor in reward_factors:
    print("")
    print("Trial {}".format(reward_factor))
    print("")

    actor_model, critic_model, target_actor, target_critic, buffer = initialization()
    # Each trial trains brand-new networks, so it also gets brand-new
    # optimizers: Adam keeps per-variable moment estimates and a step counter
    # that must not carry over from the previous agent, and newer Keras
    # optimizers refuse variables they were not built with.
    # Buffer.learn() picks these up through the module-level names.
    critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
    actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

    eps = MAX_EPSILON
    steps = 0

    episode_rewards = []
    episode_SOCs = []
    episode_FCs = []
    episode_test_history = []
    episode_num_test = []
    for ep in range(total_episodes):
        # A new environment is built for every episode.
        env = initialization_env(driving_cycle, reward_factor)

        start = time.time()
        state = env.reset()
        episodic_reward = 0

        while True:
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
            next_state, reward, done = env.step(action)
            if done:
                # The final transition is stored with an all-zero next state.
                # NOTE(review): Buffer.learn() has no done mask, so the critic
                # target still bootstraps from Q(zero state) for this step.
                next_state = [0] * num_states

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward

            # Learning and epsilon decay start only after DELAY_TRAINING
            # steps of purely random exploration (eps stays at MAX_EPSILON).
            if steps > DELAY_TRAINING:
                buffer.learn()
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps
                                                                        -DELAY_TRAINING))

            steps += 1

            if done:
                break

            state = next_state

        elapsed_time = time.time() - start
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward)
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption)

        # Sum of |SOC - 0.6| over the recorded SOC history of this episode.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward),
              "SOC: {:.4f}".format(env.SOC),
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
        )
        print("")

        if (ep + 1) % 10 == 0:
            # Every 10th episode keep the full history. This is the history
            # of the (exploring) training episode; the separate greedy
            # evaluation is disabled:
            # history = test_agent(actor_model, reward_factor)
            history = env.history
            episode_test_history.append(history)
            episode_num_test.append(ep + 1)

    # Save the final networks of this trial once all episodes are done.
    root = "DDPG_cycleOne_reward_factor{}".format(reward_factor)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)

    results_dict[reward_factor] = {
        "rewards": episode_rewards,
        "SOCs": episode_SOCs,
        "FCs": episode_FCs,
        "test_history": episode_test_history,
        "test_episode_num": episode_num_test,
    }


Trial 4

maximum steps, simulation is done ... 
elapsed_time: 15.105
Episode: 1 Exploration P: 1.0000 Total reward: -2142.471166421791 SOC: 1.0000 Cumulative_SOC_deviation: 496.9890 Fuel Consumption: 154.5153

maximum steps, simulation is done ... 
elapsed_time: 13.956
Episode: 2 Exploration P: 1.0000 Total reward: -2140.3879371382764 SOC: 1.0000 Cumulative_SOC_deviation: 496.3443 Fuel Consumption: 155.0106

maximum steps, simulation is done ... 
elapsed_time: 14.226
Episode: 3 Exploration P: 1.0000 Total reward: -2139.7106312414744 SOC: 1.0000 Cumulative_SOC_deviation: 497.2842 Fuel Consumption: 150.5738



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_

maximum steps, simulation is done ... 
elapsed_time: 90.822
Episode: 28 Exploration P: 0.5172 Total reward: -1512.5793457638638 SOC: 1.0000 Cumulative_SOC_deviation: 354.3819 Fuel Consumption: 95.0518

maximum steps, simulation is done ... 
elapsed_time: 90.694
Episode: 29 Exploration P: 0.5034 Total reward: -1308.757098543579 SOC: 1.0000 Cumulative_SOC_deviation: 303.8365 Fuel Consumption: 93.4113

maximum steps, simulation is done ... 
elapsed_time: 90.088
Episode: 30 Exploration P: 0.4901 Total reward: -1410.8530032757517 SOC: 1.0000 Cumulative_SOC_deviation: 329.7061 Fuel Consumption: 92.0286

maximum steps, simulation is done ... 
elapsed_time: 90.714
Episode: 31 Exploration P: 0.4771 Total reward: -1516.8545617813675 SOC: 1.0000 Cumulative_SOC_deviation: 355.5255 Fuel Consumption: 94.7526

maximum steps, simulation is done ... 
elapsed_time: 89.549
Episode: 32 Exploration P: 0.4644 Total reward: -1148.2398595315412 SOC: 1.0000 Cumulative_SOC_deviation: 265.1001 Fuel Consumption: 

maximum steps, simulation is done ... 
elapsed_time: 91.448
Episode: 69 Exploration P: 0.1745 Total reward: -1001.4112774469431 SOC: 0.4085 Cumulative_SOC_deviation: 240.1531 Fuel Consumption: 40.7991

maximum steps, simulation is done ... 
elapsed_time: 90.730
Episode: 70 Exploration P: 0.1701 Total reward: -1003.8005587557944 SOC: 0.3798 Cumulative_SOC_deviation: 241.2998 Fuel Consumption: 38.6013

maximum steps, simulation is done ... 
elapsed_time: 91.281
Episode: 71 Exploration P: 0.1657 Total reward: -1094.1020715379666 SOC: 0.3740 Cumulative_SOC_deviation: 264.1151 Fuel Consumption: 37.6417

maximum steps, simulation is done ... 
elapsed_time: 91.643
Episode: 72 Exploration P: 0.1615 Total reward: -1344.3586265624795 SOC: 0.3289 Cumulative_SOC_deviation: 327.2932 Fuel Consumption: 35.1860

maximum steps, simulation is done ... 
elapsed_time: 91.470
Episode: 73 Exploration P: 0.1574 Total reward: -1312.9258580877502 SOC: 0.2856 Cumulative_SOC_deviation: 320.2733 Fuel Consumption:

maximum steps, simulation is done ... 
elapsed_time: 91.547
Episode: 110 Exploration P: 0.0634 Total reward: -2142.5845036863775 SOC: 0.0166 Cumulative_SOC_deviation: 532.1299 Fuel Consumption: 14.0648

maximum steps, simulation is done ... 
elapsed_time: 92.207
Episode: 111 Exploration P: 0.0620 Total reward: -2143.2070829995887 SOC: 0.0068 Cumulative_SOC_deviation: 532.4403 Fuel Consumption: 13.4457

maximum steps, simulation is done ... 
elapsed_time: 91.696
Episode: 112 Exploration P: 0.0605 Total reward: -2182.1574108855243 SOC: 0.0173 Cumulative_SOC_deviation: 542.0515 Fuel Consumption: 13.9513

maximum steps, simulation is done ... 
elapsed_time: 91.700
Episode: 113 Exploration P: 0.0592 Total reward: -2063.134998341341 SOC: 0.0296 Cumulative_SOC_deviation: 512.1231 Fuel Consumption: 14.6426

battery power is 11289.612604841715(+) but condition is not avail
elapsed_time: 87.419
Episode: 114 Exploration P: 0.0579 Total reward: -3002.598669306116 SOC: -0.0004 Cumulative_SOC_deviat

maximum steps, simulation is done ... 
elapsed_time: 91.913
Episode: 151 Exploration P: 0.0274 Total reward: -296.56973462301175 SOC: 0.5651 Cumulative_SOC_deviation: 62.8946 Fuel Consumption: 44.9915

maximum steps, simulation is done ... 
elapsed_time: 92.206
Episode: 152 Exploration P: 0.0269 Total reward: -579.6660053358207 SOC: 0.3553 Cumulative_SOC_deviation: 137.2315 Fuel Consumption: 30.7402

battery power is 2943.195327219531(+) but condition is not avail
elapsed_time: 74.770
Episode: 153 Exploration P: 0.0266 Total reward: -2687.1472958609347 SOC: -0.0003 Cumulative_SOC_deviation: 420.2457 Fuel Consumption: 8.5681

battery power is 6231.497041857457(+) but condition is not avail
elapsed_time: 85.320
Episode: 154 Exploration P: 0.0261 Total reward: -2981.9552523382235 SOC: -0.0001 Cumulative_SOC_deviation: 493.3900 Fuel Consumption: 10.7968

maximum steps, simulation is done ... 
elapsed_time: 91.937
Episode: 155 Exploration P: 0.0257 Total reward: -1108.2900124810649 SOC: 0.5

maximum steps, simulation is done ... 
elapsed_time: 91.742
Episode: 192 Exploration P: 0.0158 Total reward: -309.0318804213021 SOC: 0.5781 Cumulative_SOC_deviation: 66.0505 Fuel Consumption: 44.8297

maximum steps, simulation is done ... 
elapsed_time: 91.894
Episode: 193 Exploration P: 0.0156 Total reward: -222.44846613211408 SOC: 0.5625 Cumulative_SOC_deviation: 44.7638 Fuel Consumption: 43.3932

maximum steps, simulation is done ... 
elapsed_time: 91.720
Episode: 194 Exploration P: 0.0155 Total reward: -120.77169504905999 SOC: 0.5789 Cumulative_SOC_deviation: 19.0982 Fuel Consumption: 44.3791

maximum steps, simulation is done ... 
elapsed_time: 91.968
Episode: 195 Exploration P: 0.0153 Total reward: -253.67945354220834 SOC: 0.5522 Cumulative_SOC_deviation: 52.7559 Fuel Consumption: 42.6559

maximum steps, simulation is done ... 
elapsed_time: 92.078
Episode: 196 Exploration P: 0.0152 Total reward: -295.6657341298206 SOC: 0.5481 Cumulative_SOC_deviation: 63.1329 Fuel Consumption: 4

maximum steps, simulation is done ... 
elapsed_time: 90.514
Episode: 19 Exploration P: 0.6594 Total reward: -2241.8710356102147 SOC: 1.0000 Cumulative_SOC_deviation: 426.3778 Fuel Consumption: 109.9822

maximum steps, simulation is done ... 
elapsed_time: 90.198
Episode: 20 Exploration P: 0.6418 Total reward: -2114.276669867553 SOC: 1.0000 Cumulative_SOC_deviation: 401.4223 Fuel Consumption: 107.1653

maximum steps, simulation is done ... 
elapsed_time: 90.299
Episode: 21 Exploration P: 0.6247 Total reward: -2156.895621680585 SOC: 1.0000 Cumulative_SOC_deviation: 410.1256 Fuel Consumption: 106.2677

maximum steps, simulation is done ... 
elapsed_time: 90.619
Episode: 22 Exploration P: 0.6080 Total reward: -2169.093593164264 SOC: 1.0000 Cumulative_SOC_deviation: 412.7674 Fuel Consumption: 105.2565

maximum steps, simulation is done ... 
elapsed_time: 89.982
Episode: 23 Exploration P: 0.5918 Total reward: -2180.1591593544085 SOC: 1.0000 Cumulative_SOC_deviation: 415.4118 Fuel Consumption

maximum steps, simulation is done ... 
elapsed_time: 80.999
Episode: 60 Exploration P: 0.2206 Total reward: -944.1703830705922 SOC: 0.4840 Cumulative_SOC_deviation: 179.6363 Fuel Consumption: 45.9891

maximum steps, simulation is done ... 
elapsed_time: 82.147
Episode: 61 Exploration P: 0.2149 Total reward: -810.2613779228825 SOC: 0.5383 Cumulative_SOC_deviation: 152.0146 Fuel Consumption: 50.1886

maximum steps, simulation is done ... 
elapsed_time: 81.997
Episode: 62 Exploration P: 0.2094 Total reward: -893.1395240693337 SOC: 0.4955 Cumulative_SOC_deviation: 169.2175 Fuel Consumption: 47.0519

maximum steps, simulation is done ... 
elapsed_time: 82.389
Episode: 63 Exploration P: 0.2040 Total reward: -1115.078191835599 SOC: 0.4509 Cumulative_SOC_deviation: 214.2491 Fuel Consumption: 43.8326

maximum steps, simulation is done ... 
elapsed_time: 82.609
Episode: 64 Exploration P: 0.1987 Total reward: -1241.3504146945786 SOC: 0.4073 Cumulative_SOC_deviation: 240.1536 Fuel Consumption: 40.

maximum steps, simulation is done ... 
elapsed_time: 82.662
Episode: 101 Exploration P: 0.0783 Total reward: -2568.090731959146 SOC: 0.0286 Cumulative_SOC_deviation: 510.6814 Fuel Consumption: 14.6839

maximum steps, simulation is done ... 
elapsed_time: 82.917
Episode: 102 Exploration P: 0.0765 Total reward: -2587.746625695416 SOC: 0.0559 Cumulative_SOC_deviation: 514.2390 Fuel Consumption: 16.5515

maximum steps, simulation is done ... 
elapsed_time: 83.079
Episode: 103 Exploration P: 0.0747 Total reward: -2760.2501889822697 SOC: 0.0118 Cumulative_SOC_deviation: 549.2742 Fuel Consumption: 13.8791

maximum steps, simulation is done ... 
elapsed_time: 82.461
Episode: 104 Exploration P: 0.0729 Total reward: -2371.9763471315896 SOC: 0.0706 Cumulative_SOC_deviation: 470.8869 Fuel Consumption: 17.5420

maximum steps, simulation is done ... 
elapsed_time: 83.384
Episode: 105 Exploration P: 0.0712 Total reward: -2373.420271586062 SOC: 0.0838 Cumulative_SOC_deviation: 470.9693 Fuel Consumptio

maximum steps, simulation is done ... 
elapsed_time: 83.511
Episode: 141 Exploration P: 0.0335 Total reward: -212.62421303384517 SOC: 0.5776 Cumulative_SOC_deviation: 32.9928 Fuel Consumption: 47.6601

maximum steps, simulation is done ... 
elapsed_time: 83.495
Episode: 142 Exploration P: 0.0328 Total reward: -957.9551826214317 SOC: 0.4511 Cumulative_SOC_deviation: 183.3957 Fuel Consumption: 40.9769

maximum steps, simulation is done ... 
elapsed_time: 83.290
Episode: 143 Exploration P: 0.0322 Total reward: -895.31896506485 SOC: 0.4142 Cumulative_SOC_deviation: 171.2137 Fuel Consumption: 39.2507

maximum steps, simulation is done ... 
elapsed_time: 83.075
Episode: 144 Exploration P: 0.0316 Total reward: -795.071347622202 SOC: 0.3703 Cumulative_SOC_deviation: 151.9408 Fuel Consumption: 35.3676

maximum steps, simulation is done ... 
elapsed_time: 82.847
Episode: 145 Exploration P: 0.0310 Total reward: -348.1163735064891 SOC: 0.5479 Cumulative_SOC_deviation: 60.1268 Fuel Consumption: 47.

maximum steps, simulation is done ... 
elapsed_time: 83.449
Episode: 182 Exploration P: 0.0176 Total reward: -273.0150177609702 SOC: 0.5615 Cumulative_SOC_deviation: 45.6322 Fuel Consumption: 44.8540

maximum steps, simulation is done ... 
elapsed_time: 84.016
Episode: 183 Exploration P: 0.0174 Total reward: -316.58858182140074 SOC: 0.5685 Cumulative_SOC_deviation: 54.4660 Fuel Consumption: 44.2586

maximum steps, simulation is done ... 
elapsed_time: 84.225
Episode: 184 Exploration P: 0.0172 Total reward: -291.9938047836299 SOC: 0.5712 Cumulative_SOC_deviation: 49.5273 Fuel Consumption: 44.3575

maximum steps, simulation is done ... 
elapsed_time: 83.702
Episode: 185 Exploration P: 0.0170 Total reward: -322.63727192059997 SOC: 0.5750 Cumulative_SOC_deviation: 55.2304 Fuel Consumption: 46.4854

maximum steps, simulation is done ... 
elapsed_time: 83.641
Episode: 186 Exploration P: 0.0168 Total reward: -311.650974967043 SOC: 0.5410 Cumulative_SOC_deviation: 53.8112 Fuel Consumption: 42.

maximum steps, simulation is done ... 
elapsed_time: 80.775
Episode: 9 Exploration P: 0.8646 Total reward: -3029.943083397055 SOC: 1.0000 Cumulative_SOC_deviation: 482.1219 Fuel Consumption: 137.2118

maximum steps, simulation is done ... 
elapsed_time: 81.448
Episode: 10 Exploration P: 0.8414 Total reward: -3063.728963174049 SOC: 1.0000 Cumulative_SOC_deviation: 487.8698 Fuel Consumption: 136.5101

maximum steps, simulation is done ... 
elapsed_time: 81.768
Episode: 11 Exploration P: 0.8189 Total reward: -2955.795437069362 SOC: 1.0000 Cumulative_SOC_deviation: 470.7493 Fuel Consumption: 131.2995

maximum steps, simulation is done ... 
elapsed_time: 82.001
Episode: 12 Exploration P: 0.7970 Total reward: -2978.5896774509247 SOC: 1.0000 Cumulative_SOC_deviation: 474.8751 Fuel Consumption: 129.3390

maximum steps, simulation is done ... 
elapsed_time: 82.175
Episode: 13 Exploration P: 0.7757 Total reward: -2816.4567290687837 SOC: 1.0000 Cumulative_SOC_deviation: 448.9209 Fuel Consumption:

maximum steps, simulation is done ... 
elapsed_time: 82.817
Episode: 50 Exploration P: 0.2872 Total reward: -555.0084752012301 SOC: 0.6665 Cumulative_SOC_deviation: 82.5855 Fuel Consumption: 59.4955

maximum steps, simulation is done ... 
elapsed_time: 83.216
Episode: 51 Exploration P: 0.2797 Total reward: -478.1474067545792 SOC: 0.7345 Cumulative_SOC_deviation: 68.8880 Fuel Consumption: 64.8197

maximum steps, simulation is done ... 
elapsed_time: 83.353
Episode: 52 Exploration P: 0.2724 Total reward: -818.3381299545536 SOC: 0.6271 Cumulative_SOC_deviation: 126.9157 Fuel Consumption: 56.8438

maximum steps, simulation is done ... 
elapsed_time: 82.976
Episode: 53 Exploration P: 0.2653 Total reward: -583.5608424283475 SOC: 0.6732 Cumulative_SOC_deviation: 87.2410 Fuel Consumption: 60.1146

maximum steps, simulation is done ... 
elapsed_time: 83.007
Episode: 54 Exploration P: 0.2584 Total reward: -622.18650499543 SOC: 0.6215 Cumulative_SOC_deviation: 94.3872 Fuel Consumption: 55.8636

m

maximum steps, simulation is done ... 
elapsed_time: 83.080
Episode: 91 Exploration P: 0.0999 Total reward: -2360.254099166753 SOC: 0.1697 Cumulative_SOC_deviation: 389.3978 Fuel Consumption: 23.8670

maximum steps, simulation is done ... 
elapsed_time: 82.741
Episode: 92 Exploration P: 0.0975 Total reward: -2562.45574270259 SOC: 0.1449 Cumulative_SOC_deviation: 423.3406 Fuel Consumption: 22.4122

maximum steps, simulation is done ... 
elapsed_time: 83.088
Episode: 93 Exploration P: 0.0951 Total reward: -2668.9586938545885 SOC: 0.1182 Cumulative_SOC_deviation: 441.4247 Fuel Consumption: 20.4104

maximum steps, simulation is done ... 
elapsed_time: 83.098
Episode: 94 Exploration P: 0.0928 Total reward: -3063.7853616150414 SOC: 0.0535 Cumulative_SOC_deviation: 507.9222 Fuel Consumption: 16.2522

maximum steps, simulation is done ... 
elapsed_time: 83.058
Episode: 95 Exploration P: 0.0906 Total reward: -2535.164719481387 SOC: 0.1593 Cumulative_SOC_deviation: 418.6184 Fuel Consumption: 23.

maximum steps, simulation is done ... 
elapsed_time: 83.379
Episode: 132 Exploration P: 0.0392 Total reward: -126.64709681789758 SOC: 0.5998 Cumulative_SOC_deviation: 13.5103 Fuel Consumption: 45.5851

maximum steps, simulation is done ... 
elapsed_time: 83.957
Episode: 133 Exploration P: 0.0384 Total reward: -110.48388294313438 SOC: 0.5982 Cumulative_SOC_deviation: 10.7652 Fuel Consumption: 45.8925

maximum steps, simulation is done ... 
elapsed_time: 83.797
Episode: 134 Exploration P: 0.0376 Total reward: -122.07126492408145 SOC: 0.5936 Cumulative_SOC_deviation: 12.7546 Fuel Consumption: 45.5440

maximum steps, simulation is done ... 
elapsed_time: 83.409
Episode: 135 Exploration P: 0.0369 Total reward: -115.53187794354925 SOC: 0.5995 Cumulative_SOC_deviation: 11.7262 Fuel Consumption: 45.1748

maximum steps, simulation is done ... 
elapsed_time: 83.222
Episode: 136 Exploration P: 0.0361 Total reward: -104.96056410443778 SOC: 0.5987 Cumulative_SOC_deviation: 9.8630 Fuel Consumption: 

maximum steps, simulation is done ... 
elapsed_time: 83.895
Episode: 173 Exploration P: 0.0195 Total reward: -130.14607137698232 SOC: 0.5924 Cumulative_SOC_deviation: 14.2231 Fuel Consumption: 44.8077

maximum steps, simulation is done ... 
elapsed_time: 83.678
Episode: 174 Exploration P: 0.0192 Total reward: -135.77078936686308 SOC: 0.5936 Cumulative_SOC_deviation: 15.0616 Fuel Consumption: 45.4015

maximum steps, simulation is done ... 
elapsed_time: 84.339
Episode: 175 Exploration P: 0.0190 Total reward: -164.92024638313737 SOC: 0.5810 Cumulative_SOC_deviation: 19.9699 Fuel Consumption: 45.1009

maximum steps, simulation is done ... 
elapsed_time: 84.217
Episode: 176 Exploration P: 0.0187 Total reward: -257.97496567186505 SOC: 0.5736 Cumulative_SOC_deviation: 35.4844 Fuel Consumption: 45.0688

maximum steps, simulation is done ... 
elapsed_time: 83.784
Episode: 177 Exploration P: 0.0185 Total reward: -266.77592150340803 SOC: 0.5762 Cumulative_SOC_deviation: 36.8780 Fuel Consumption:

In [16]:
# Persist the per-reward-factor training results for later analysis.
with open("DDPG_cycleOne_4to6.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [17]:
# results_dict