In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 

from vehicle_model_DDPG1 import Environment 
from cell_model import CellModel 

# Hide every CUDA device from this process so the notebook runs on the CPU.
# NOTE(review): this is set after `import tensorflow` above; it is usually
# exported before the import - confirm the GPU is really hidden.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Input data files for the vehicle simulation. Judging by the file names these
# are the FTP-72 (FUDS) driving cycle, a NiMH Prius battery pack and a
# permanent-magnet motor map - confirm against the OC_SIM_DB data set.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# Shared cell model; `initialization_env` reuses it for every environment it builds.
cell_model = CellModel()
# The trailing 10 sits in the positional slot that `initialization_env` fills
# with its `reward_factor` argument.
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)

# Length of the state vector: sizes the replay-buffer rows and the network inputs.
num_states = 4

In [3]:
class OUActionNoise:
    """Temporally correlated exploration noise (Ornstein-Uhlenbeck style).

    Every call moves the stored value a little towards ``mean`` (rate
    ``theta``) and adds a Gaussian kick scaled by ``std_deviation`` and
    ``sqrt(dt)``. A single scalar sample is drawn per call.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.mean = mean
        self.std_dev = std_deviation
        self.theta = theta
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Restart the process from ``x_initial``, or from 0 if none was given."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process by one step and return the new value."""
        pull_to_mean = self.theta * (self.mean - self.x_prev) * self.dt
        random_kick = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        self.x_prev = self.x_prev + pull_to_mean + random_kick
        return self.x_prev

In [4]:
class Buffer:
    """Ring buffer of transitions plus one DDPG gradient step.

    Transitions are stored in pre-allocated NumPy arrays and overwritten in
    arrival order once ``buffer_capacity`` is reached.

    ``learn`` reads the module-level names ``actor_model``, ``critic_model``,
    ``target_actor``, ``target_critic``, ``actor_optimizer``,
    ``critic_optimizer`` and ``gamma``; they must exist before it is called.
    The row width of the state arrays comes from the module-level
    ``num_states``.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        """Allocate storage for ``buffer_capacity`` transitions.

        Args:
            buffer_capacity: maximum number of transitions kept.
            batch_size: number of transitions sampled by each ``learn`` call.
        """
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Total number of `record` calls so far (keeps growing past capacity).
        self.buffer_counter = 0

        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, 1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))
        # 1.0 where the transition ended the episode, else 0.0.
        self.done_buffer = np.zeros((self.buffer_capacity, 1))

    def record(self, obs_tuple):
        """Store one transition, overwriting the oldest slot when full.

        Args:
            obs_tuple: ``(state, action, reward, next_state)`` with an optional
                fifth ``done`` element. When ``done`` is omitted, an all-zero
                ``next_state`` is taken as the terminal marker, which is the
                convention the training loop uses when an episode finishes.
                NOTE(review): assumes a genuine state is never all zeros -
                pass ``done`` explicitly if that can happen.
        """
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]

        if len(obs_tuple) > 4:
            terminal = bool(obs_tuple[4])
        else:
            terminal = not np.any(obs_tuple[3])
        # Always written so a reused slot never keeps a stale flag.
        self.done_buffer[index] = 1.0 if terminal else 0.0

        self.buffer_counter += 1

    def learn(self):
        """Sample a batch and apply one critic update and one actor update.

        Does nothing while the buffer is still empty.
        """
        record_range = min(self.buffer_counter, self.buffer_capacity)
        if record_range == 0:
            # np.random.choice cannot sample from an empty range.
            return
        batch_indices = np.random.choice(record_range, self.batch_size)

        state_batch = tf.convert_to_tensor(self.state_buffer[batch_indices])
        action_batch = tf.convert_to_tensor(self.action_buffer[batch_indices])
        reward_batch = tf.cast(
            tf.convert_to_tensor(self.reward_buffer[batch_indices]), dtype=tf.float32
        )
        next_state_batch = tf.convert_to_tensor(self.next_state_buffer[batch_indices])
        not_done_batch = tf.cast(
            tf.convert_to_tensor(1.0 - self.done_buffer[batch_indices]), dtype=tf.float32
        )

        # The TD target depends only on the target networks, so it is built
        # outside the tape. Terminal transitions contribute the reward alone:
        # their stored next state is a placeholder, not a real observation.
        target_actions = target_actor(next_state_batch)
        y = reward_batch + gamma * not_done_batch * target_critic(
            [next_state_batch, target_actions]
        )

        # Critic step: regress Q(s, a) onto the TD target.
        with tf.GradientTape() as tape:
            critic_value = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # Actor step: ascend the critic's value of the actor's own actions.
        with tf.GradientTape() as tape:
            actions = actor_model(state_batch)
            critic_value = critic_model([state_batch, actions])
            actor_loss = -tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )
        

In [5]:
def update_target(tau):
    """Move both target networks a fraction ``tau`` towards the online ones.

    For every weight: ``target <- (1 - tau) * target + tau * online``. The
    critic pair is processed first, then the actor pair. Works on the
    module-level ``target_critic``/``critic_model`` and
    ``target_actor``/``actor_model``.
    """
    network_pairs = (
        (target_critic, critic_model),
        (target_actor, actor_model),
    )
    for target_net, online_net in network_pairs:
        target_weights = target_net.weights
        blended = [
            target_weights[idx] * (1 - tau) + tau * online_weight
            for idx, online_weight in enumerate(online_net.weights)
        ]
        target_net.set_weights(blended)
    

In [6]:
def get_actor():
    """Build the actor network.

    The state vector (``num_states`` features) is batch-normalised, passed
    through two 512-unit ReLU layers and reduced to one sigmoid output, so
    the returned model emits a value in (0, 1) per state.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping a state batch to that output.
    """
    # Small uniform initial weights for the output layer.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` has to be a tuple; `(num_states)` is just the integer itself.
    inputs = layers.Input(shape=(num_states,))
    inputs_batchnorm = layers.BatchNormalization()(inputs)

    out = layers.Dense(512, activation="relu")(inputs_batchnorm)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(out)
    return tf.keras.Model(inputs, outputs)

In [7]:
def get_critic():
    """Build the critic network Q(state, action).

    The state branch (batch norm, then 16- and 32-unit ReLU layers) and the
    action branch (one 32-unit ReLU layer) are concatenated and passed
    through two 512-unit ReLU layers to a single linear output.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[state, action]`` inputs.
    """
    # `shape` has to be a tuple; `(num_states)` and `(1)` are plain integers.
    state_input = layers.Input(shape=(num_states,))
    state_input_batchnorm = layers.BatchNormalization()(state_input)

    state_out = layers.Dense(16, activation="relu")(state_input_batchnorm)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1)(out)

    return tf.keras.Model([state_input, action_input], outputs)
    

In [8]:
def policy(state, noise_object):
    """Pick an action from the actor output plus exploration noise.

    The bounds are read from ``state[0][2]`` (lower) and ``state[0][3]``
    (upper). The actor output plus one sample from ``noise_object`` is
    multiplied by the upper bound and clipped into the bounds.

    Args:
        state: batched state tensor with a leading batch dimension of one.
        noise_object: callable returning the noise to add.

    Returns:
        The clipped action.
    """
    lower, upper = (state[0][idx].numpy() for idx in (2, 3))
    actor_output = tf.squeeze(actor_model(state)).numpy()
    noisy_fraction = actor_output + noise_object()
    return np.clip(noisy_fraction * upper, lower, upper)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Epsilon-greedy action selection within the bounds carried by the state.

    The last two entries of ``state[0]`` are the lower and upper bound. With
    probability ``eps`` one of 10 evenly spaced values between the bounds is
    returned; otherwise the actor output is multiplied by the upper bound and
    clipped into the bounds.

    Args:
        state: batched state tensor with a leading batch dimension of one.
        eps: exploration probability.

    Returns:
        The chosen action.
    """
    lower = state[0][-2].numpy()
    upper = state[0][-1].numpy()

    explore = random.random() < eps
    if not explore:
        fraction = tf.squeeze(actor_model(state)).numpy()
        return np.clip(fraction * upper, lower, upper)

    grid_index = random.randint(0, 9)
    return np.linspace(lower, upper, 10)[grid_index]

In [10]:
# Exploration noise for `policy`. The standard deviation is defined once and
# reused so the two values cannot drift apart.
std_dev = 0.2
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# Learning rates and optimizers used by `Buffer.learn`.
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 500  # episodes per trial
gamma = 0.95          # discount factor in the critic's TD target
tau = 0.001           # soft-update rate passed to `update_target`

# Epsilon-greedy schedule: once training has started, eps decays exponentially
# from MAX_EPSILON towards MIN_EPSILON at DECAY_RATE per environment step.
MAX_EPSILON = 1
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
BATCH_SIZE = 32         # transitions sampled per `Buffer.learn` call
DELAY_TRAINING = 10000  # environment steps collected before learning starts

In [11]:
def initialization():
    """Create fresh networks and an empty replay buffer for one trial.

    The two target networks start as exact weight copies of the online
    networks.

    Returns:
        ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    online_actor = get_actor()
    online_critic = get_critic()
    actor_copy = get_actor()
    critic_copy = get_critic()

    for copy_net, source_net in ((actor_copy, online_actor), (critic_copy, online_critic)):
        copy_net.set_weights(source_net.get_weights())

    replay = Buffer(500000, BATCH_SIZE)
    return online_actor, online_critic, actor_copy, critic_copy, replay

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Write the weights of all four networks under ``./<root>/``.

    Each network gets its own checkpoint name inside that directory; a
    confirmation line is printed once all four have been written.
    """
    checkpoints = (
        (actor_model, "./{}/actor_model_checkpoint"),
        (critic_model, "./{}/critic_model_checkpoint"),
        (target_actor, "./{}/target_actor_checkpoint"),
        (target_critic, "./{}/target_critic_checkpoint"),
    )
    for network, path_template in checkpoints:
        network.save_weights(path_template.format(root))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor):
    """Build an ``Environment`` for one driving cycle.

    The cell model, battery file and motor file come from the module-level
    ``cell_model``, ``battery_path`` and ``motor_path``.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [14]:
print(env.version)

num_trials = 3
results_dict = {}
driving_cycle_paths = glob.glob("training/*.mat")[:1]
if not driving_cycle_paths:
    # np.random.choice on an empty list fails with an unhelpful message.
    raise FileNotFoundError("no driving cycle files match 'training/*.mat'")

for trial in range(num_trials):
    print("")
    print("Trial {}".format(trial))
    print("")

    actor_model, critic_model, target_actor, target_critic, buffer = initialization()
    # Every trial trains brand-new networks, so it also needs brand-new
    # optimizers: reusing the previous ones would carry over Adam's step count
    # and moment estimates that belong to the discarded networks.
    # `Buffer.learn` looks these names up at call time.
    critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
    actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

    eps = MAX_EPSILON
    steps = 0  # environment steps taken in this trial, across episodes

    episode_rewards = []
    episode_SOCs = []
    episode_FCs = []
    for ep in range(total_episodes):
        driving_cycle_path = np.random.choice(driving_cycle_paths)
        print(driving_cycle_path)
        env = initialization_env(driving_cycle_path, 10)

        start = time.time()
        state = env.reset()
        episodic_reward = 0

        while True:
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
            next_state, reward, done = env.step(action)
            if done:
                # Terminal transitions are stored with an all-zero next state.
                next_state = [0] * num_states

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward

            # Pure exploration until DELAY_TRAINING steps have been collected;
            # after that, learn on every step and decay epsilon.
            if steps > DELAY_TRAINING:
                buffer.learn()
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(
                    -DECAY_RATE * (steps - DELAY_TRAINING)
                )

            steps += 1

            if done:
                break

            state = next_state

        elapsed_time = time.time() - start
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward)
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption)

        # Summed absolute distance of the SOC trace from 0.6
        # (presumably the reference SOC - confirm against the environment).
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward),
              "SOC: {:.4f}".format(env.SOC),
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
        )
        print("")

    root = "DDPG1_trial{}".format(trial+1)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)

    results_dict[trial + 1] = {
        "rewards": episode_rewards,
        "SOCs": episode_SOCs,
        "FCs": episode_FCs
    }

1

Trial 0

training\03_nedc.mat


  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (
  del_i = (1 / (2 * r_cha)) * (v_cha - (v_cha ** 2 - 4 * r_cha * p_bat) ** (0.5)) * (p_bat < 0) + (1 / (


SOC is nan...
elapsed_time: 14.109
Episode: 1 Exploration P: 1.0000 Total reward: -3076.0200908294455 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 48.5147

training\03_nedc.mat
SOC is nan...
elapsed_time: 14.236
Episode: 2 Exploration P: 1.0000 Total reward: -3173.4850215001075 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 49.9365

training\03_nedc.mat


  2 * r_dis)) * (v_dis - (v_dis ** 2 - 4 * r_dis * p_bat) ** (0.5)) * (p_bat >= 0)
  2 * r_dis)) * (v_dis - (v_dis ** 2 - 4 * r_dis * p_bat) ** (0.5)) * (p_bat >= 0)


SOC is nan...
elapsed_time: 14.005
Episode: 3 Exploration P: 1.0000 Total reward: -3222.044061353341 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 50.2340

training\03_nedc.mat
SOC is nan...
elapsed_time: 16.707
Episode: 4 Exploration P: 1.0000 Total reward: -3149.5395157929743 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 49.6595

training\03_nedc.mat
SOC is nan...
elapsed_time: 13.819
Episode: 5 Exploration P: 1.0000 Total reward: -3057.7346621001543 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 49.1951

training\03_nedc.mat
SOC is nan...
elapsed_time: 13.820
Episode: 6 Exploration P: 1.0000 Total reward: -3122.0378631306066 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 49.4498

training\03_nedc.mat
SOC is nan...
elapsed_time: 13.764
Episode: 7 Exploration P: 1.0000 Total reward: -3038.5932982566155 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 48.4302

training\03_nedc.mat
SOC is nan...
elapsed_time: 12.890
Episode: 8 Exploratio

SOC is nan...
elapsed_time: 81.069
Episode: 31 Exploration P: 0.5069 Total reward: -1679.7532895122954 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 26.9684

training\03_nedc.mat
SOC is nan...
elapsed_time: 72.443
Episode: 32 Exploration P: 0.4960 Total reward: -1650.2768290375707 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 25.7193

training\03_nedc.mat
SOC is nan...
elapsed_time: 77.591
Episode: 33 Exploration P: 0.4854 Total reward: -1739.4283772019862 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 27.3014

training\03_nedc.mat
SOC is nan...
elapsed_time: 76.308
Episode: 34 Exploration P: 0.4750 Total reward: -1919.6536001997013 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 29.3343

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.364
Episode: 35 Exploration P: 0.4648 Total reward: -1613.6973663788808 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 25.4226

training\03_nedc.mat
SOC is nan...
elapsed_time: 71.310
Episode: 36 Exp

SOC is nan...
elapsed_time: 82.314
Episode: 75 Exploration P: 0.1987 Total reward: -1754.9172311289453 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 10.7958

training\03_nedc.mat
SOC is nan...
elapsed_time: 81.999
Episode: 76 Exploration P: 0.1946 Total reward: -1784.2083210452722 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 10.5081

training\03_nedc.mat
SOC is nan...
elapsed_time: 80.253
Episode: 77 Exploration P: 0.1906 Total reward: -1653.1579092197774 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 11.5801

training\03_nedc.mat
SOC is nan...
elapsed_time: 66.705
Episode: 78 Exploration P: 0.1867 Total reward: -1878.750513441762 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 10.0994

training\03_nedc.mat
SOC is nan...
elapsed_time: 66.797
Episode: 79 Exploration P: 0.1828 Total reward: -1718.0764911984984 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 10.9499

training\03_nedc.mat
SOC is nan...
elapsed_time: 67.054
Episode: 80 Expl

SOC is nan...
elapsed_time: 65.121
Episode: 118 Exploration P: 0.0829 Total reward: -1942.4530613111501 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 5.6350

training\03_nedc.mat
SOC is nan...
elapsed_time: 65.628
Episode: 119 Exploration P: 0.0813 Total reward: -1965.8604777453174 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 5.3525

training\03_nedc.mat
SOC is nan...
elapsed_time: 64.611
Episode: 120 Exploration P: 0.0798 Total reward: -2022.6914275030776 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 4.1681

training\03_nedc.mat
SOC is nan...
elapsed_time: 65.731
Episode: 121 Exploration P: 0.0784 Total reward: -1988.7394145029793 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 4.5950

training\03_nedc.mat
SOC is nan...
elapsed_time: 64.903
Episode: 122 Exploration P: 0.0769 Total reward: -1942.0327257719684 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 5.8757

training\03_nedc.mat
SOC is nan...
elapsed_time: 65.167
Episode: 123 Ex

SOC is nan...
elapsed_time: 67.863
Episode: 162 Exploration P: 0.0383 Total reward: -1530.589231493971 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 25.0807

training\03_nedc.mat
SOC is nan...
elapsed_time: 67.456
Episode: 163 Exploration P: 0.0377 Total reward: -1708.6226001821829 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 18.0049

training\03_nedc.mat
SOC is nan...
elapsed_time: 67.274
Episode: 164 Exploration P: 0.0371 Total reward: -1999.892694390973 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 21.9745

training\03_nedc.mat
SOC is nan...
elapsed_time: 67.276
Episode: 165 Exploration P: 0.0365 Total reward: -1661.4738368501544 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 18.6775

training\03_nedc.mat
SOC is nan...
elapsed_time: 67.137
Episode: 166 Exploration P: 0.0360 Total reward: -1575.6064672044267 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 16.6704

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.147
Episode: 167

SOC is nan...
elapsed_time: 66.988
Episode: 205 Exploration P: 0.0210 Total reward: -2079.0180808551013 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 12.9999

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.050
Episode: 206 Exploration P: 0.0207 Total reward: -1326.4075310978592 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 31.1427

training\03_nedc.mat
SOC is nan...
elapsed_time: 67.975
Episode: 207 Exploration P: 0.0205 Total reward: -1330.0188187404333 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 29.7301

training\03_nedc.mat
SOC is nan...
elapsed_time: 67.905
Episode: 208 Exploration P: 0.0203 Total reward: -1285.0856574849654 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 29.3834

training\03_nedc.mat
SOC is nan...
elapsed_time: 67.212
Episode: 209 Exploration P: 0.0200 Total reward: -1534.1759075161788 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 18.1953

training\03_nedc.mat
SOC is nan...
elapsed_time: 67.361
Episode: 2

SOC is nan...
elapsed_time: 67.851
Episode: 248 Exploration P: 0.0142 Total reward: -1401.8835026948004 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 25.5777

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.069
Episode: 249 Exploration P: 0.0142 Total reward: -1472.6488866842028 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 25.0136

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.098
Episode: 250 Exploration P: 0.0141 Total reward: -1367.8477156416045 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 25.3540

training\03_nedc.mat
SOC is nan...
elapsed_time: 67.683
Episode: 251 Exploration P: 0.0140 Total reward: -1656.370978175206 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 28.3148

training\03_nedc.mat
SOC is nan...
elapsed_time: 67.671
Episode: 252 Exploration P: 0.0139 Total reward: -1284.5664212065992 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 25.4820

training\03_nedc.mat
SOC is nan...
elapsed_time: 66.992
Episode: 25

SOC is nan...
elapsed_time: 69.743
Episode: 291 Exploration P: 0.0116 Total reward: -2184.7582519847183 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.9437

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.668
Episode: 292 Exploration P: 0.0116 Total reward: -2201.0588845405227 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.8156

training\03_nedc.mat
SOC is nan...
elapsed_time: 69.031
Episode: 293 Exploration P: 0.0116 Total reward: -2186.3544859096155 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.9126

training\03_nedc.mat
SOC is nan...
elapsed_time: 69.358
Episode: 294 Exploration P: 0.0115 Total reward: -2204.243392967139 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.9892

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.655
Episode: 295 Exploration P: 0.0115 Total reward: -2190.800728486959 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.9050

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.900
Episode: 296 Expl

SOC is nan...
elapsed_time: 71.609
Episode: 335 Exploration P: 0.0106 Total reward: -1273.7149419467173 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 33.5821

training\03_nedc.mat
SOC is nan...
elapsed_time: 71.157
Episode: 336 Exploration P: 0.0106 Total reward: -1322.6116259846376 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 29.5502

training\03_nedc.mat
SOC is nan...
elapsed_time: 71.566
Episode: 337 Exploration P: 0.0106 Total reward: -1327.419370107614 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 23.3424

training\03_nedc.mat
SOC is nan...
elapsed_time: 72.174
Episode: 338 Exploration P: 0.0106 Total reward: -1232.560423965553 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 28.4986

training\03_nedc.mat
SOC is nan...
elapsed_time: 71.409
Episode: 339 Exploration P: 0.0106 Total reward: -1308.5296709062686 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 26.7777

training\03_nedc.mat
SOC is nan...
elapsed_time: 71.628
Episode: 340

SOC is nan...
elapsed_time: 71.303
Episode: 378 Exploration P: 0.0102 Total reward: -1224.4176387784707 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 26.1255

training\03_nedc.mat
SOC is nan...
elapsed_time: 72.037
Episode: 379 Exploration P: 0.0102 Total reward: -1207.2432483894074 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 27.2074

training\03_nedc.mat
SOC is nan...
elapsed_time: 71.975
Episode: 380 Exploration P: 0.0102 Total reward: -1282.5300147423663 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 25.9562

training\03_nedc.mat
SOC is nan...
elapsed_time: 71.671
Episode: 381 Exploration P: 0.0102 Total reward: -1296.6815171512103 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 23.5897

training\03_nedc.mat
SOC is nan...
elapsed_time: 72.359
Episode: 382 Exploration P: 0.0102 Total reward: -1232.6807528711915 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 29.7048

training\03_nedc.mat
SOC is nan...
elapsed_time: 71.368
Episode: 3

SOC is nan...
elapsed_time: 18.370
Episode: 9 Exploration P: 0.8200 Total reward: -3084.717953377819 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 48.0914

training\03_nedc.mat
SOC is nan...
elapsed_time: 70.194
Episode: 10 Exploration P: 0.8021 Total reward: -2869.509304475887 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 45.5671

training\03_nedc.mat
SOC is nan...
elapsed_time: 70.866
Episode: 11 Exploration P: 0.7846 Total reward: -2665.9934719513785 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 43.1684

training\03_nedc.mat
SOC is nan...
elapsed_time: 70.931
Episode: 12 Exploration P: 0.7676 Total reward: -2551.259787499115 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 41.7871

training\03_nedc.mat
SOC is nan...
elapsed_time: 70.423
Episode: 13 Exploration P: 0.7509 Total reward: -2389.2406686130525 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 39.1556

training\03_nedc.mat
SOC is nan...
elapsed_time: 72.390
Episode: 14 Explora

SOC is nan...
elapsed_time: 76.780
Episode: 53 Exploration P: 0.3161 Total reward: -1444.269620152118 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 16.4929

training\03_nedc.mat
SOC is nan...
elapsed_time: 77.223
Episode: 54 Exploration P: 0.3094 Total reward: -1436.5795351546435 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 17.3039

training\03_nedc.mat
SOC is nan...
elapsed_time: 77.510
Episode: 55 Exploration P: 0.3029 Total reward: -1464.8834384716092 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 16.8182

training\03_nedc.mat
SOC is nan...
elapsed_time: 71.397
Episode: 56 Exploration P: 0.2966 Total reward: -1456.687543111567 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 14.9791

training\03_nedc.mat
SOC is nan...
elapsed_time: 70.269
Episode: 57 Exploration P: 0.2904 Total reward: -1499.8546790976627 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 14.2398

training\03_nedc.mat
SOC is nan...
elapsed_time: 70.682
Episode: 58 Explo

SOC is nan...
elapsed_time: 71.605
Episode: 97 Exploration P: 0.1268 Total reward: -1939.4092097830228 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 8.7181

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.806
Episode: 98 Exploration P: 0.1244 Total reward: -1826.4059528609155 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 7.2633

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.746
Episode: 99 Exploration P: 0.1220 Total reward: -1844.3488788038208 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 6.9167

training\03_nedc.mat
SOC is nan...
elapsed_time: 69.124
Episode: 100 Exploration P: 0.1196 Total reward: -1823.8943871199235 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 7.2280

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.648
Episode: 101 Exploration P: 0.1173 Total reward: -1816.7496144421089 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 6.8964

training\03_nedc.mat
SOC is nan...
elapsed_time: 71.091
Episode: 102 Explo

SOC is nan...
elapsed_time: 68.835
Episode: 141 Exploration P: 0.0558 Total reward: -2082.540763569654 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 3.8407

training\03_nedc.mat
SOC is nan...
elapsed_time: 69.659
Episode: 142 Exploration P: 0.0548 Total reward: -2076.929332384878 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 3.5358

training\03_nedc.mat
SOC is nan...
elapsed_time: 69.252
Episode: 143 Exploration P: 0.0539 Total reward: -2027.2139755633716 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 3.7551

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.956
Episode: 144 Exploration P: 0.0530 Total reward: -2035.1982914981866 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 4.0729

training\03_nedc.mat
SOC is nan...
elapsed_time: 69.236
Episode: 145 Exploration P: 0.0520 Total reward: -2058.2167217786387 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 3.7166

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.680
Episode: 146 Expl

SOC is nan...
elapsed_time: 65.376
Episode: 185 Exploration P: 0.0280 Total reward: -2128.355631304398 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.6371

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.699
Episode: 186 Exploration P: 0.0276 Total reward: -2138.1373911224528 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.7623

training\03_nedc.mat
SOC is nan...
elapsed_time: 69.264
Episode: 187 Exploration P: 0.0272 Total reward: -2165.0839816021526 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.3632

training\03_nedc.mat
SOC is nan...
elapsed_time: 69.055
Episode: 188 Exploration P: 0.0269 Total reward: -2124.491035011917 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.6178

training\03_nedc.mat
SOC is nan...
elapsed_time: 69.347
Episode: 189 Exploration P: 0.0265 Total reward: -2104.660886732595 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.7719

training\03_nedc.mat
SOC is nan...
elapsed_time: 69.066
Episode: 190 Explo

SOC is nan...
elapsed_time: 68.599
Episode: 229 Exploration P: 0.0171 Total reward: -2148.8544759458027 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.3290

training\03_nedc.mat
SOC is nan...
elapsed_time: 69.383
Episode: 230 Exploration P: 0.0169 Total reward: -2176.9138180809096 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.2616

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.708
Episode: 231 Exploration P: 0.0168 Total reward: -2159.1300448198426 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.2904

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.935
Episode: 232 Exploration P: 0.0166 Total reward: -2198.9974064181097 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 1.9727

training\03_nedc.mat
SOC is nan...
elapsed_time: 69.380
Episode: 233 Exploration P: 0.0165 Total reward: -2130.1423916201547 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.5119

training\03_nedc.mat
SOC is nan...
elapsed_time: 69.088
Episode: 234 Ex

SOC is nan...
elapsed_time: 70.476
Episode: 273 Exploration P: 0.0128 Total reward: -2176.9028220520568 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.0497

training\03_nedc.mat
SOC is nan...
elapsed_time: 77.086
Episode: 274 Exploration P: 0.0127 Total reward: -2165.5532180750247 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.2423

training\03_nedc.mat
SOC is nan...
elapsed_time: 72.524
Episode: 275 Exploration P: 0.0127 Total reward: -2140.7095478823503 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.4927

training\03_nedc.mat
SOC is nan...
elapsed_time: 68.288
Episode: 276 Exploration P: 0.0126 Total reward: -2176.5275188632613 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.1556

training\03_nedc.mat
SOC is nan...
elapsed_time: 71.904
Episode: 277 Exploration P: 0.0126 Total reward: -2184.2354639418427 SOC: nan Cumulative_SOC_deviation: nan Fuel Consumption: 2.1974

training\03_nedc.mat
SOC is nan...
elapsed_time: 71.014
Episode: 278 Ex

KeyboardInterrupt: 

In [None]:
# Persist the per-trial reward / SOC / fuel-consumption curves for later analysis.
with open("DDPG1.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)