In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import glob
import matplotlib.pyplot as plt 
from collections import deque 
from tensorflow.keras import layers
import time 

from vehicle_model_DDPG1 import Environment 
from cell_model import CellModel 

# Hide every CUDA device from this process so that TensorFlow runs on the CPU only.
# NOTE(review): this is assigned after `import tensorflow`; it presumably still takes
# effect because no TensorFlow op has run yet, but setting it before the import is the
# safer convention -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Paths to the .mat data files that are handed to the vehicle Environment.
# NOTE(review): `drving_cycle` looks like a typo of `driving_cycle`; the name is left
# unchanged because other cells may reference it.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
cell_model = CellModel()
# The fifth positional argument (10) is the slot that initialization_env() fills with
# its `reward_factor` parameter.
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 10)

# Length of the state vector: sizes the replay-buffer rows and the network input layers.
num_states = 4

In [3]:
class OUActionNoise:
    """Exploration noise generated by a discretised Ornstein-Uhlenbeck process.

    Every call advances the process by one step of size ``dt`` and returns the new
    value, so consecutive samples are correlated and drift back towards ``mean``.

    Args:
        mean: Value the process reverts to.
        std_deviation: Scale of the Gaussian perturbation added at each step.
        theta: Strength of the pull towards ``mean``.
        dt: Step size of the discretisation.
        x_initial: Starting value of the process; 0 is used when it is ``None``.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.theta = theta
        self.mean = mean
        self.std_dev = std_deviation
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Put the process back at ``x_initial`` (or 0 when none was supplied)."""
        self.x_prev = 0 if self.x_initial is None else self.x_initial

    def __call__(self):
        """Advance the process by one step and return the new sample."""
        # Deterministic pull towards the mean.
        drift = self.theta * (self.mean - self.x_prev) * self.dt
        # Random perturbation drawn from NumPy's global random state.
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal()
        self.x_prev = self.x_prev + drift + diffusion
        return self.x_prev

In [4]:
class Buffer:
    """Circular replay memory that also runs the DDPG gradient step.

    Transitions are kept as ``(state, action, reward, next_state)`` rows in
    pre-allocated NumPy arrays. Once ``buffer_capacity`` rows have been written, new
    transitions overwrite the oldest ones. The width of the state arrays is taken
    from the module-level ``num_states``.

    Args:
        buffer_capacity: Maximum number of transitions kept.
        batch_size: Number of transitions drawn for each call to ``learn``.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        # Number of record() calls so far; keeps growing past buffer_capacity.
        self.buffer_counter = 0

        rows = self.buffer_capacity
        self.state_buffer = np.zeros((rows, num_states))
        self.action_buffer = np.zeros((rows, 1))
        self.reward_buffer = np.zeros((rows, 1))
        self.next_state_buffer = np.zeros((rows, num_states))

    def record(self, obs_tuple):
        """Store one transition, overwriting the oldest row when the memory is full.

        Args:
            obs_tuple: Indexable ``(state, action, reward, next_state)``.
        """
        slot = self.buffer_counter % self.buffer_capacity
        stores = (
            self.state_buffer,
            self.action_buffer,
            self.reward_buffer,
            self.next_state_buffer,
        )
        for position, store in enumerate(stores):
            store[slot] = obs_tuple[position]

        self.buffer_counter += 1

    def learn(self):
        """Sample a batch and apply one critic update followed by one actor update.

        Relies on the module-level ``actor_model``, ``critic_model``, ``target_actor``,
        ``target_critic``, ``actor_optimizer``, ``critic_optimizer`` and ``gamma``.

        NOTE(review): no terminal flag is stored, so the bootstrap term
        ``gamma * target_critic(...)`` is added for every sampled transition,
        including the last one of an episode -- confirm this is intended.
        NOTE(review): the networks are called without ``training=True``; their
        BatchNormalization layers presumably run in inference mode here -- confirm.
        """
        # Only rows that have actually been written are eligible; sampling is done
        # with replacement (np.random.choice default).
        filled = min(self.buffer_counter, self.buffer_capacity)
        picked = np.random.choice(filled, self.batch_size)

        states, actions, rewards, next_states = (
            tf.convert_to_tensor(store[picked])
            for store in (
                self.state_buffer,
                self.action_buffer,
                self.reward_buffer,
                self.next_state_buffer,
            )
        )
        rewards = tf.cast(rewards, dtype=tf.float32)

        # Critic: minimise the squared error against the one-step bootstrapped target.
        with tf.GradientTape() as critic_tape:
            next_actions = target_actor(next_states)
            td_target = rewards + gamma * target_critic([next_states, next_actions])
            q_estimate = critic_model([states, actions])
            critic_loss = tf.math.reduce_mean(tf.square(td_target - q_estimate))
        critic_vars = critic_model.trainable_variables
        critic_optimizer.apply_gradients(
            zip(critic_tape.gradient(critic_loss, critic_vars), critic_vars)
        )

        # Actor: maximise the critic's value of the actor's own actions, expressed as
        # minimising the negated mean.
        with tf.GradientTape() as actor_tape:
            proposed_actions = actor_model(states)
            q_of_proposed = critic_model([states, proposed_actions])
            actor_loss = - tf.math.reduce_mean(q_of_proposed)
        actor_vars = actor_model.trainable_variables
        actor_optimizer.apply_gradients(
            zip(actor_tape.gradient(actor_loss, actor_vars), actor_vars)
        )
        

In [5]:
def update_target(tau):
    """Blend the online networks into their target networks (soft update).

    For each weight, ``target <- (1 - tau) * target + tau * online``. The critic pair
    is updated first, then the actor pair. Operates on the module-level
    ``critic_model``/``target_critic`` and ``actor_model``/``target_actor``.

    Args:
        tau: Interpolation factor; larger values move the targets faster.
    """
    network_pairs = (
        (target_critic, critic_model),
        (target_actor, actor_model),
    )
    for target_net, online_net in network_pairs:
        current_target = target_net.weights
        blended = [
            current_target[idx] * (1 - tau) + tau * online_weight
            for idx, online_weight in enumerate(online_net.weights)
        ]
        target_net.set_weights(blended)
    

In [6]:
def get_actor():
    """Build the actor (policy) network.

    Architecture: BatchNormalization on the raw state, two 512-unit ReLU layers, and
    a single sigmoid output, so the network maps a state vector of length
    ``num_states`` to one value in (0, 1).

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    # Very small output-layer weights keep the initial pre-activation close to zero,
    # i.e. the initial sigmoid output close to its midpoint.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # `shape` must be a tuple: `(num_states)` is just the int itself, which newer
    # Keras releases reject. `(num_states,)` describes the same per-sample shape.
    inputs = layers.Input(shape=(num_states,))
    inputs_batchnorm = layers.BatchNormalization()(inputs)

    out = layers.Dense(512, activation="relu")(inputs_batchnorm)
    out = layers.Dense(512, activation="relu")(out)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=last_init)(out)
    model = tf.keras.Model(inputs, outputs)
    return model

In [7]:
def get_critic():
    """Build the critic (action-value) network.

    The state passes through BatchNormalization and 16- and 32-unit ReLU layers; the
    scalar action passes through one 32-unit ReLU layer. Both branches are
    concatenated and fed through two 512-unit ReLU layers into a single linear
    output.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[state, action]`` as inputs.
    """
    # `shape` must be a tuple: `(num_states)` and `(1)` are plain ints, which newer
    # Keras releases reject. The one-element tuples describe the same shapes.
    state_input = layers.Input(shape=(num_states,))
    state_input_batchnorm = layers.BatchNormalization()(state_input)

    state_out = layers.Dense(16, activation="relu")(state_input_batchnorm)
    state_out = layers.Dense(32, activation="relu")(state_out)

    action_input = layers.Input(shape=(1,))
    action_out = layers.Dense(32, activation="relu")(action_input)

    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.Dense(512, activation="relu")(out)
    # No activation: the output is an unbounded value estimate.
    outputs = layers.Dense(1)(out)

    model = tf.keras.Model([state_input, action_input], outputs)
    return model
    

In [8]:
def policy(state, noise_object):
    """Pick an action from the actor output perturbed by additive noise.

    The bounds of the legal action range are read from positions 2 and 3 of the
    first row of ``state``. The noisy actor output is multiplied by the upper bound
    and then clipped into ``[lower, upper]``. Uses the module-level ``actor_model``.

    Args:
        state: Batched state tensor (one row) whose elements expose ``.numpy()``.
        noise_object: Zero-argument callable returning the noise sample to add.

    Returns:
        The clipped action value.
    """
    first_row = state[0]
    lower = first_row[2].numpy()
    upper = first_row[3].numpy()

    noisy_output = tf.squeeze(actor_model(state)).numpy() + noise_object()
    return np.clip(noisy_output * upper, lower, upper)
    

In [9]:
def policy_epsilon_greedy(state, eps):
    """Epsilon-greedy action selection over the legal action range.

    The bounds of the legal range are the last two entries of the first row of
    ``state``. With probability ``eps`` one of 10 evenly spaced values between the
    bounds is returned; otherwise the actor output is multiplied by the upper bound
    and clipped into the range. Uses the module-level ``actor_model``.

    Args:
        state: Batched state tensor (one row) whose elements expose ``.numpy()``.
        eps: Exploration probability; any value <= 0 always takes the greedy branch.

    Returns:
        The selected action value.
    """
    first_row = state[0]
    lower = first_row[-2].numpy()
    upper = first_row[-1].numpy()

    exploring = random.random() < eps
    if not exploring:
        greedy_action = tf.squeeze(actor_model(state)).numpy() * upper
        return np.clip(greedy_action, lower, upper)

    # Uniformly pick one of the 10 grid points spanning [lower, upper].
    grid_index = random.randint(0, 9)
    return np.linspace(lower, upper, 10)[grid_index]

In [10]:
# Ornstein-Uhlenbeck noise for the `policy` function. The noise scale is passed via
# `std_dev` so the two cannot drift apart (it used to be a duplicated literal 0.2).
std_dev = 0.2
ou_noise = OUActionNoise(mean=0, std_deviation=std_dev)

# Separate Adam optimizers for the critic and the actor.
critic_lr = 0.0005
actor_lr = 0.00025
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

total_episodes = 300  # episodes per trial
gamma = 0.95          # discount factor used in the critic target
tau = 0.001           # soft-update rate passed to update_target

# Epsilon-greedy schedule: eps decays exponentially from MAX_EPSILON towards
# MIN_EPSILON with rate DECAY_RATE per environment step once learning has started.
MAX_EPSILON = 1
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
BATCH_SIZE = 32
# Number of environment steps collected before the first gradient update.
DELAY_TRAINING = 10000

In [11]:
def initialization():
    """Create a fresh agent: online networks, target networks and replay buffer.

    The target networks start as exact copies of the online networks. The replay
    buffer holds up to 500000 transitions and samples ``BATCH_SIZE`` per update.

    Returns:
        Tuple ``(actor_model, critic_model, target_actor, target_critic, buffer)``.
    """
    actor_model, critic_model = get_actor(), get_critic()
    target_actor, target_critic = get_actor(), get_critic()

    # Synchronise each target network with its online counterpart.
    for target_net, online_net in ((target_actor, actor_model),
                                   (target_critic, critic_model)):
        target_net.set_weights(online_net.get_weights())

    return (
        actor_model,
        critic_model,
        target_actor,
        target_critic,
        Buffer(500000, BATCH_SIZE),
    )

In [12]:
def save_weights(actor_model, critic_model, target_actor, target_critic, root):
    """Save the weights of all four networks under ``./<root>/``.

    The target directory is created first if it does not exist, so the function does
    not depend on the saver creating it.

    Args:
        actor_model: Online actor network.
        critic_model: Online critic network.
        target_actor: Target actor network.
        target_critic: Target critic network.
        root: Directory name (relative to the working directory) for the checkpoints.
    """
    os.makedirs("./{}".format(root), exist_ok=True)

    checkpoints = (
        (actor_model, "./{}/actor_model_checkpoint"),
        (critic_model, "./{}/critic_model_checkpoint"),
        (target_actor, "./{}/target_actor_checkpoint"),
        (target_critic, "./{}/target_critic_checkpoint"),
    )
    for network, path_template in checkpoints:
        network.save_weights(path_template.format(root))
    print("model is saved..")

In [13]:
def initialization_env(driving_path, reward_factor):
    """Create a new Environment for one driving cycle.

    The cell model, battery file and motor file come from the module-level
    ``cell_model``, ``battery_path`` and ``motor_path``.

    Args:
        driving_path: Path of the driving-cycle file to simulate.
        reward_factor: Forwarded unchanged as the last Environment argument.

    Returns:
        The newly constructed Environment.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [14]:
def test_agent(actor_model, reward_factor, test_path_start):
    test_cycles = glob.glob("../data/driving_cycles/city/*.mat")[test_path_start:]
    test_cycle = np.random.choice(test_cycles)
    env = initialization_env(test_cycle, reward_factor)
    
    total_reward = 0
    state = env.reset() 
    while True: 
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
        action = policy_epsilon_greedy(tf_state, -1)
        next_state, reward, done = env.step(action)
        
        state = next_state 
        total_reward += reward 
        
        if done: 
            break 
        
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
    
    print("******************* Test is start *****************")
    print(test_cycle)
    print('Total reward: {}'.format(total_reward), 
          "SOC: {:.4f}".format(env.SOC), 
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption))
    print("******************* Test is done *****************")
    print("")
    return env.history  
    

In [15]:
# Training driver: runs `num_trials` independent DDPG trainings and collects the
# per-episode metrics of each trial in `results_dict` (keyed by trial number, 1-based).
print(env.version)

num_trials = 1
results_dict = {}
# `[:1]` keeps only the first path returned by glob, so training uses a single cycle.
# NOTE(review): glob does not guarantee any ordering, so which file is "first" (and
# which is "last" for the test in test_agent) may differ between machines -- confirm.
driving_cycle_paths = glob.glob("../data/driving_cycles/city/*.mat")[:1]

for trial in range(num_trials):
    print("")
    print("Trial {}".format(trial))
    print("")

    # These assignments rebind the module-level names that Buffer.learn,
    # update_target and the policy functions read.
    actor_model, critic_model, target_actor, target_critic, buffer = initialization()

    eps = MAX_EPSILON
    # Environment steps taken in this trial, counted across episodes.
    steps = 0

    episode_rewards = []
    episode_SOCs = []
    episode_FCs = []
    episode_test_history = []
    episode_num_test = []
    for ep in range(total_episodes):
        driving_cycle_path = np.random.choice(driving_cycle_paths)
        print(driving_cycle_path)
        # A new environment is built for every episode with reward factor 10.
        env = initialization_env(driving_cycle_path, 10)

        start = time.time()
        state = env.reset()
        episodic_reward = 0

        while True:
            # Add a leading batch dimension before feeding the state to the policy.
            tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
            action = policy_epsilon_greedy(tf_state, eps)
            next_state, reward, done = env.step(action)
            if done:
                # The terminal transition is stored with an all-zero next state.
                next_state = [0] * num_states

            buffer.record((state, action, reward, next_state))
            episodic_reward += reward

            # Learning starts only after DELAY_TRAINING warm-up steps; from then on
            # every environment step performs one gradient update, one soft target
            # update and one step of the exponential epsilon decay.
            if steps > DELAY_TRAINING:
                buffer.learn()
                update_target(tau)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps
                                                                        -DELAY_TRAINING))

            steps += 1

            if done:
                break

            state = next_state

        elapsed_time = time.time() - start
        print("elapsed_time: {:.3f}".format(elapsed_time))
        episode_rewards.append(episodic_reward)
        episode_SOCs.append(env.SOC)
        episode_FCs.append(env.fuel_consumption)

        # Sum of absolute deviations of the episode's SOC trace from 0.6.
        SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
        print(
              'Episode: {}'.format(ep + 1),
              "Exploration P: {:.4f}".format(eps),
              'Total reward: {}'.format(episodic_reward),
              "SOC: {:.4f}".format(env.SOC),
              "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
              "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
        )
        print("")

        # Every 10th episode: one greedy evaluation run; `-1` makes test_agent slice
        # its cycle list down to the last entry.
        if (ep + 1) % 10 == 0:
            history = test_agent(actor_model, 10, -1)
            episode_test_history.append(history)
            episode_num_test.append(ep + 1)

    # Persist the trained networks of this trial under ./DDPG1_trial<N>/.
    root = "DDPG1_trial{}".format(trial+1)
    save_weights(actor_model, critic_model, target_actor, target_critic, root)

    results_dict[trial + 1] = {
        "rewards": episode_rewards,
        "SOCs": episode_SOCs,
        "FCs": episode_FCs,
        "test_history": episode_test_history,
        "test_episode_num": episode_num_test,
    }

1

Trial 0

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 14.918
Episode: 1 Exploration P: 1.0000 Total reward: -4652.871223037801 SOC: 1.0000 Cumulative_SOC_deviation: 449.7243 Fuel Consumption: 155.6278

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 14.518
Episode: 2 Exploration P: 1.0000 Total reward: -4552.152779363954 SOC: 1.0000 Cumulative_SOC_deviation: 440.3676 Fuel Consumption: 148.4769

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 14.465
Episode: 3 Exploration P: 1.0000 Total reward: -4642.877489541429 SOC: 1.0000 Cumulative_SOC_deviation: 448.7471 Fuel Consumption: 155.4066

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 14.805
Episode: 4 Exploration P: 1.0000 Total reward: -4590.073733141276 SOC: 1.0000 Cumulative_SOC_deviation: 443.6833 Fuel Consumption: 153.2403



maximum steps, simulation is done ... 
elapsed_time: 82.061
Episode: 21 Exploration P: 0.6893 Total reward: -3849.9475412708266 SOC: 1.0000 Cumulative_SOC_deviation: 373.8782 Fuel Consumption: 111.1655

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.069
Episode: 22 Exploration P: 0.6709 Total reward: -3672.6971046281324 SOC: 1.0000 Cumulative_SOC_deviation: 356.3063 Fuel Consumption: 109.6342

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 81.920
Episode: 23 Exploration P: 0.6530 Total reward: -3325.37654365129 SOC: 1.0000 Cumulative_SOC_deviation: 322.2092 Fuel Consumption: 103.2849

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 81.557
Episode: 24 Exploration P: 0.6356 Total reward: -3268.4456247528165 SOC: 1.0000 Cumulative_SOC_deviation: 316.2308 Fuel Consumption: 106.1377

../data/driving_cycles/city\01_FTP72_fuds.mat
maximu

maximum steps, simulation is done ... 
elapsed_time: 82.314
Episode: 51 Exploration P: 0.3081 Total reward: -937.5342571386524 SOC: 0.6780 Cumulative_SOC_deviation: 88.3064 Fuel Consumption: 54.4700

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 81.422
Episode: 52 Exploration P: 0.3000 Total reward: -1112.1666747126026 SOC: 0.6643 Cumulative_SOC_deviation: 105.8852 Fuel Consumption: 53.3150

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.771
Episode: 53 Exploration P: 0.2921 Total reward: -980.7170371319429 SOC: 0.6072 Cumulative_SOC_deviation: 93.2211 Fuel Consumption: 48.5057

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 81.384
Episode: 54 Exploration P: 0.2845 Total reward: -997.6375882494118 SOC: 0.6201 Cumulative_SOC_deviation: 94.7764 Fuel Consumption: 49.8732

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps,

maximum steps, simulation is done ... 
elapsed_time: 82.871
Episode: 81 Exploration P: 0.1408 Total reward: -3270.416793134187 SOC: 0.2749 Cumulative_SOC_deviation: 324.6978 Fuel Consumption: 23.4389

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 81.705
Episode: 82 Exploration P: 0.1372 Total reward: -3300.4373088692228 SOC: 0.3054 Cumulative_SOC_deviation: 327.4430 Fuel Consumption: 26.0073

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.740
Episode: 83 Exploration P: 0.1338 Total reward: -3265.663689908311 SOC: 0.2830 Cumulative_SOC_deviation: 324.1544 Fuel Consumption: 24.1198

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.685
Episode: 84 Exploration P: 0.1304 Total reward: -3377.91120575214 SOC: 0.2742 Cumulative_SOC_deviation: 335.4263 Fuel Consumption: 23.6486

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum step

maximum steps, simulation is done ... 
elapsed_time: 82.990
Episode: 111 Exploration P: 0.0674 Total reward: -118.39435477068503 SOC: 0.6030 Cumulative_SOC_deviation: 7.3403 Fuel Consumption: 44.9911

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.683
Episode: 112 Exploration P: 0.0658 Total reward: -120.99370527670166 SOC: 0.6007 Cumulative_SOC_deviation: 7.6399 Fuel Consumption: 44.5944

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.273
Episode: 113 Exploration P: 0.0643 Total reward: -137.6669004599813 SOC: 0.5964 Cumulative_SOC_deviation: 9.3678 Fuel Consumption: 43.9885

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.110
Episode: 114 Exploration P: 0.0628 Total reward: -114.73457338911294 SOC: 0.6046 Cumulative_SOC_deviation: 6.9811 Fuel Consumption: 44.9238

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps

maximum steps, simulation is done ... 
elapsed_time: 83.106
Episode: 141 Exploration P: 0.0352 Total reward: -155.12586682670627 SOC: 0.5915 Cumulative_SOC_deviation: 11.1444 Fuel Consumption: 43.6823

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.684
Episode: 142 Exploration P: 0.0345 Total reward: -196.88882324509152 SOC: 0.5872 Cumulative_SOC_deviation: 15.3227 Fuel Consumption: 43.6619

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.781
Episode: 143 Exploration P: 0.0338 Total reward: -213.23852471008715 SOC: 0.5901 Cumulative_SOC_deviation: 16.9076 Fuel Consumption: 44.1628

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.080
Episode: 144 Exploration P: 0.0332 Total reward: -154.51982300291354 SOC: 0.5933 Cumulative_SOC_deviation: 11.0181 Fuel Consumption: 44.3386

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum 

maximum steps, simulation is done ... 
elapsed_time: 83.905
Episode: 171 Exploration P: 0.0210 Total reward: -203.1879575965326 SOC: 0.5959 Cumulative_SOC_deviation: 15.8053 Fuel Consumption: 45.1348

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.466
Episode: 172 Exploration P: 0.0207 Total reward: -148.62440458522997 SOC: 0.5998 Cumulative_SOC_deviation: 10.2654 Fuel Consumption: 45.9703

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 81.986
Episode: 173 Exploration P: 0.0205 Total reward: -156.48017741892585 SOC: 0.5945 Cumulative_SOC_deviation: 11.0761 Fuel Consumption: 45.7191

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.605
Episode: 174 Exploration P: 0.0202 Total reward: -174.23665110609684 SOC: 0.6010 Cumulative_SOC_deviation: 12.8483 Fuel Consumption: 45.7533

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum s

maximum steps, simulation is done ... 
elapsed_time: 83.262
Episode: 201 Exploration P: 0.0148 Total reward: -160.9191387943483 SOC: 0.5918 Cumulative_SOC_deviation: 11.4713 Fuel Consumption: 46.2062

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.391
Episode: 202 Exploration P: 0.0147 Total reward: -213.51320087165797 SOC: 0.5934 Cumulative_SOC_deviation: 16.7643 Fuel Consumption: 45.8705

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.514
Episode: 203 Exploration P: 0.0146 Total reward: -202.14993443464482 SOC: 0.5940 Cumulative_SOC_deviation: 15.6571 Fuel Consumption: 45.5785

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.929
Episode: 204 Exploration P: 0.0145 Total reward: -200.78414647643424 SOC: 0.5898 Cumulative_SOC_deviation: 15.5673 Fuel Consumption: 45.1114

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum s

maximum steps, simulation is done ... 
elapsed_time: 81.913
Episode: 231 Exploration P: 0.0121 Total reward: -102.15924014401122 SOC: 0.6017 Cumulative_SOC_deviation: 5.6856 Fuel Consumption: 45.3028

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.710
Episode: 232 Exploration P: 0.0121 Total reward: -107.38866629136092 SOC: 0.6023 Cumulative_SOC_deviation: 6.1320 Fuel Consumption: 46.0685

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 82.068
Episode: 233 Exploration P: 0.0120 Total reward: -108.03174454223895 SOC: 0.6000 Cumulative_SOC_deviation: 6.2561 Fuel Consumption: 45.4704

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 83.574
Episode: 234 Exploration P: 0.0120 Total reward: -127.72758284093975 SOC: 0.6003 Cumulative_SOC_deviation: 8.1420 Fuel Consumption: 46.3080

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum step

maximum steps, simulation is done ... 
elapsed_time: 77.010
Episode: 261 Exploration P: 0.0109 Total reward: -113.00594264891532 SOC: 0.6003 Cumulative_SOC_deviation: 6.7707 Fuel Consumption: 45.2986

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 77.203
Episode: 262 Exploration P: 0.0109 Total reward: -102.80003722491651 SOC: 0.6023 Cumulative_SOC_deviation: 5.7808 Fuel Consumption: 44.9922

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 77.017
Episode: 263 Exploration P: 0.0109 Total reward: -104.36779898294448 SOC: 0.5991 Cumulative_SOC_deviation: 5.9507 Fuel Consumption: 44.8606

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 77.487
Episode: 264 Exploration P: 0.0109 Total reward: -114.84439900678632 SOC: 0.6010 Cumulative_SOC_deviation: 6.9393 Fuel Consumption: 45.4517

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum step

maximum steps, simulation is done ... 
elapsed_time: 69.570
Episode: 291 Exploration P: 0.0104 Total reward: -89.52653918170073 SOC: 0.6009 Cumulative_SOC_deviation: 4.6805 Fuel Consumption: 42.7214

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 69.684
Episode: 292 Exploration P: 0.0104 Total reward: -93.52289619451918 SOC: 0.6012 Cumulative_SOC_deviation: 5.0894 Fuel Consumption: 42.6285

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 70.401
Episode: 293 Exploration P: 0.0104 Total reward: -102.41167492119126 SOC: 0.6009 Cumulative_SOC_deviation: 5.9081 Fuel Consumption: 43.3310

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
elapsed_time: 68.492
Episode: 294 Exploration P: 0.0104 Total reward: -100.23037510069953 SOC: 0.5998 Cumulative_SOC_deviation: 5.7089 Fuel Consumption: 43.1409

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps,

In [16]:
# Persist the per-trial training results to DDPG1.pkl in the working directory.
with open("DDPG1.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [17]:
# results_dict

{1: {'rewards': [-4652.871223037801,
   -4552.152779363954,
   -4642.877489541429,
   -4590.073733141276,
   -4656.454301482022,
   -4676.738223155193,
   -4724.579971422787,
   -4481.8135577866105,
   -4516.141070773495,
   -4609.436413674837,
   -4446.524828195151,
   -4399.043047669382,
   -4354.331373274277,
   -4153.91176218358,
   -4394.804181884799,
   -4247.803969490678,
   -3979.030446739092,
   -4076.880289984936,
   -4009.1341363095553,
   -3888.4040864795447,
   -3849.9475412708266,
   -3672.6971046281324,
   -3325.37654365129,
   -3268.4456247528165,
   -3600.949274001366,
   -3265.739629294986,
   -2942.1712159143935,
   -3099.148687203389,
   -2619.4243504081214,
   -2243.2930042069984,
   -2667.4281490594067,
   -2498.314088632156,
   -2215.8563632561777,
   -2074.4059661180513,
   -1828.1223354489734,
   -1947.1914193026114,
   -1509.6759369975869,
   -1609.0794179612649,
   -1475.846011649432,
   -1926.7071481254052,
   -1396.6520899196296,
   -1138.2619928525662,
   