In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import matplotlib.pyplot as plt 
from collections import deque 
import glob 

from vehicle_model_DDQN1 import Environment 
from cell_model import CellModel 

# Set CUDA_VISIBLE_DEVICES to '-1', presumably to hide every GPU so TensorFlow runs on CPU only.
# NOTE(review): this runs after `import tensorflow` above; it presumably still takes
# effect because no GPU device has been initialised yet, but setting it before the
# TensorFlow import is the safer order — confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Input files for the bootstrap environment created at the end of this cell.
# NOTE: `drving_cycle` is a misspelling of "driving_cycle"; the name is left as-is
# because it is a notebook-level variable.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
# battery_path, motor_path and cell_model are also read by initialization_env(),
# which builds the per-episode environments.
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
cell_model = CellModel()
# In the visible cells this instance is only queried for
# `calculation_comp["action_size"]` and `version`; the training loop rebinds `env`
# to a fresh Environment every episode. The final positional argument (1) sits in
# the position where initialization_env() passes `reward_factor`.
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 1)


In [3]:
# Length of the state vector: used as the network input size in initialization()
# and as the size of the zero placeholder train() substitutes for a terminal next
# state. It is hard-coded; the commented-out line shows the alternative of reading
# it from the environment.
# STATE_SIZE = env.calculation_comp["state_size"]
STATE_SIZE = 4
# Number of discrete actions = number of outputs of each Q-network.
ACTION_SIZE = env.calculation_comp["action_size"] 
# Step size handed to the Adam optimizer of the primary network.
LEARNING_RATE = 0.00025 

# Number of training episodes run per reward factor.
TOTAL_EPISODES = 400
# NOTE(review): not referenced in any visible cell.
MAX_STEPS = 50000 

# Discount factor applied to the bootstrapped next-state value in train().
GAMMA = 0.95 

# Exploration schedule used by the training loop once training has started:
#   eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * exp(-DECAY_RATE * (steps - DELAY_TRAINING))
MAX_EPSILON = 1 
MIN_EPSILON = 0.01 
DECAY_RATE = 0.00002
# Number of transitions requested from the replay memory per training step.
BATCH_SIZE = 32 
# Mixing coefficient of the soft target update in update_network().
TAU = 0.001 
# Training (and epsilon decay) only runs while the global step counter exceeds this value.
DELAY_TRAINING = 10000
# NOTE(review): not referenced in any visible cell.
EPSILON_MIN_ITER = 5000

In [4]:
# Module-level Q-networks: two hidden ReLU layers of 30 units (He-normal
# initialisation) followed by a linear layer with one output per action.
# No input shape is declared here, so the weights are only created on first call.
# Note: the training cell rebinds `primary_network` / `target_network` to the
# networks returned by initialization(), so these instances are not the ones trained.
primary_network = keras.Sequential([
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(ACTION_SIZE),
])
target_network = keras.Sequential([
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(ACTION_SIZE),
])

# Only the primary network is compiled; the target network is never fitted
# directly, it is updated through update_network().
primary_network.compile(
    loss="mse",
    # `learning_rate` is the supported keyword; the `lr` alias is deprecated and
    # rejected by newer Keras releases.
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
)

# for t, p in zip(target_network.trainable_variables, primary_network.trainable_variables): 
#     t.assign(p)

In [5]:
def update_network(primary_network, target_network):
    """Soft-update the target network towards the primary network.

    Every trainable variable of ``target_network`` is overwritten in place with
    ``target * (1 - TAU) + primary * TAU``. Variables are paired by position in
    the two ``trainable_variables`` lists; the primary network is not modified.
    """
    variable_pairs = zip(target_network.trainable_variables,
                         primary_network.trainable_variables)
    for target_var, primary_var in variable_pairs:
        blended = target_var * (1 - TAU) + primary_var * TAU
        target_var.assign(blended)

In [6]:
class Memory:
    """Bounded FIFO store of replay samples.

    Samples are kept in insertion order in a plain list. After each insertion,
    if the list holds more than ``max_memory`` entries, the oldest one is dropped.
    """

    def __init__(self, max_memory):
        """Create an empty store that keeps at most ``max_memory`` samples."""
        self._samples = []
        self.max_memory = max_memory

    def add_sample(self, sample):
        """Append ``sample``; evict the oldest entry if the store overflowed."""
        stored = self._samples
        stored.append(sample)
        if len(stored) > self.max_memory:
            del stored[0]

    def sample(self, no_samples):
        """Return a list of distinct stored samples picked at random.

        At most ``no_samples`` entries are returned; when fewer are stored,
        all of them are returned (in random order).
        """
        available = len(self._samples)
        draw_count = no_samples if no_samples <= available else available
        return random.sample(self._samples, draw_count)

    @property
    def num_samples(self):
        """Number of samples currently stored."""
        return len(self._samples)
    

# memory = Memory(10000)

In [7]:
def choose_action(state, primary_network, eps):
    """Pick an action with an epsilon-greedy policy.

    With probability ``eps`` a uniformly random action index in
    ``[0, ACTION_SIZE - 1]`` is returned. Otherwise ``state`` is reshaped to a
    single-row batch, passed through ``primary_network`` and the index of the
    largest output is returned.
    """
    explore = random.random() < eps
    if explore:
        return random.randint(0, ACTION_SIZE - 1)

    single_state_batch = np.array(state).reshape(1, -1)
    q_values = primary_network(single_state_batch)
    return np.argmax(q_values)

In [8]:
def train(primary_network, target_network, memory):
    """Run one Double-DQN training step on a batch drawn from the replay memory.

    Each sample is a ``(state, action, reward, next_state)`` tuple in which
    ``next_state`` is ``None`` for a terminal transition. For every sample the
    regression target of the taken action is::

        reward                                              (terminal)
        reward + GAMMA * Q_target(next_state, argmax_a Q_primary(next_state, a))

    while the targets of all other actions are the primary network's current
    predictions, so they contribute no error.

    Args:
        primary_network: network being trained; called on batches and fitted
            with ``train_on_batch``.
        target_network: network used to evaluate the action chosen by the
            primary network for the next state.
        memory: object whose ``sample(n)`` returns a list of sample tuples.

    Returns:
        Whatever ``primary_network.train_on_batch`` returns (the batch loss).
    """
    batch = memory.sample(BATCH_SIZE)
    states = np.array([val[0] for val in batch])
    actions = np.array([val[1] for val in batch])
    # Explicit float dtype: the bootstrap term is added in place below, which
    # would fail on an integer array if the environment returned int rewards.
    rewards = np.array([val[2] for val in batch], dtype=np.float64)
    # Terminal transitions are identified by the stored None marker. Inferring
    # them from "next state sums to zero" would misclassify any genuine next
    # state whose features happen to cancel out.
    non_terminal = np.array([val[3] is not None for val in batch], dtype=bool)
    # Terminal next states are replaced by zeros only so the batch is rectangular;
    # their network outputs are never used.
    next_states = np.array([np.zeros(STATE_SIZE) if val[3] is None else val[3]
                            for val in batch])

    prim_qt = primary_network(states)
    prim_qtp1 = primary_network(next_states)
    target_q = prim_qt.numpy()
    # memory.sample() may return fewer than BATCH_SIZE items, so index by the
    # actual batch length.
    batch_idxs = np.arange(len(batch))
    # Double DQN: the primary network selects the next action ...
    prim_action_tp1 = np.argmax(prim_qtp1.numpy(), axis=1)
    # ... and the target network evaluates it.
    q_from_target = target_network(next_states).numpy()

    updates = rewards.copy()
    updates[non_terminal] += GAMMA * q_from_target[batch_idxs[non_terminal],
                                                   prim_action_tp1[non_terminal]]

    target_q[batch_idxs, actions] = updates
    loss = primary_network.train_on_batch(states, target_q)
    return loss

In [9]:
def _build_q_network():
    """Return an uncompiled Q-network: STATE_SIZE inputs -> 30 -> 30 (ReLU, He-normal) -> ACTION_SIZE linear outputs."""
    return keras.Sequential([
        keras.layers.Dense(30, activation="relu", input_shape=[STATE_SIZE],
                           kernel_initializer=keras.initializers.he_normal()),
        keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
        keras.layers.Dense(ACTION_SIZE),
    ])


def initialization(memory_size=10000):
    """Create a fresh replay memory and a primary/target Q-network pair.

    The two networks share the same architecture but are initialised
    independently (the target is not copied from the primary); only the
    primary network is compiled, with MSE loss and Adam.

    Args:
        memory_size: capacity of the replay memory (defaults to 10000).

    Returns:
        Tuple ``(memory, primary_network, target_network)``.
    """
    memory = Memory(memory_size)

    # Build order (primary first, then target) is kept so weight initialisation
    # consumes random numbers in the same order as before.
    primary_network = _build_q_network()
    target_network = _build_q_network()

    primary_network.compile(
        loss="mse",
        # `learning_rate` is the supported keyword; the `lr` alias is deprecated
        # and rejected by newer Keras releases.
        optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    )
    return memory, primary_network, target_network
    

In [10]:
def initialization_env(driving_path, reward_factor):
    """Build an Environment for one driving cycle.

    The notebook-level ``cell_model``, ``battery_path`` and ``motor_path`` are
    reused; only the driving-cycle path and the reward factor vary per call.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [11]:
def save_weights(primary_net, target_net, root):
    """Save the weights of both networks under ``./<root>/`` and print a confirmation.

    The primary network is written to ``primary_net_checkpoint`` and the target
    network to ``target_net_checkpoint``, in that order.
    """
    checkpoints = (
        (primary_net, "./{}/primary_net_checkpoint"),
        (target_net, "./{}/target_net_checkpoint"),
    )
    for network, path_template in checkpoints:
        network.save_weights(path_template.format(root))
    print("model is saved..")

In [12]:
def test_agent(primary_network, reward_factor, test_path_start): 
    test_cycles = glob.glob("../data/driving_cycles/city/*.mat")[test_path_start:]
    test_cycle = np.random.choice(test_cycles)
    env = initialization_env(test_cycle, reward_factor)
    
    total_reward = 0 
    state = env.reset()
    while True: 
        action = np.argmax(primary_network(np.array(state).reshape(1, -1))) 
        next_state, reward, done = env.step(action)
        
        state = next_state 
        total_reward += reward 
        
        if done: 
            break 
            
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
    print("******************* Test is done *****************")
    print(test_cycle)
    print('Total reward: {}'.format(total_reward), 
          "SOC: {:.4f}".format(env.SOC), 
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption))
    print("******************* Test is done *****************")
    print("")
    return env.history 
    
    

In [13]:
# Training driver: for every reward factor, train a fresh DDQN agent for
# TOTAL_EPISODES episodes on randomly chosen driving cycles, evaluate it every
# 10 episodes, save the network weights and collect per-episode statistics.
print("environment version: {}".format(env.version)) 

 
reward_factors = [10] 
# Maps reward_factor -> dict of per-episode statistics for that trial.
results_dict = {} 
# Training cycles: the first 7 paths returned by glob. Evaluation (test_agent with
# test_path_start=-1) draws from the last path of the same glob pattern.
# NOTE(review): glob does not promise a sorted order, so this train/test split
# presumably relies on the directory listing order being stable — confirm.
driving_cycle_paths = glob.glob("../data/driving_cycles/city/*.mat")[:7]

for trial, reward_factor in enumerate(reward_factors): 
    eps = MAX_EPSILON 
    # Global environment-step counter for the whole trial; it is NOT reset per
    # episode and drives both the training gate and the epsilon schedule.
    steps = 0
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    episode_test_history = [] 
    episode_num_test = [] 
    
    # Rebinds the notebook-level primary_network / target_network names.
    memory, primary_network, target_network = initialization()
    # One episode = one full pass over a randomly selected training cycle.
    for episode in range(TOTAL_EPISODES): 
        driving_cycle_path = np.random.choice(driving_cycle_paths)
        print(driving_cycle_path)
        env = initialization_env(driving_cycle_path, reward_factor)
        state = env.reset() 

        avg_loss = 0 
        total_reward = 0
        # Per-episode step count, starting at 1; only used to average avg_loss.
        cnt = 1 

        while True:
            action = choose_action(state, primary_network, eps)
            next_state, reward, done = env.step(action)
            total_reward += reward 
            # A terminal transition is stored with next_state=None; train()
            # replaces it with a zero vector when building the batch.
            if done: 
                next_state = None 
            memory.add_sample((state, action, reward, next_state))

            # Training, the soft target update and epsilon decay only run once
            # more than DELAY_TRAINING steps have been taken; before that eps
            # stays at MAX_EPSILON and the loss is recorded as the placeholder -1.
            if steps > DELAY_TRAINING: 
                loss = train(primary_network, target_network, memory)
                update_network(primary_network, target_network)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps - 
                                                                        DELAY_TRAINING))
            else: 
                loss = -1

            avg_loss += loss 
            steps += 1 

            if done: 
                # Sum of absolute deviations of the recorded SOC trace from 0.6.
                SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
                # NOTE: avg_loss (which includes the -1 placeholders) is computed
                # here but neither printed nor stored anywhere in this cell.
                avg_loss /= cnt 
                print('Episode: {}'.format(episode),
                      'Total reward: {}'.format(total_reward), 
                      'Explore P: {:.4f}'.format(eps), 
                      "SOC: {:.4f}".format(env.SOC), 
                     "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
                     "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
                     )
                
                episode_rewards.append(total_reward)
                episode_SOCs.append(env.SOC)
                episode_FCs.append(env.fuel_consumption)
                break 

            state = next_state 
            cnt += 1 
        
        # Every 10th episode: greedy evaluation; the returned env.history is kept
        # together with the (1-based) episode number at which it was taken.
        if (episode + 1) % 10 == 0: 
            history = test_agent(primary_network, reward_factor, -1) 
            episode_test_history.append(history) 
            episode_num_test.append(episode + 1) 
    
    # Weights are written under ./DDQN3_trial<N>/ with N counted from 1.
    root = "DDQN3_trial{}".format(trial+1)
    save_weights(primary_network, target_network, root)
    
    results_dict[reward_factor] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs, 
        "test_history": episode_test_history, 
        "test_episode_num": episode_num_test,
    }
            
    

environment version: 1
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 0 Total reward: -3683.1876025270276 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 356.2401 Fuel Consumption: 120.7865
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 1 Total reward: -3914.6859482135933 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 379.2191 Fuel Consumption: 122.4945
../data/driving_cycles/city\FTP_75_cycle.mat
maximum steps, simulation is done ... 
Episode: 2 Total reward: -6571.389416275781 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 636.1257 Fuel Consumption: 210.1328
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 3 Total reward: -3635.2356359830187 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 351.5319 Fuel Consumption: 119.9169
../data/driving_cycles/city\ny_city_composite_truck.mat
maximum steps, simulation is done ..

maximum steps, simulation is done ... 
Episode: 28 Total reward: -3083.0331583951634 Explore P: 0.5516 SOC: 1.0000 Cumulative_SOC_deviation: 298.0768 Fuel Consumption: 102.2648
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 29 Total reward: -3591.655856266043 Explore P: 0.5369 SOC: 1.0000 Cumulative_SOC_deviation: 346.6973 Fuel Consumption: 124.6826
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -2930.435137356134 SOC: 1.0000 Cumulative_SOC_deviation: 286.6157 Fuel Consumption: 64.2784
******************* Test is done *****************

../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 30 Total reward: -3543.313949531126 Explore P: 0.5257 SOC: 1.0000 Cumulative_SOC_deviation: 342.9171 Fuel Consumption: 114.1430
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episod

maximum steps, simulation is done ... 
Episode: 60 Total reward: -833.7002210880136 Explore P: 0.2628 SOC: 0.6282 Cumulative_SOC_deviation: 78.6453 Fuel Consumption: 47.2468
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 61 Total reward: -1433.0054102163544 Explore P: 0.2560 SOC: 0.9348 Cumulative_SOC_deviation: 135.9596 Fuel Consumption: 73.4098
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 62 Total reward: -2525.332679315647 Explore P: 0.2507 SOC: 1.0000 Cumulative_SOC_deviation: 243.2278 Fuel Consumption: 93.0544
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 63 Total reward: -2741.8906600635064 Explore P: 0.2455 SOC: 1.0000 Cumulative_SOC_deviation: 264.8089 Fuel Consumption: 93.8015
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 64 Total reward: -2362.3209155907994 Explore P: 0.2404 SOC: 1.0000 Cumulativ

maximum steps, simulation is done ... 
Episode: 93 Total reward: -2127.25452345841 Explore P: 0.1259 SOC: 1.0000 Cumulative_SOC_deviation: 207.1442 Fuel Consumption: 55.8126
../data/driving_cycles/city\ny_city_composite_truck.mat
maximum steps, simulation is done ... 
Episode: 94 Total reward: -693.7846330210073 Explore P: 0.1235 SOC: 0.7289 Cumulative_SOC_deviation: 67.3383 Fuel Consumption: 20.4012
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 95 Total reward: -731.7645141724563 Explore P: 0.1211 SOC: 0.8309 Cumulative_SOC_deviation: 70.5760 Fuel Consumption: 26.0048
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 96 Total reward: -820.4319128880147 Explore P: 0.1187 SOC: 0.9362 Cumulative_SOC_deviation: 78.5168 Fuel Consumption: 35.2643
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 97 Total reward: -495.2889374143091 Explore P: 0.1164 SOC: 0.5944 Cumu

maximum steps, simulation is done ... 
Episode: 127 Total reward: -541.9346112044556 Explore P: 0.0612 SOC: 0.6984 Cumulative_SOC_deviation: 48.8267 Fuel Consumption: 53.6674
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 128 Total reward: -211.03633084129748 Explore P: 0.0601 SOC: 0.6701 Cumulative_SOC_deviation: 19.6949 Fuel Consumption: 14.0875
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 129 Total reward: -934.0337733063923 Explore P: 0.0590 SOC: 0.8296 Cumulative_SOC_deviation: 90.6137 Fuel Consumption: 27.8967
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -114.03971873401663 SOC: 0.5993 Cumulative_SOC_deviation: 10.1031 Fuel Consumption: 13.0084
******************* Test is done *****************

../data/driving_cycles/city\ny_city_composite_truck.mat
maximum steps, simulation is done ... 


maximum steps, simulation is done ... 
Episode: 160 Total reward: -57.44759279778919 Explore P: 0.0322 SOC: 0.5964 Cumulative_SOC_deviation: 4.8994 Fuel Consumption: 8.4535
../data/driving_cycles/city\FTP_75_cycle.mat
maximum steps, simulation is done ... 
Episode: 161 Total reward: -670.0183695025551 Explore P: 0.0314 SOC: 0.5718 Cumulative_SOC_deviation: 59.6300 Fuel Consumption: 73.7188
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 162 Total reward: -2523.5478337179875 Explore P: 0.0309 SOC: 1.0000 Cumulative_SOC_deviation: 247.0850 Fuel Consumption: 52.6975
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 163 Total reward: -91.51458864669091 Explore P: 0.0305 SOC: 0.5917 Cumulative_SOC_deviation: 8.3625 Fuel Consumption: 7.8901
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 164 Total reward: -1284.9878104343481 Explore P: 0.0300 SOC: 1.0000 Cumu

maximum steps, simulation is done ... 
Episode: 194 Total reward: -188.82867798722643 Explore P: 0.0191 SOC: 0.6091 Cumulative_SOC_deviation: 14.4871 Fuel Consumption: 43.9575
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 195 Total reward: -138.05566493744277 Explore P: 0.0189 SOC: 0.6066 Cumulative_SOC_deviation: 9.3416 Fuel Consumption: 44.6392
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 196 Total reward: -124.67902961368358 Explore P: 0.0186 SOC: 0.6064 Cumulative_SOC_deviation: 7.9856 Fuel Consumption: 44.8229
../data/driving_cycles/city\ny_city_composite_truck.mat
maximum steps, simulation is done ... 
Episode: 197 Total reward: -103.4475708547791 Explore P: 0.0184 SOC: 0.6668 Cumulative_SOC_deviation: 8.8938 Fuel Consumption: 14.5099
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 198 Total reward: -121.27614134332596 Explore P: 0.0183 SOC

maximum steps, simulation is done ... 
Episode: 228 Total reward: -134.42653053661405 Explore P: 0.0137 SOC: 0.6077 Cumulative_SOC_deviation: 9.0449 Fuel Consumption: 43.9778
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 229 Total reward: -92.12969673465332 Explore P: 0.0136 SOC: 0.6127 Cumulative_SOC_deviation: 8.0179 Fuel Consumption: 11.9512
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -4214.806434808531 SOC: 1.0000 Cumulative_SOC_deviation: 407.4284 Fuel Consumption: 140.5222
******************* Test is done *****************

../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 230 Total reward: -74.55479253698745 Explore P: 0.0136 SOC: 0.6138 Cumulative_SOC_deviation: 6.2738 Fuel Consumption: 11.8165
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 23

maximum steps, simulation is done ... 
Episode: 260 Total reward: -73.52753558893484 Explore P: 0.0117 SOC: 0.6127 Cumulative_SOC_deviation: 6.4694 Fuel Consumption: 8.8332
../data/driving_cycles/city\ny_city_composite_truck.mat
maximum steps, simulation is done ... 
Episode: 261 Total reward: -551.0015551831168 Explore P: 0.0116 SOC: 0.7449 Cumulative_SOC_deviation: 53.1592 Fuel Consumption: 19.4091
../data/driving_cycles/city\FTP_75_cycle.mat
maximum steps, simulation is done ... 
Episode: 262 Total reward: -223.6299495466805 Explore P: 0.0116 SOC: 0.6083 Cumulative_SOC_deviation: 15.4060 Fuel Consumption: 69.5695
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 263 Total reward: -182.63488663510265 Explore P: 0.0115 SOC: 0.6818 Cumulative_SOC_deviation: 16.8988 Fuel Consumption: 13.6466
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 264 Total reward: -488.0210362112723 Explore P: 0.0115 SOC: 0.7662 C

maximum steps, simulation is done ... 
Episode: 294 Total reward: -103.76477764508331 Explore P: 0.0107 SOC: 0.6066 Cumulative_SOC_deviation: 6.0718 Fuel Consumption: 43.0470
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 295 Total reward: -54.51134578766113 Explore P: 0.0107 SOC: 0.6098 Cumulative_SOC_deviation: 4.5935 Fuel Consumption: 8.5759
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 296 Total reward: -59.77661501814118 Explore P: 0.0107 SOC: 0.6112 Cumulative_SOC_deviation: 4.8315 Fuel Consumption: 11.4613
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 297 Total reward: -96.62736159866911 Explore P: 0.0107 SOC: 0.6112 Cumulative_SOC_deviation: 5.3127 Fuel Consumption: 43.5002
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 298 Total reward: -60.092862066572756 Explore P: 0.0106 SOC: 0.6031 Cumulative_S

maximum steps, simulation is done ... 
Episode: 328 Total reward: -150.03530940511166 Explore P: 0.0103 SOC: 0.6125 Cumulative_SOC_deviation: 7.9278 Fuel Consumption: 70.7575
../data/driving_cycles/city\ny_city_composite_truck.mat
maximum steps, simulation is done ... 
Episode: 329 Total reward: -59.9800622081204 Explore P: 0.0103 SOC: 0.6082 Cumulative_SOC_deviation: 4.9977 Fuel Consumption: 10.0026
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -114.30807993075774 SOC: 0.6074 Cumulative_SOC_deviation: 10.1120 Fuel Consumption: 13.1880
******************* Test is done *****************

../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 330 Total reward: -54.05036000140351 Explore P: 0.0103 SOC: 0.6039 Cumulative_SOC_deviation: 4.5572 Fuel Consumption: 8.4780
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 

maximum steps, simulation is done ... 
Episode: 360 Total reward: -57.295405625269424 Explore P: 0.0101 SOC: 0.5985 Cumulative_SOC_deviation: 4.9356 Fuel Consumption: 7.9390
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 361 Total reward: -66.43840610688234 Explore P: 0.0101 SOC: 0.6050 Cumulative_SOC_deviation: 5.8107 Fuel Consumption: 8.3310
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 362 Total reward: -112.089497927202 Explore P: 0.0101 SOC: 0.6101 Cumulative_SOC_deviation: 6.9577 Fuel Consumption: 42.5123
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 363 Total reward: -72.2337161358886 Explore P: 0.0101 SOC: 0.6124 Cumulative_SOC_deviation: 6.0564 Fuel Consumption: 11.6695
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 364 Total reward: -695.0273580952395 Explore P: 0.0101 SOC: 0.6940 Cumulative

maximum steps, simulation is done ... 
Episode: 394 Total reward: -70.72640672119981 Explore P: 0.0101 SOC: 0.6128 Cumulative_SOC_deviation: 6.0369 Fuel Consumption: 10.3578
../data/driving_cycles/city\FTP_75_cycle.mat
maximum steps, simulation is done ... 
Episode: 395 Total reward: -172.42882722343063 Explore P: 0.0101 SOC: 0.6097 Cumulative_SOC_deviation: 10.2161 Fuel Consumption: 70.2676
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 396 Total reward: -132.94348315988225 Explore P: 0.0101 SOC: 0.6181 Cumulative_SOC_deviation: 9.0443 Fuel Consumption: 42.5000
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 397 Total reward: -140.41185323080492 Explore P: 0.0101 SOC: 0.6171 Cumulative_SOC_deviation: 9.8181 Fuel Consumption: 42.2306
../data/driving_cycles/city\FTP_75_cycle.mat
maximum steps, simulation is done ... 
Episode: 398 Total reward: -170.25857723063422 Explore P: 0.0101 SOC: 0.6118 Cu

In [14]:
# Persist the per-reward-factor training statistics with the highest pickle protocol.
with open("DDQN3.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [15]:
# with open("results/replay_memory_size_effect.pkl", "rb") as f: 
#     data = pickle.load(f)
    
# data