In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import matplotlib.pyplot as plt 
from collections import deque 
import glob 

from vehicle_model_DDQN1 import Environment 
from cell_model import CellModel 

# Set CUDA_VISIBLE_DEVICES to '-1' for this process (presumably to hide GPUs so
# TensorFlow runs on CPU).
# NOTE(review): this runs after `import tensorflow`; confirm the setting still
# takes effect in the installed TensorFlow version.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Data-file paths (.mat) handed to Environment: drive cycle, battery and motor.
# NOTE(review): `drving_cycle` looks like a typo for `driving_cycle`; the name is
# left unchanged in case cells outside this view reference it.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# Shared cell model instance; initialization_env() passes it to every Environment.
cell_model = CellModel()
# Bootstrap environment. Later cells read env.calculation_comp["action_size"] and
# env.version from it. The fifth positional argument (1) sits where
# initialization_env() passes its reward_factor.
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 1)


In [3]:
# --- Hyper-parameters shared by the cells below ---

# Width of the state vector: used as the networks' input_shape in initialization()
# and as the length of the zero placeholder for terminal next-states in train().
# Hard-coded here; the environment-derived alternative is kept commented out.
# STATE_SIZE = env.calculation_comp["state_size"]
STATE_SIZE = 4
# Number of discrete actions, read from the bootstrap environment.
ACTION_SIZE = env.calculation_comp["action_size"] 
# Learning rate passed to the Adam optimizer of the primary network.
LEARNING_RATE = 0.00025 

# Number of training episodes run per reward factor.
TOTAL_EPISODES = 400
# NOTE(review): not referenced by any cell visible in this notebook.
MAX_STEPS = 50000 

# Discount factor applied to the bootstrapped next-state value in train().
GAMMA = 0.95 

# Exploration schedule: once training has started,
# eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * exp(-DECAY_RATE * (steps - DELAY_TRAINING)).
MAX_EPSILON = 1 
MIN_EPSILON = 0.01 
DECAY_RATE = 0.00002
# Number of transitions requested from the replay memory per training step.
BATCH_SIZE = 32 
# Interpolation factor of the soft target-network update in update_network().
TAU = 0.001 
# Training (and epsilon decay) only starts after this many environment steps.
DELAY_TRAINING = 10000
# NOTE(review): not referenced by any cell visible in this notebook.
EPSILON_MIN_ITER = 5000

In [4]:
# Module-level Q-networks: two hidden ReLU layers of 30 units (He-normal init)
# followed by a linear layer with one output per action.
# NOTE: the training cell rebinds `primary_network` and `target_network` to the
# models returned by initialization(), so these two instances are only used if
# other code calls them before that happens. They are created without an
# input_shape, so their weights do not exist until the first forward pass.
primary_network = keras.Sequential([
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
#     keras.layers.BatchNormalization(),
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
#     keras.layers.BatchNormalization(),
    keras.layers.Dense(ACTION_SIZE),
])
target_network = keras.Sequential([
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
#     keras.layers.BatchNormalization(),
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
#     keras.layers.BatchNormalization(),
    keras.layers.Dense(ACTION_SIZE),
])

# Only the primary network is trained directly, so only it needs a loss/optimizer.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated and
# rejected by newer Keras releases.
primary_network.compile(
    loss="mse",
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
)

# for t, p in zip(target_network.trainable_variables, primary_network.trainable_variables):
#     t.assign(p)

In [5]:
def update_network(primary_network, target_network, tau=None):
    """Blend the primary network's weights into the target network (soft update).

    Every trainable variable of ``target_network`` is overwritten in place with
    ``target * (1 - tau) + primary * tau``. Variables are paired positionally,
    so both networks are expected to share the same architecture.

    Args:
        primary_network: Network whose weights are being tracked.
        target_network: Network whose trainable variables are updated in place.
        tau: Interpolation factor. ``None`` (the default) uses the module-level
            ``TAU``; ``1.0`` performs a hard copy of the primary weights.

    Returns:
        None.
    """
    if tau is None:
        tau = TAU
    variable_pairs = zip(target_network.trainable_variables,
                         primary_network.trainable_variables)
    for target_var, primary_var in variable_pairs:
        target_var.assign(target_var * (1 - tau) + primary_var * tau)

In [6]:
class Memory:
    """Bounded FIFO replay buffer.

    Samples are kept in insertion order in ``_samples``; once more than
    ``max_memory`` entries are stored, the oldest entry is discarded.
    """

    def __init__(self, max_memory):
        """Create an empty buffer that retains at most ``max_memory`` samples."""
        self.max_memory = max_memory
        self._samples = []

    def add_sample(self, sample):
        """Append ``sample`` and evict the oldest entry if the buffer overflows."""
        self._samples.append(sample)
        overflowed = len(self._samples) > self.max_memory
        if overflowed:
            del self._samples[0]

    def sample(self, no_samples):
        """Return up to ``no_samples`` distinct stored samples, chosen at random.

        When fewer than ``no_samples`` entries are stored, all of them are
        returned (in random order).
        """
        draw_count = min(no_samples, len(self._samples))
        return random.sample(self._samples, draw_count)

    @property
    def num_samples(self):
        """Number of samples currently stored."""
        return len(self._samples)
    

# memory = Memory(10000)

In [7]:
def choose_action(state, primary_network, eps):
    """Pick an action epsilon-greedily.

    With probability ``eps`` a uniformly random action index in
    ``[0, ACTION_SIZE - 1]`` is returned; otherwise the index of the largest
    output of ``primary_network`` for ``state`` is returned.

    Args:
        state: Current observation; it is reshaped to a single-row batch.
        primary_network: Callable producing one value per action for a batch.
        eps: Exploration probability.

    Returns:
        The chosen action index.
    """
    explore = random.random() < eps
    if explore:
        return random.randint(0, ACTION_SIZE - 1)

    # Greedy branch: evaluate the network on a batch containing only `state`.
    observation = np.array(state).reshape(1, -1)
    q_values = primary_network(observation)
    return np.argmax(q_values)

In [8]:
def train(primary_network, target_network, memory):
    """Run one Double-DQN training step on a batch drawn from ``memory``.

    Each stored transition is a tuple ``(state, action, reward, next_state)``
    where ``next_state`` is ``None`` for a terminal transition. For every
    transition the regression target of the taken action is::

        reward                                   (terminal)
        reward + GAMMA * Q_target(s', argmax_a Q_primary(s', a))   (otherwise)

    The targets for all other actions are the primary network's current
    predictions, so they contribute no error.

    Args:
        primary_network: Compiled network being trained; also selects the
            next action.
        target_network: Network used to evaluate the selected next action.
        memory: Replay buffer exposing ``sample(n)``.

    Returns:
        The loss reported by ``primary_network.train_on_batch``.
    """
    batch = memory.sample(BATCH_SIZE)
    # The buffer may return fewer than BATCH_SIZE transitions, so size every
    # index array from the batch actually received.
    batch_size = len(batch)

    states = np.array([transition[0] for transition in batch])
    actions = np.array([transition[1] for transition in batch])
    # Force a float dtype so the in-place bootstrap addition below also works
    # when the environment hands back integer rewards.
    rewards = np.array([transition[2] for transition in batch], dtype=float)

    # Terminal transitions are identified by their `None` marker. Inferring
    # this from "next state sums to zero" would misclassify genuine states
    # whose components happen to cancel out.
    non_terminal = np.array([transition[3] is not None for transition in batch],
                            dtype=bool)
    # Terminal rows get a zero placeholder so the batch stays rectangular;
    # their network outputs are never used.
    next_states = np.array([np.zeros(STATE_SIZE) if transition[3] is None else transition[3]
                            for transition in batch])

    target_q = primary_network(states).numpy()
    # Double DQN: the primary network picks the next action ...
    best_next_actions = np.argmax(primary_network(next_states).numpy(), axis=1)
    # ... and the target network supplies that action's value.
    next_q_target = target_network(next_states).numpy()

    rows = np.arange(batch_size)
    updates = rewards  # freshly built above, safe to modify in place
    updates[non_terminal] += GAMMA * next_q_target[rows[non_terminal],
                                                   best_next_actions[non_terminal]]

    # Only the entry of the action actually taken receives a new target.
    target_q[rows, actions] = updates
    loss = primary_network.train_on_batch(states, target_q)
    return loss

In [9]:
def _build_q_network():
    """Build an uncompiled Q-network.

    Architecture: two 30-unit ReLU layers (He-normal initialisation) on a
    ``STATE_SIZE``-wide input, followed by a linear layer with ``ACTION_SIZE``
    outputs. Passing ``input_shape`` builds the weights immediately.
    """
    return keras.Sequential([
        keras.layers.Dense(30, activation="relu", input_shape=[STATE_SIZE],
                           kernel_initializer=keras.initializers.he_normal()),
        keras.layers.Dense(30, activation="relu",
                           kernel_initializer=keras.initializers.he_normal()),
        keras.layers.Dense(ACTION_SIZE),
    ])


def initialization(max_memory=10000):
    """Create a fresh replay memory and primary/target network pair.

    The target network starts as an exact copy of the primary network so that
    the first bootstrapped targets come from the same function that is being
    trained, rather than from an unrelated random initialisation. Only the
    primary network is compiled because only it is trained directly.

    Args:
        max_memory: Capacity of the replay memory (defaults to 10000).

    Returns:
        Tuple ``(memory, primary_network, target_network)``.
    """
    memory = Memory(max_memory)

    primary_network = _build_q_network()
    target_network = _build_q_network()
    # Hard-sync the target with the primary before any soft updates happen.
    target_network.set_weights(primary_network.get_weights())

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    primary_network.compile(
        loss="mse",
        optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    )
    return memory, primary_network, target_network
    

In [10]:
def initialization_env(driving_path, reward_factor):
    """Build an Environment for one driving cycle.

    The module-level ``cell_model``, ``battery_path`` and ``motor_path`` are
    reused; only the driving-cycle path and the reward factor vary per call.

    Args:
        driving_path: Path of the driving-cycle file to simulate.
        reward_factor: Value forwarded as the Environment's last argument.

    Returns:
        The newly constructed Environment.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [11]:
def save_weights(primary_net, target_net, root):
    """Save both networks' weights as checkpoints under ``./<root>/``.

    Args:
        primary_net: Network saved to ``./<root>/primary_net_checkpoint``.
        target_net: Network saved to ``./<root>/target_net_checkpoint``.
        root: Directory name (relative to the working directory).
    """
    primary_checkpoint = "./{}/primary_net_checkpoint".format(root)
    target_checkpoint = "./{}/target_net_checkpoint".format(root)

    primary_net.save_weights(primary_checkpoint)
    target_net.save_weights(target_checkpoint)
    print("model is saved..")

In [12]:
def test_agent(primary_network, reward_factor, test_path_start): 
    test_cycles = glob.glob("../data/driving_cycles/city/*.mat")[test_path_start:]
    test_cycle = np.random.choice(test_cycles)
    env = initialization_env(test_cycle, reward_factor)
    
    total_reward = 0 
    state = env.reset()
    while True: 
        action = np.argmax(primary_network(np.array(state).reshape(1, -1))) 
        next_state, reward, done = env.step(action)
        
        state = next_state 
        total_reward += reward 
        
        if done: 
            break 
            
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
    print("******************* Test is done *****************")
    print(test_cycle)
    print('Total reward: {}'.format(total_reward), 
          "SOC: {:.4f}".format(env.SOC), 
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption))
    print("******************* Test is done *****************")
    print("")
    return env.history 
    

In [13]:
print("environment version: {}".format(env.version))

# SOC value against which the cumulative SOC deviation is measured.
SOC_REFERENCE = 0.6

reward_factors = [10]
results_dict = {}
# Training cycles: the first three city cycles in the order glob returns them.
# test_agent() is called with start index -1, i.e. it evaluates on the last one.
driving_cycle_paths = glob.glob("../data/driving_cycles/city/*.mat")[:3]

for trial, reward_factor in enumerate(reward_factors):
    eps = MAX_EPSILON
    steps = 0  # environment steps taken across all episodes of this trial
    episode_rewards = []
    episode_SOCs = []
    episode_FCs = []
    episode_losses = []
    episode_test_history = []
    episode_num_test = []

    memory, primary_network, target_network = initialization()
    for episode in range(TOTAL_EPISODES):
        driving_cycle_path = np.random.choice(driving_cycle_paths)
        print(driving_cycle_path)
        env = initialization_env(driving_cycle_path, reward_factor)
        state = env.reset()

        # Only real training losses are accumulated; steps taken before
        # training starts contribute nothing to the average.
        loss_sum = 0.0
        train_steps = 0
        total_reward = 0

        while True:
            action = choose_action(state, primary_network, eps)
            next_state, reward, done = env.step(action)
            total_reward += reward
            if done:
                # Terminal transitions are stored with next_state=None.
                next_state = None
            memory.add_sample((state, action, reward, next_state))

            # Warm-up: fill the replay memory before training or decaying eps.
            if steps > DELAY_TRAINING:
                loss_sum += train(primary_network, target_network, memory)
                train_steps += 1
                update_network(primary_network, target_network)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(
                    -DECAY_RATE * (steps - DELAY_TRAINING))

            steps += 1

            if done:
                SOC_deviation_history = np.sum(
                    np.abs(np.array(env.history["SOC"]) - SOC_REFERENCE))
                # NaN marks episodes in which no training step was performed.
                avg_loss = loss_sum / train_steps if train_steps else float("nan")
                print('Episode: {}'.format(episode),
                      'Total reward: {}'.format(total_reward),
                      'Explore P: {:.4f}'.format(eps),
                      "SOC: {:.4f}".format(env.SOC),
                      "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
                      "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
                      "Avg loss: {:.4f}".format(avg_loss),
                      )

                episode_rewards.append(total_reward)
                episode_SOCs.append(env.SOC)
                episode_FCs.append(env.fuel_consumption)
                episode_losses.append(avg_loss)
                break

            state = next_state

        # Evaluate the greedy policy on the held-out cycle every 10 episodes.
        if (episode + 1) % 10 == 0:
            history = test_agent(primary_network, reward_factor, -1)
            episode_test_history.append(history)
            episode_num_test.append(episode + 1)

    root = "DDQN2_trial{}".format(trial+1)
    save_weights(primary_network, target_network, root)

    results_dict[reward_factor] = {
        "rewards": episode_rewards,
        "SOCs": episode_SOCs,
        "FCs": episode_FCs,
        "losses": episode_losses,
        "test_history": episode_test_history,
        "test_episode_num": episode_num_test,
    }
            
    

environment version: 1
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 0 Total reward: -3638.608206937147 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 351.7482 Fuel Consumption: 121.1267
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 1 Total reward: -3652.337961522773 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 353.1210 Fuel Consumption: 121.1280
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 2 Total reward: -3854.8794781867628 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 373.5289 Fuel Consumption: 119.5904
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 3 Total reward: -3681.3834671048808 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 356.2238 Fuel Consumption: 119.1452
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode

maximum steps, simulation is done ... 
Episode: 28 Total reward: -3941.939822203139 Explore P: 0.6108 SOC: 1.0000 Cumulative_SOC_deviation: 381.3439 Fuel Consumption: 128.5012
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 29 Total reward: -3356.1911754862203 Explore P: 0.5982 SOC: 1.0000 Cumulative_SOC_deviation: 325.2435 Fuel Consumption: 103.7563
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -751.7052419906879 SOC: 0.7083 Cumulative_SOC_deviation: 73.1137 Fuel Consumption: 20.5681
******************* Test is done *****************

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 30 Total reward: -3658.190892768613 Explore P: 0.5823 SOC: 1.0000 Cumulative_SOC_deviation: 352.8785 Fuel Consumption: 129.4062
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode:

maximum steps, simulation is done ... 
Episode: 60 Total reward: -2475.872388517739 Explore P: 0.2965 SOC: 1.0000 Cumulative_SOC_deviation: 242.3662 Fuel Consumption: 52.2101
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 61 Total reward: -1179.0292756857257 Explore P: 0.2888 SOC: 0.8247 Cumulative_SOC_deviation: 111.5558 Fuel Consumption: 63.4712
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 62 Total reward: -2888.4720359837706 Explore P: 0.2828 SOC: 1.0000 Cumulative_SOC_deviation: 282.7939 Fuel Consumption: 60.5333
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 63 Total reward: -3151.8753066189292 Explore P: 0.2769 SOC: 1.0000 Cumulative_SOC_deviation: 308.5908 Fuel Consumption: 65.9673
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 64 Total reward: -2766.6443872925147 Explore P: 0.2713 SOC: 1.0000 

maximum steps, simulation is done ... 
Episode: 94 Total reward: -812.6408631420646 Explore P: 0.1425 SOC: 0.5166 Cumulative_SOC_deviation: 77.4738 Fuel Consumption: 37.9030
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 95 Total reward: -1068.0962984323314 Explore P: 0.1390 SOC: 0.5553 Cumulative_SOC_deviation: 102.4720 Fuel Consumption: 43.3763
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 96 Total reward: -728.0962007038862 Explore P: 0.1363 SOC: 0.5732 Cumulative_SOC_deviation: 68.5502 Fuel Consumption: 42.5939
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 97 Total reward: -1466.4138609739578 Explore P: 0.1335 SOC: 1.0000 Cumulative_SOC_deviation: 141.0156 Fuel Consumption: 56.2574
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 98 Total reward: -643.653627210838 Explore P: 0.1302 SOC: 0.5941 Cumu

maximum steps, simulation is done ... 
Episode: 128 Total reward: -698.7553873461762 Explore P: 0.0698 SOC: 0.5653 Cumulative_SOC_deviation: 65.4105 Fuel Consumption: 44.6501
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 129 Total reward: -722.6150965958817 Explore P: 0.0685 SOC: 0.5730 Cumulative_SOC_deviation: 67.9372 Fuel Consumption: 43.2431
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -348.09746144593464 SOC: 0.5755 Cumulative_SOC_deviation: 33.6024 Fuel Consumption: 12.0734
******************* Test is done *****************

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 130 Total reward: -2546.5393863868394 Explore P: 0.0673 SOC: 1.0000 Cumulative_SOC_deviation: 246.1435 Fuel Consumption: 85.1040
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode:

maximum steps, simulation is done ... 
Episode: 160 Total reward: -2055.272940413383 Explore P: 0.0383 SOC: 1.0000 Cumulative_SOC_deviation: 201.1951 Fuel Consumption: 43.3221
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 161 Total reward: -232.9872026537314 Explore P: 0.0376 SOC: 0.6177 Cumulative_SOC_deviation: 18.7896 Fuel Consumption: 45.0910
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 162 Total reward: -283.16663245310747 Explore P: 0.0368 SOC: 0.6062 Cumulative_SOC_deviation: 23.8517 Fuel Consumption: 44.6501
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 163 Total reward: -255.54757473660672 Explore P: 0.0361 SOC: 0.6088 Cumulative_SOC_deviation: 21.1167 Fuel Consumption: 44.3807
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 164 Total reward: -246.69998179578974 Explore P: 0.0354 SOC: 0.60

maximum steps, simulation is done ... 
Episode: 194 Total reward: -206.72169264957202 Explore P: 0.0228 SOC: 0.6243 Cumulative_SOC_deviation: 19.6688 Fuel Consumption: 10.0335
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 195 Total reward: -1033.5411469432004 Explore P: 0.0225 SOC: 0.7836 Cumulative_SOC_deviation: 101.2619 Fuel Consumption: 20.9219
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 196 Total reward: -371.87486584751747 Explore P: 0.0223 SOC: 0.6963 Cumulative_SOC_deviation: 32.1859 Fuel Consumption: 50.0162
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 197 Total reward: -240.37482585697126 Explore P: 0.0219 SOC: 0.6400 Cumulative_SOC_deviation: 19.4377 Fuel Consumption: 45.9974
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 198 Total reward: -254.65725802506145 Explore P: 0.0216 SOC: 0.6

maximum steps, simulation is done ... 
Episode: 228 Total reward: -257.6699480694357 Explore P: 0.0158 SOC: 0.6191 Cumulative_SOC_deviation: 21.3608 Fuel Consumption: 44.0622
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 229 Total reward: -252.84472042977092 Explore P: 0.0157 SOC: 0.6253 Cumulative_SOC_deviation: 20.8966 Fuel Consumption: 43.8785
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -255.5896285939597 SOC: 0.6190 Cumulative_SOC_deviation: 24.1463 Fuel Consumption: 14.1270
******************* Test is done *****************

../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 230 Total reward: -893.2294289616655 Explore P: 0.0155 SOC: 0.6492 Cumulative_SOC_deviation: 84.7292 Fuel Consumption: 45.9376
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 23

maximum steps, simulation is done ... 
Episode: 260 Total reward: -300.3338007329996 Explore P: 0.0127 SOC: 0.6302 Cumulative_SOC_deviation: 29.0104 Fuel Consumption: 10.2294
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 261 Total reward: -290.2392978551342 Explore P: 0.0126 SOC: 0.6299 Cumulative_SOC_deviation: 24.6359 Fuel Consumption: 43.8800
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 262 Total reward: -355.2035571075228 Explore P: 0.0126 SOC: 0.6284 Cumulative_SOC_deviation: 31.0909 Fuel Consumption: 44.2949
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 263 Total reward: -313.6398980436281 Explore P: 0.0125 SOC: 0.6315 Cumulative_SOC_deviation: 30.3276 Fuel Consumption: 10.3642
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 264 Total reward: -330.1747906998627 Explore P: 0.0125 SOC: 0.6677 Cum

maximum steps, simulation is done ... 
Episode: 294 Total reward: -333.2210556779908 Explore P: 0.0112 SOC: 0.6305 Cumulative_SOC_deviation: 28.8632 Fuel Consumption: 44.5889
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 295 Total reward: -293.13997697429863 Explore P: 0.0112 SOC: 0.6499 Cumulative_SOC_deviation: 24.8072 Fuel Consumption: 45.0680
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 296 Total reward: -1077.900564779248 Explore P: 0.0112 SOC: 0.8289 Cumulative_SOC_deviation: 101.9998 Fuel Consumption: 57.9026
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 297 Total reward: -427.44584045379247 Explore P: 0.0111 SOC: 0.6398 Cumulative_SOC_deviation: 38.0616 Fuel Consumption: 46.8303
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 298 Total reward: -379.4686617555545 Explore P: 0.0111 SOC: 0.6388

maximum steps, simulation is done ... 
Episode: 328 Total reward: -225.7904188689555 Explore P: 0.0105 SOC: 0.6249 Cumulative_SOC_deviation: 21.6014 Fuel Consumption: 9.7762
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 329 Total reward: -263.0498583127859 Explore P: 0.0105 SOC: 0.6207 Cumulative_SOC_deviation: 21.8241 Fuel Consumption: 44.8093
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -288.6488340800991 SOC: 0.6245 Cumulative_SOC_deviation: 27.4093 Fuel Consumption: 14.5557
******************* Test is done *****************

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 330 Total reward: -215.46900240413203 Explore P: 0.0105 SOC: 0.6223 Cumulative_SOC_deviation: 17.1064 Fuel Consumption: 44.4052
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 33

maximum steps, simulation is done ... 
Episode: 360 Total reward: -3838.5956867736572 Explore P: 0.0103 SOC: 1.0000 Cumulative_SOC_deviation: 373.7094 Fuel Consumption: 101.5012
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 361 Total reward: -303.28933785503995 Explore P: 0.0102 SOC: 0.6231 Cumulative_SOC_deviation: 25.8174 Fuel Consumption: 45.1155
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 362 Total reward: -2878.1930905828217 Explore P: 0.0102 SOC: 1.0000 Cumulative_SOC_deviation: 281.5269 Fuel Consumption: 62.9244
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 363 Total reward: -2951.918820450111 Explore P: 0.0102 SOC: 1.0000 Cumulative_SOC_deviation: 289.0442 Fuel Consumption: 61.4764
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 364 Total reward: -3578.198728591543 Explore P: 0.0102 SOC: 1.00

maximum steps, simulation is done ... 
Episode: 394 Total reward: -190.28334485708487 Explore P: 0.0101 SOC: 0.6248 Cumulative_SOC_deviation: 14.6452 Fuel Consumption: 43.8310
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 395 Total reward: -191.97680747088134 Explore P: 0.0101 SOC: 0.6167 Cumulative_SOC_deviation: 14.8331 Fuel Consumption: 43.6458
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 396 Total reward: -267.98886473461937 Explore P: 0.0101 SOC: 0.6708 Cumulative_SOC_deviation: 22.1182 Fuel Consumption: 46.8072
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 397 Total reward: -195.2229569140252 Explore P: 0.0101 SOC: 0.6236 Cumulative_SOC_deviation: 15.1112 Fuel Consumption: 44.1112
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 398 Total reward: -223.44056393256454 Explore P: 0.0101 SOC: 0.618

In [14]:
# Persist the per-reward-factor training results collected in results_dict.
with open("DDQN2.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [15]:
# with open("results/replay_memory_size_effect.pkl", "rb") as f: 
#     data = pickle.load(f)
    
# data