In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import matplotlib.pyplot as plt 
from collections import deque 
import glob 

from vehicle_model_DDQN1 import Environment 
from cell_model import CellModel 

# Set CUDA_VISIBLE_DEVICES to '-1' for this process — presumably to hide every GPU so
# TensorFlow runs on the CPU.
# NOTE(review): this assignment runs after `import tensorflow` above; confirm it still
# takes effect in the installed TensorFlow version, or move it before the import.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Data files handed to Environment: a drive cycle, a battery model and a motor model.
# NOTE(review): `drving_cycle` looks like a typo for `driving_cycle`; in the visible cells it
# is only read by the Environment(...) call below — confirm before renaming.
drving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
# battery_path and motor_path are also read by initialization_env() for every new environment.
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# Single CellModel instance shared by every Environment created in this notebook.
cell_model = CellModel()
# Initial environment; later cells read env.calculation_comp and env.version from it.
# The last argument (1) sits in the position initialization_env() uses for reward_factor.
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 1)


In [3]:
# Length of the state vector fed to the networks (hard-coded).
# NOTE(review): an earlier version read env.calculation_comp["state_size"] here —
# confirm the environment's state really has 4 entries.
STATE_SIZE = 4
# Number of discrete actions, taken from the environment; sets the output-layer width.
ACTION_SIZE = env.calculation_comp["action_size"] 
# Step size passed to the Adam optimizer.
LEARNING_RATE = 0.00025 

# Number of training episodes run per trial.
TOTAL_EPISODES = 400
# NOTE(review): not referenced by any cell visible in this notebook — confirm before removing.
MAX_STEPS = 50000 

# Discount factor applied to the bootstrapped next-state value in train().
GAMMA = 0.95 

# Exploration rate: starts at MAX_EPSILON and, once training has begun, decays
# exponentially toward MIN_EPSILON at DECAY_RATE per environment step.
MAX_EPSILON = 1 
MIN_EPSILON = 0.01 
DECAY_RATE = 0.00002
# Number of transitions requested from the replay memory per training step.
BATCH_SIZE = 32 
# Mixing coefficient of the soft target-network update in update_network().
TAU = 0.001 
# Training (and epsilon decay) only starts once the step counter exceeds this value.
DELAY_TRAINING = 10000
# NOTE(review): not referenced by any cell visible in this notebook — confirm before removing.
EPSILON_MIN_ITER = 5000

In [4]:
# Module-level Q-networks: two hidden ReLU layers of 30 units (He-normal initialised)
# followed by one linear output per action.
# No input shape is given, so Keras creates the weights lazily on the first call.
# NOTE: the training cell rebinds both names to the networks returned by initialization().
primary_network = keras.Sequential([
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(ACTION_SIZE),
])
target_network = keras.Sequential([
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(ACTION_SIZE),
])

# Only the primary network is trained with train_on_batch(), so only it is compiled;
# the target network is updated by update_network().
primary_network.compile(
    loss="mse",
    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated and is
    # rejected by newer Keras releases.
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
)

# The target network is not synchronised with the primary network here: neither model
# has weights until it has been called once.

In [5]:
def update_network(primary_network, target_network):
    """Blend the primary network's weights into the target network (soft update).

    Every trainable variable of ``target_network`` is overwritten in place with
    ``target * (1 - TAU) + primary * TAU``; variables are paired by position.
    ``primary_network`` is only read.
    """
    target_vars = target_network.trainable_variables
    primary_vars = primary_network.trainable_variables
    for target_var, primary_var in zip(target_vars, primary_vars):
        blended = target_var * (1 - TAU) + primary_var * TAU
        target_var.assign(blended)

In [6]:
class Memory:
    """Fixed-capacity FIFO replay buffer.

    Stores arbitrary sample objects (the training loop stores
    ``(state, action, reward, next_state)`` tuples). Once ``max_memory`` samples are
    held, adding another one discards the oldest.
    """

    def __init__(self, max_memory):
        """Create an empty buffer holding at most ``max_memory`` samples."""
        self.max_memory = max_memory
        # A bounded deque evicts the oldest entry in O(1); list.pop(0) shifted every
        # remaining element on each insert once the buffer was full.
        # max(..., 0) keeps a non-positive capacity behaving as "always empty".
        self._samples = deque(maxlen=max(max_memory, 0))

    def add_sample(self, sample):
        """Append ``sample``; the oldest entry is dropped automatically when full."""
        self._samples.append(sample)

    def sample(self, no_samples):
        """Return up to ``no_samples`` distinct stored samples chosen at random.

        If fewer than ``no_samples`` are stored, all of them are returned (in random
        order). The result is a new list; the buffer itself is not modified.
        """
        count = min(no_samples, len(self._samples))
        return random.sample(self._samples, count)

    @property
    def num_samples(self):
        """Number of samples currently stored."""
        return len(self._samples)
    

# memory = Memory(10000)

In [7]:
def choose_action(state, primary_network, eps):
    """Pick an action epsilon-greedily.

    With probability ``eps`` a uniformly random action index in
    ``[0, ACTION_SIZE - 1]`` is returned. Otherwise ``state`` is reshaped to a single
    row, passed through ``primary_network`` and the argmax of its output is returned.
    """
    explore = random.random() < eps
    if not explore:
        q_input = np.array(state).reshape(1, -1)
        return np.argmax(primary_network(q_input))
    return random.randint(0, ACTION_SIZE - 1)

In [8]:
def train(primary_network, target_network, memory):
    """Run one Double-DQN update of ``primary_network`` on a replayed minibatch.

    Args:
        primary_network: network being trained; it is called on the states and next
            states and then fitted with ``train_on_batch``.
        target_network: network used to evaluate the next-state action chosen by the
            primary network.
        memory: replay buffer whose ``sample(BATCH_SIZE)`` returns a list of
            ``(state, action, reward, next_state)`` tuples; ``next_state`` is ``None``
            for a terminal transition.

    Returns:
        Whatever ``primary_network.train_on_batch`` returns (the batch loss).
    """
    batch = memory.sample(BATCH_SIZE)
    states = np.array([val[0] for val in batch])
    actions = np.array([val[1] for val in batch])
    # Force a float dtype so the in-place bootstrap addition below also works when the
    # environment hands back integer rewards.
    updates = np.array([val[2] for val in batch], dtype=np.float64)
    # Terminal transitions are identified by the explicit None marker. Inferring them
    # from "next state sums to zero" would also drop the bootstrap term for a genuine
    # next state whose components happen to cancel out.
    valid_idxs = np.array([val[3] is not None for val in batch], dtype=bool)
    # Terminal next states are replaced by zeros only so the array is rectangular;
    # their network outputs are never used.
    next_states = np.array([np.zeros(STATE_SIZE) if val[3] is None else val[3]
                            for val in batch])

    prim_qt = primary_network(states)
    prim_qtp1 = primary_network(next_states)
    # Start from the current predictions so only the taken action's entry changes.
    target_q = prim_qt.numpy()
    # Use the actual batch length: the buffer may return fewer than BATCH_SIZE samples.
    batch_idxs = np.arange(len(batch))
    # Double DQN: the primary network selects the next action, the target network
    # provides its value.
    prim_action_tp1 = np.argmax(prim_qtp1.numpy(), axis=1)
    q_from_target = target_network(next_states)
    updates[valid_idxs] += GAMMA * q_from_target.numpy()[batch_idxs[valid_idxs],
                                                        prim_action_tp1[valid_idxs]]

    target_q[batch_idxs, actions] = updates
    loss = primary_network.train_on_batch(states, target_q)
    return loss

In [9]:
def initialization():
    """Create a fresh replay memory and a primary/target Q-network pair.

    Both networks share one architecture: ``STATE_SIZE`` inputs, two hidden ReLU layers
    of 30 units (He-normal initialised) and ``ACTION_SIZE`` linear outputs. Only the
    primary network is compiled (MSE loss, Adam with ``LEARNING_RATE``), because the
    target network is never fitted directly.

    Returns:
        tuple: ``(memory, primary_network, target_network)``.
    """
    def build_q_network():
        # Single definition of the architecture so the two networks cannot drift apart.
        return keras.Sequential([
            keras.layers.Dense(30, activation="relu", input_shape=[STATE_SIZE],
                               kernel_initializer=keras.initializers.he_normal()),
            keras.layers.Dense(30, activation="relu",
                               kernel_initializer=keras.initializers.he_normal()),
            keras.layers.Dense(ACTION_SIZE),
        ])

    memory = Memory(10000)
    primary_network = build_q_network()
    target_network = build_q_network()

    # Start the target network as an exact copy of the primary network. With two
    # independent random initialisations and only small soft updates afterwards, the
    # early bootstrap targets would come from an unrelated network.
    target_network.set_weights(primary_network.get_weights())

    primary_network.compile(
        loss="mse",
        # `learning_rate` is the supported keyword; the old `lr` alias is deprecated and
        # is rejected by newer Keras releases.
        optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    )
    return memory, primary_network, target_network
    

In [10]:
def initialization_env(driving_path, reward_factor):
    """Build a new Environment for the drive cycle at ``driving_path``.

    The module-level ``cell_model``, ``battery_path`` and ``motor_path`` are reused for
    every environment; ``reward_factor`` is forwarded as the last constructor argument.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [11]:
def save_weights(primary_net, target_net, root):
    """Save both networks' weights under the directory ``./<root>/``.

    The primary network is written to ``primary_net_checkpoint`` and the target network
    to ``target_net_checkpoint``; a confirmation line is printed afterwards.
    """
    checkpoints = (
        (primary_net, "./{}/primary_net_checkpoint"),
        (target_net, "./{}/target_net_checkpoint"),
    )
    for net, path_template in checkpoints:
        net.save_weights(path_template.format(root))
    print("model is saved..")

In [12]:
def test_agent(primary_network, reward_factor, test_path_start): 
    test_cycles = glob.glob("../data/driving_cycles/city/*.mat")[test_path_start:]
    test_cycle = np.random.choice(test_cycles)
    env = initialization_env(test_cycle, reward_factor)
    
    total_reward = 0 
    state = env.reset()
    while True: 
        action = np.argmax(primary_network(np.array(state).reshape(1, -1))) 
        next_state, reward, done = env.step(action)
        
        state = next_state 
        total_reward += reward 
        
        if done: 
            break 
            
    SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
    print("******************* Test is done *****************")
    print(test_cycle)
    print('Total reward: {}'.format(total_reward), 
          "SOC: {:.4f}".format(env.SOC), 
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption))
    print("******************* Test is done *****************")
    print("")
    
    

In [13]:
# Main training cell: for every reward factor, train a fresh agent for TOTAL_EPISODES
# episodes on randomly chosen training cycles, evaluate it every 10 episodes, then save
# the weights and the per-episode histories.
print("environment version: {}".format(env.version)) 

 
reward_factors = [10] 
results_dict = {} 
# Training cycles are the first 20 glob matches; test_agent() is called with 20 below,
# so it draws from the remaining matches of the same pattern.
# NOTE(review): the glob result is not sorted here — confirm the ordering is stable
# enough for the train/test split to be reproducible.
driving_cycle_paths = glob.glob("../data/driving_cycles/city/*.mat")[:20]

for trial, reward_factor in enumerate(reward_factors): 
    eps = MAX_EPSILON 
    # Environment steps taken so far in this trial; not reset between episodes.
    steps = 0
    episode_rewards = [] 
    episode_SOCs = [] 
    episode_FCs = [] 
    
    # Fresh replay memory and networks for every trial (rebinds the module-level networks).
    memory, primary_network, target_network = initialization()
    for episode in range(TOTAL_EPISODES): 
        driving_cycle_path = np.random.choice(driving_cycle_paths)
        print(driving_cycle_path)
        env = initialization_env(driving_cycle_path, reward_factor)
        state = env.reset() 

        avg_loss = 0 
        total_reward = 0
        cnt = 1 

        while True:
            action = choose_action(state, primary_network, eps)
            next_state, reward, done = env.step(action)
            total_reward += reward 
            if done: 
                # Terminal transitions are stored with next_state=None; train()
                # substitutes a zero vector for them.
                next_state = None 
            memory.add_sample((state, action, reward, next_state))

            if steps > DELAY_TRAINING: 
                # One gradient step and one soft target update per environment step.
                loss = train(primary_network, target_network, memory)
                update_network(primary_network, target_network)
                # Exponential decay from MAX_EPSILON toward MIN_EPSILON, counted from
                # the step at which training started.
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps - 
                                                                        DELAY_TRAINING))
            else: 
                # Placeholder while the replay memory is still being filled.
                loss = -1

            # NOTE(review): the -1 placeholder is accumulated too, and avg_loss is
            # neither printed nor stored anywhere in this cell.
            avg_loss += loss 
            steps += 1 

            if done: 
                # Sum over the episode of |SOC - 0.6|.
                SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6)) 
                avg_loss /= cnt 
                print('Episode: {}'.format(episode),
                      'Total reward: {}'.format(total_reward), 
                      'Explore P: {:.4f}'.format(eps), 
                      "SOC: {:.4f}".format(env.SOC), 
                     "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history), 
                     "Fuel Consumption: {:.4f}".format(env.fuel_consumption), 
                     )
                
                episode_rewards.append(total_reward)
                episode_SOCs.append(env.SOC)
                episode_FCs.append(env.fuel_consumption)
                break 

            state = next_state 
            cnt += 1 
        
        # Greedy evaluation on a held-out cycle after every 10th episode.
        if (episode + 1) % 10 == 0: 
            test_agent(primary_network, reward_factor, 20) 
    
    # Checkpoint directory name is offset by 2 from the trial index (trial 0 -> "DDQN1_trial2").
    # NOTE(review): presumably this directory must already exist — confirm save_weights creates it.
    root = "DDQN1_trial{}".format(trial+2)
    save_weights(primary_network, target_network, root)
    
    # Per-episode histories for this trial, keyed by reward factor.
    results_dict[reward_factor] = {
        "rewards": episode_rewards, 
        "SOCs": episode_SOCs, 
        "FCs": episode_FCs 
    }
            
    

environment version: 1
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Medium_1.mat
maximum steps, simulation is done ... 
Episode: 0 Total reward: -8578.546127467564 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 833.3675 Fuel Consumption: 244.8712
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Medium_1.mat
maximum steps, simulation is done ... 
Episode: 1 Total reward: -8652.87077560952 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 840.7239 Fuel Consumption: 245.6320
../data/driving_cycles/city\VITO_RW_BUS_TMB_Line24N_1.mat
maximum steps, simulation is done ... 
Episode: 2 Total reward: -11525.662012501505 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 1120.6650 Fuel Consumption: 319.0120
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 3 Total reward: -3696.2138623927717 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 357.3677 Fuel Consumption: 122.5366
../data/driving_cycles/city\VITO_MOLCity.

maximum steps, simulation is done ... 
Episode: 27 Total reward: -6371.025137954227 Explore P: 0.5639 SOC: 1.0000 Cumulative_SOC_deviation: 621.0022 Fuel Consumption: 161.0034
../data/driving_cycles/city\VITO_RW_Decade_Jumper_BCN_City1.mat
maximum steps, simulation is done ... 
Episode: 28 Total reward: -6276.988421046809 Explore P: 0.5447 SOC: 1.0000 Cumulative_SOC_deviation: 612.4127 Fuel Consumption: 152.8612
../data/driving_cycles/city\VITO_DUBDC.mat
maximum steps, simulation is done ... 
Episode: 29 Total reward: -2605.2027054402743 Explore P: 0.5352 SOC: 1.0000 Cumulative_SOC_deviation: 253.0025 Fuel Consumption: 75.1773
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\VITO_RW_Kangoo_DePost_Brussels_101_1.mat
Total reward: -7026.695932529145 SOC: 1.0000 Cumulative_SOC_deviation: 696.2918 Fuel Consumption: 63.7775
******************* Test is done *****************

../data/driving_cycles/city\VITO_RW_Decade_Octav

maximum steps, simulation is done ... 
Episode: 59 Total reward: -2521.599756227781 Explore P: 0.2146 SOC: 0.8137 Cumulative_SOC_deviation: 243.3254 Fuel Consumption: 88.3456
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\VITO_RW_Jumper_Brussels_101_1.mat
Total reward: -2497.4099341812616 SOC: 0.8263 Cumulative_SOC_deviation: 246.1516 Fuel Consumption: 35.8940
******************* Test is done *****************

../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Medium_1.mat
maximum steps, simulation is done ... 
Episode: 60 Total reward: -6647.649477744946 Explore P: 0.2055 SOC: 1.0000 Cumulative_SOC_deviation: 654.0718 Fuel Consumption: 106.9316
../data/driving_cycles/city\VITO_RW_Antwerp1_May19c.mat
maximum steps, simulation is done ... 
Episode: 61 Total reward: -1484.5654519836676 Explore P: 0.2015 SOC: 0.9104 Cumulative_SOC_deviation: 145.0262 Fuel Consumption: 34.3032
../data/driving_cycles/city\VITO_RW_Decad

maximum steps, simulation is done ... 
Episode: 90 Total reward: -7039.588761398397 Explore P: 0.0916 SOC: 1.0000 Cumulative_SOC_deviation: 695.4400 Fuel Consumption: 85.1885
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 91 Total reward: -815.5083703394109 Explore P: 0.0898 SOC: 0.5697 Cumulative_SOC_deviation: 77.3012 Fuel Consumption: 42.4959
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 92 Total reward: -726.824125251178 Explore P: 0.0882 SOC: 0.5514 Cumulative_SOC_deviation: 68.8058 Fuel Consumption: 38.7658
../data/driving_cycles/city\ny_city_traffic.mat
maximum steps, simulation is done ... 
Episode: 93 Total reward: -655.262118334346 Explore P: 0.0872 SOC: 0.8055 Cumulative_SOC_deviation: 63.4870 Fuel Consumption: 20.3924
../data/driving_cycles/city\VITO_RW_Decade_Octavia_BCN_City1.mat
maximum steps, simulation is done ... 
Episode: 94 Total reward: -3513.977086126866 Explore P: 0.084

maximum steps, simulation is done ... 
Episode: 122 Total reward: -3804.3283172375673 Explore P: 0.0419 SOC: 1.0000 Cumulative_SOC_deviation: 365.6640 Fuel Consumption: 147.6886
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 123 Total reward: -105.90246447813236 Explore P: 0.0412 SOC: 0.5900 Cumulative_SOC_deviation: 9.5165 Fuel Consumption: 10.7373
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 124 Total reward: -427.03435647966126 Explore P: 0.0404 SOC: 0.5942 Cumulative_SOC_deviation: 38.1637 Fuel Consumption: 45.3972
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 125 Total reward: -449.13378385770955 Explore P: 0.0397 SOC: 0.6119 Cumulative_SOC_deviation: 43.6784 Fuel Consumption: 12.3499
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Medium_1.mat
maximum steps, simulation is done ... 
Episode: 126 Total reward: -159.7159260998431 Explore

maximum steps, simulation is done ... 
Episode: 154 Total reward: -5086.94985877835 Explore P: 0.0202 SOC: 1.0000 Cumulative_SOC_deviation: 494.3768 Fuel Consumption: 143.1816
../data/driving_cycles/city\VITO_MOLCity.mat
Available condition is not avail... SOC: 0.9918516061112828
Episode: 155 Total reward: -1639.3043732947565 Explore P: 0.0200 SOC: 0.9919 Cumulative_SOC_deviation: 155.1778 Fuel Consumption: 87.5260
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 156 Total reward: -40.82770280870437 Explore P: 0.0197 SOC: 0.6088 Cumulative_SOC_deviation: 3.1688 Fuel Consumption: 9.1394
../data/driving_cycles/city\VITO_RW_Decade_Octavia_MOL_City1.mat
maximum steps, simulation is done ... 
Episode: 157 Total reward: -182.21331258764764 Explore P: 0.0195 SOC: 0.5925 Cumulative_SOC_deviation: 14.8585 Fuel Consumption: 33.6285
../data/driving_cycles/city\VITO_RW_Decade_Polo_BCN_City1.mat
maximum steps, simulation is done ... 
Episode: 158 Total rewar

maximum steps, simulation is done ... 
Episode: 186 Total reward: -697.9680345714494 Explore P: 0.0137 SOC: 0.5727 Cumulative_SOC_deviation: 62.6048 Fuel Consumption: 71.9197
../data/driving_cycles/city\VITO_RW_BUS_TMB_Line24N_1.mat
maximum steps, simulation is done ... 
Episode: 187 Total reward: -628.8316647733234 Explore P: 0.0135 SOC: 0.5790 Cumulative_SOC_deviation: 61.1603 Fuel Consumption: 17.2283
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Medium_1.mat
maximum steps, simulation is done ... 
Episode: 188 Total reward: -662.442815025086 Explore P: 0.0134 SOC: 0.5743 Cumulative_SOC_deviation: 63.3836 Fuel Consumption: 28.6071
../data/driving_cycles/city\VITO_RW_Decade_Jumper_BCN_City1.mat
maximum steps, simulation is done ... 
Episode: 189 Total reward: -445.7528028625717 Explore P: 0.0132 SOC: 0.5725 Cumulative_SOC_deviation: 42.8515 Fuel Consumption: 17.2379
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/c

maximum steps, simulation is done ... 
Episode: 218 Total reward: -215.9886677300925 Explore P: 0.0112 SOC: 0.6176 Cumulative_SOC_deviation: 19.1866 Fuel Consumption: 24.1227
../data/driving_cycles/city\VITO_MOLCity.mat
maximum steps, simulation is done ... 
Episode: 219 Total reward: -216.59273699525508 Explore P: 0.0112 SOC: 0.6093 Cumulative_SOC_deviation: 18.9086 Fuel Consumption: 27.5063
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\VITO_RW_Jumper_Brussels_101_1.mat
Total reward: -457.82992890925544 SOC: 0.6191 Cumulative_SOC_deviation: 43.5282 Fuel Consumption: 22.5478
******************* Test is done *****************

../data/driving_cycles/city\VITO_RW_Decade_Octavia_MOL_City1.mat
maximum steps, simulation is done ... 
Episode: 220 Total reward: -239.3080662922357 Explore P: 0.0112 SOC: 0.6187 Cumulative_SOC_deviation: 20.6224 Fuel Consumption: 33.0842
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Fu

maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\VITO_RW_Jumper_Brussels_101_1.mat
Total reward: -1914.5819510471526 SOC: 0.8269 Cumulative_SOC_deviation: 187.7888 Fuel Consumption: 36.6942
******************* Test is done *****************

../data/driving_cycles/city\VITO_RW_Decade_Polo_BCN_City1.mat
maximum steps, simulation is done ... 
Episode: 250 Total reward: -472.29844146040824 Explore P: 0.0105 SOC: 0.6666 Cumulative_SOC_deviation: 44.5131 Fuel Consumption: 27.1670
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 251 Total reward: -363.32926797646945 Explore P: 0.0105 SOC: 0.6462 Cumulative_SOC_deviation: 31.8188 Fuel Consumption: 45.1415
../data/driving_cycles/city\VITO_RW_Antwerp1_May19c.mat
maximum steps, simulation is done ... 
Episode: 252 Total reward: -429.34188528026664 Explore P: 0.0105 SOC: 0.7022 Cumulative_SOC_deviation: 41.2553 Fuel Consumption: 16.78

maximum steps, simulation is done ... 
Episode: 280 Total reward: -151.06329934162156 Explore P: 0.0102 SOC: 0.6130 Cumulative_SOC_deviation: 12.7546 Fuel Consumption: 23.5171
../data/driving_cycles/city\VITO_RW_Decade_Jumper_BCN_City1.mat
maximum steps, simulation is done ... 
Episode: 281 Total reward: -167.06114853897316 Explore P: 0.0102 SOC: 0.6093 Cumulative_SOC_deviation: 14.8251 Fuel Consumption: 18.8097
../data/driving_cycles/city\VITO_RW_Decade_Octavia_MOL_City1.mat
maximum steps, simulation is done ... 
Episode: 282 Total reward: -113.38735268573375 Explore P: 0.0102 SOC: 0.6035 Cumulative_SOC_deviation: 8.1551 Fuel Consumption: 31.8362
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Medium_1.mat
maximum steps, simulation is done ... 
Episode: 283 Total reward: -181.3962425039065 Explore P: 0.0102 SOC: 0.6075 Cumulative_SOC_deviation: 15.1895 Fuel Consumption: 29.5012
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Medium_1.mat
maximum steps, simulation is done ... 


maximum steps, simulation is done ... 
Episode: 312 Total reward: -3007.122217326891 Explore P: 0.0101 SOC: 1.0000 Cumulative_SOC_deviation: 290.5094 Fuel Consumption: 102.0281
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Empty_1.mat
maximum steps, simulation is done ... 
Episode: 313 Total reward: -265.8731490929082 Explore P: 0.0101 SOC: 0.6180 Cumulative_SOC_deviation: 23.4124 Fuel Consumption: 31.7494
../data/driving_cycles/city\VITO_RW_Decade_Polo_BCN_City1.mat
maximum steps, simulation is done ... 
Episode: 314 Total reward: -188.39632969382222 Explore P: 0.0101 SOC: 0.6158 Cumulative_SOC_deviation: 16.5055 Fuel Consumption: 23.3416
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 315 Total reward: -243.07963277396183 Explore P: 0.0101 SOC: 0.6216 Cumulative_SOC_deviation: 23.3291 Fuel Consumption: 9.7885
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 316 Total reward: -289.1087823129733

maximum steps, simulation is done ... 
Episode: 345 Total reward: -294.4227718400253 Explore P: 0.0100 SOC: 0.6280 Cumulative_SOC_deviation: 28.3023 Fuel Consumption: 11.3994
../data/driving_cycles/city\VITO_RW_Decade_Polo_BCN_City1.mat
maximum steps, simulation is done ... 
Episode: 346 Total reward: -4701.1531417535625 Explore P: 0.0100 SOC: 1.0000 Cumulative_SOC_deviation: 451.7447 Fuel Consumption: 183.7059
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Medium_1.mat
maximum steps, simulation is done ... 
Episode: 347 Total reward: -1252.7665758737369 Explore P: 0.0100 SOC: 0.8512 Cumulative_SOC_deviation: 120.5253 Fuel Consumption: 47.5139
../data/driving_cycles/city\ny_city_composite_truck.mat
maximum steps, simulation is done ... 
Episode: 348 Total reward: -334.6974615543211 Explore P: 0.0100 SOC: 0.6409 Cumulative_SOC_deviation: 32.2061 Fuel Consumption: 12.6360
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 349 Total reward: 

maximum steps, simulation is done ... 
Episode: 377 Total reward: -3487.210641706059 Explore P: 0.0100 SOC: 1.0000 Cumulative_SOC_deviation: 335.0181 Fuel Consumption: 137.0300
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Full_1.mat
maximum steps, simulation is done ... 
Episode: 378 Total reward: -4405.209369919062 Explore P: 0.0100 SOC: 1.0000 Cumulative_SOC_deviation: 425.0018 Fuel Consumption: 155.1912
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 379 Total reward: -2121.642398927887 Explore P: 0.0100 SOC: 1.0000 Cumulative_SOC_deviation: 205.3924 Fuel Consumption: 67.7188
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\VITO_RW_Jumper_Brussels_101_1.mat
Total reward: -4425.499623178481 SOC: 1.0000 Cumulative_SOC_deviation: 431.5809 Fuel Consumption: 109.6909
******************* Test is done *****************

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum 

In [14]:
# Persist the per-reward-factor training histories collected by the training cell.
with open("DDQN1.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [15]:
# with open("results/replay_memory_size_effect.pkl", "rb") as f: 
#     data = pickle.load(f)
    
# data