In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import matplotlib.pyplot as plt 
from collections import deque 
import glob 

from vehicle_model_DDQN1 import Environment 
from cell_model import CellModel 

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Data files describing the battery, the motor and one driving cycle.
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"
# NOTE: the variable name is misspelled ("drving"); it is kept as-is so that any
# other cell referring to it keeps working.
drving_cycle = "../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat"

# Cell model shared by every Environment built in this notebook.
cell_model = CellModel()
# The last positional argument is the slot initialization_env() fills with
# `reward_factor`; here it is fixed to 1.
env = Environment(cell_model, drving_cycle, battery_path, motor_path, 1)


In [3]:
# Width of the state vector fed to the Q-networks (used as `input_shape` in
# initialization() and for the all-zero placeholder of terminal next-states in train()).
# It is hard-coded here; the environment-provided value below is disabled.
# STATE_SIZE = env.calculation_comp["state_size"]
STATE_SIZE = 4
# Number of discrete actions: width of the Q-network output layer and the range
# of random actions drawn by choose_action().
ACTION_SIZE = env.calculation_comp["action_size"]
# Step size passed to the Adam optimizer of the primary network.
LEARNING_RATE = 0.00025

# Number of training episodes run per reward factor.
TOTAL_EPISODES = 400
# NOTE(review): not referenced by any cell visible in this notebook.
MAX_STEPS = 50000

# Discount factor applied to the bootstrapped next-state value in train().
GAMMA = 0.95

# Exploration schedule: eps starts at MAX_EPSILON and, once training has begun,
# follows MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * exp(-DECAY_RATE * (steps - DELAY_TRAINING)).
MAX_EPSILON = 1
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002
# Number of transitions requested from the replay memory per training step.
BATCH_SIZE = 32
# Soft-update coefficient used by update_network(): target <- (1 - TAU) * target + TAU * primary.
TAU = 0.001
# Training (and epsilon decay) only starts once the global step counter exceeds this value.
DELAY_TRAINING = 10000
# NOTE(review): not referenced by any cell visible in this notebook.
EPSILON_MIN_ITER = 5000

In [4]:
# Module-level primary/target Q-networks: two hidden ReLU layers of 30 units
# (He-normal initialised) followed by a linear layer with one output per action.
# NOTE(review): the training cell rebinds `primary_network` and `target_network`
# to fresh models returned by initialization(), so these instances only matter
# to code that runs before that cell.
primary_network = keras.Sequential([
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(ACTION_SIZE),
])
target_network = keras.Sequential([
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(ACTION_SIZE),
])

# Only the primary network is trained through Keras; the target network is
# updated by update_network(). `learning_rate` replaces the deprecated `lr`
# alias, which newer Keras releases no longer accept.
primary_network.compile(
    loss="mse",
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
)

# NOTE(review): the target network is not hard-copied from the primary network
# here (that copy was previously present but disabled); it starts from its own
# random initialisation -- confirm this is intended.

In [5]:
def update_network(primary_network, target_network):
    """Blend the primary network's weights into the target network in place.

    Every trainable variable of ``target_network`` is overwritten with
    ``target * (1 - TAU) + primary * TAU``, pairing variables by position.

    Args:
        primary_network: model whose trainable variables are the source.
        target_network: model whose trainable variables are updated.
    """
    variable_pairs = zip(target_network.trainable_variables,
                         primary_network.trainable_variables)
    for target_var, primary_var in variable_pairs:
        blended = target_var * (1 - TAU) + primary_var * TAU
        target_var.assign(blended)

In [6]:
class Memory:
    """Fixed-capacity FIFO replay buffer.

    Samples are kept in insertion order; once more than ``max_memory`` samples
    have been added, the oldest ones are discarded.
    """

    def __init__(self, max_memory):
        """Create an empty buffer holding at most ``max_memory`` samples."""
        self.max_memory = max_memory
        # A bounded deque evicts the oldest entry in O(1) on append, instead of
        # the O(n) shift that list.pop(0) performs on every insert once full.
        # A non-positive capacity keeps the buffer permanently empty.
        self._samples = deque(maxlen=max(int(max_memory), 0))

    def add_sample(self, sample):
        """Append ``sample``; the oldest sample is dropped when the buffer is full."""
        self._samples.append(sample)

    def sample(self, no_samples):
        """Return up to ``no_samples`` distinct stored samples in random order.

        If fewer than ``no_samples`` samples are stored, all of them are
        returned (shuffled), so the result may be shorter than requested.
        """
        return random.sample(self._samples, min(no_samples, len(self._samples)))

    @property
    def num_samples(self):
        """Number of samples currently stored."""
        return len(self._samples)
    

# memory = Memory(10000)

In [7]:
def choose_action(state, primary_network, eps):
    """Pick an action epsilon-greedily.

    With probability ``eps`` a uniformly random action index in
    ``[0, ACTION_SIZE - 1]`` is returned; otherwise the index of the largest
    output of ``primary_network`` for ``state`` is returned.

    Args:
        state: array-like state; it is reshaped to a single-row batch.
        primary_network: callable mapping a ``(1, n)`` array to action values.
        eps: exploration probability.
    """
    explore = random.random() < eps
    if explore:
        return random.randint(0, ACTION_SIZE - 1)

    state_batch = np.array(state).reshape(1, -1)
    q_values = primary_network(state_batch)
    return np.argmax(q_values)

In [8]:
def train(primary_network, target_network, memory):
    """Run one Double-DQN training step on a batch drawn from ``memory``.

    For every sampled transition ``(state, action, reward, next_state)`` the
    regression target for the taken action is ``reward`` plus, for non-terminal
    transitions, ``GAMMA`` times the target network's value of the action the
    primary network ranks highest in ``next_state``. Targets for all other
    actions are the primary network's current predictions, so they contribute
    no error.

    Args:
        primary_network: compiled Keras model being trained.
        target_network: model used to evaluate the bootstrapped value.
        memory: replay buffer exposing ``sample(n)``; terminal transitions are
            stored with ``next_state`` set to ``None``.

    Returns:
        The loss reported by ``primary_network.train_on_batch``.
    """
    batch = memory.sample(BATCH_SIZE)
    states = np.array([val[0] for val in batch])
    actions = np.array([val[1] for val in batch])
    # Force a float array so the in-place bootstrapping below cannot fail on
    # integer-typed rewards.
    rewards = np.array([val[2] for val in batch], dtype=np.float64)
    # Terminal transitions are marked by next_state=None. Deriving the mask from
    # that marker (rather than from "next_state sums to zero") keeps genuine
    # states whose features happen to sum to zero from being treated as terminal.
    non_terminal = np.array([val[3] is not None for val in batch], dtype=bool)
    # Terminal next-states are replaced by zeros only to keep the batch
    # rectangular; their network outputs are never used.
    next_states = np.array([np.zeros(STATE_SIZE) if val[3] is None else val[3]
                            for val in batch])

    prim_qt = primary_network(states)
    prim_qtp1 = primary_network(next_states)
    target_q = prim_qt.numpy()
    # The memory may return fewer than BATCH_SIZE samples, so index by the
    # actual batch length.
    batch_idxs = np.arange(len(batch))
    # Action selection by the primary network, evaluation by the target network.
    prim_action_tp1 = np.argmax(prim_qtp1.numpy(), axis=1)
    q_from_target = target_network(next_states).numpy()

    updates = rewards.copy()
    updates[non_terminal] += GAMMA * q_from_target[batch_idxs[non_terminal],
                                                   prim_action_tp1[non_terminal]]

    target_q[batch_idxs, actions] = updates
    loss = primary_network.train_on_batch(states, target_q)
    return loss

In [9]:
def _build_q_network():
    """Build one Q-network: STATE_SIZE inputs, two 30-unit ReLU layers, ACTION_SIZE linear outputs."""
    return keras.Sequential([
        keras.layers.Dense(30, activation="relu", input_shape=[STATE_SIZE],
                           kernel_initializer=keras.initializers.he_normal()),
        keras.layers.Dense(30, activation="relu",
                           kernel_initializer=keras.initializers.he_normal()),
        keras.layers.Dense(ACTION_SIZE),
    ])


def initialization():
    """Create a fresh replay memory and a fresh primary/target network pair.

    Returns:
        A tuple ``(memory, primary_network, target_network)`` where ``memory``
        is an empty ``Memory`` with capacity 10000, and both networks share the
        same architecture but are initialised independently. Only the primary
        network is compiled (MSE loss, Adam with ``LEARNING_RATE``).
    """
    memory = Memory(10000)

    # Built in the same order as before (primary, then target) so that the
    # sequence of random weight initialisations is unchanged.
    primary_network = _build_q_network()
    target_network = _build_q_network()

    # `learning_rate` replaces the deprecated `lr` alias, which newer Keras
    # releases no longer accept.
    primary_network.compile(
        loss="mse",
        optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    )
    return memory, primary_network, target_network
    

In [10]:
def initialization_env(driving_path, reward_factor):
    """Build an ``Environment`` for one driving cycle.

    The module-level ``cell_model``, ``battery_path`` and ``motor_path`` are
    reused; only the driving-cycle file and the reward factor vary.

    Args:
        driving_path: path of the driving-cycle file handed to ``Environment``.
        reward_factor: value forwarded as the last ``Environment`` argument.

    Returns:
        The newly constructed ``Environment``.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [11]:
def save_weights(primary_net, target_net, root):
    """Save both networks' weights under the directory ``./<root>/``.

    Args:
        primary_net: object exposing ``save_weights(path)``; written to
            ``primary_net_checkpoint``.
        target_net: object exposing ``save_weights(path)``; written to
            ``target_net_checkpoint``.
        root: directory name (relative to the working directory).
    """
    primary_path = "./{}/primary_net_checkpoint".format(root)
    target_path = "./{}/target_net_checkpoint".format(root)

    primary_net.save_weights(primary_path)
    target_net.save_weights(target_path)
    print("model is saved..")

In [12]:
def test_agent(primary_network, reward_factor, test_path_start):
    """Run one greedy (no exploration) episode on a randomly chosen test cycle.

    Candidate cycles are the ``*.mat`` files under
    ``../data/driving_cycles/city/`` from index ``test_path_start`` onwards (in
    the order ``glob.glob`` returns them). A summary of the episode is printed.

    Args:
        primary_network: callable mapping a ``(1, n)`` state batch to action values.
        reward_factor: forwarded to ``initialization_env``.
        test_path_start: slice start selecting the candidate test cycles.

    Returns:
        ``env.history`` of the environment used for the test episode.
    """
    all_cycles = glob.glob("../data/driving_cycles/city/*.mat")
    test_cycles = all_cycles[test_path_start:]
    test_cycle = np.random.choice(test_cycles)
    env = initialization_env(test_cycle, reward_factor)

    total_reward = 0
    state = env.reset()
    done = False
    while not done:
        # Always act greedily with respect to the primary network.
        q_values = primary_network(np.array(state).reshape(1, -1))
        state, reward, done = env.step(np.argmax(q_values))
        total_reward += reward

    # Sum of absolute deviations of the recorded SOC trace from 0.6.
    soc_trace = np.array(env.history["SOC"])
    SOC_deviation_history = np.sum(np.abs(soc_trace - 0.6))

    banner = "******************* Test is done *****************"
    print(banner)
    print(test_cycle)
    print('Total reward: {}'.format(total_reward),
          "SOC: {:.4f}".format(env.SOC),
          "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
          "Fuel Consumption: {:.4f}".format(env.fuel_consumption))
    print(banner)
    print("")
    return env.history
    

In [13]:
print("environment version: {}".format(env.version))

# One full training run is performed per reward factor; results are keyed by it.
reward_factors = [10]
results_dict = {}
# NOTE(review): glob.glob returns files in an OS-dependent order, so which 20
# cycles are used for training (and which cycle test_agent holds out with
# test_path_start=-1) may differ between machines.
driving_cycle_paths = glob.glob("../data/driving_cycles/city/*.mat")[:20]
if not driving_cycle_paths:
    # Fail early with a clear message instead of a ZeroDivisionError on the
    # round-robin index below.
    raise FileNotFoundError("no driving cycles found in ../data/driving_cycles/city/")

for trial, reward_factor in enumerate(reward_factors):
    eps = MAX_EPSILON
    steps = 0  # global step counter across all episodes of this trial
    episode_rewards = []
    episode_SOCs = []
    episode_FCs = []
    episode_losses = []
    episode_test_history = []
    episode_num_test = []

    memory, primary_network, target_network = initialization()
    for episode in range(TOTAL_EPISODES):
        # Cycle through the training driving cycles round-robin.
        k = episode % len(driving_cycle_paths)
        driving_cycle_path = driving_cycle_paths[k]
        print(driving_cycle_path)
        env = initialization_env(driving_cycle_path, reward_factor)
        state = env.reset()

        # Only losses from real training steps are accumulated; steps taken
        # before training starts no longer add a -1 sentinel to the average.
        avg_loss = 0.0
        trained_steps = 0
        total_reward = 0

        while True:
            action = choose_action(state, primary_network, eps)
            next_state, reward, done = env.step(action)
            total_reward += reward
            if done:
                # Terminal transitions are stored with next_state=None.
                next_state = None
            memory.add_sample((state, action, reward, next_state))

            if steps > DELAY_TRAINING:
                loss = train(primary_network, target_network, memory)
                update_network(primary_network, target_network)
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps -
                                                                        DELAY_TRAINING))
                avg_loss += float(loss)
                trained_steps += 1

            steps += 1

            if done:
                SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
                # NaN marks episodes in which no training step was executed.
                avg_loss = avg_loss / trained_steps if trained_steps else float("nan")
                print('Episode: {}'.format(episode),
                      'Total reward: {}'.format(total_reward),
                      'Explore P: {:.4f}'.format(eps),
                      "SOC: {:.4f}".format(env.SOC),
                      "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
                      "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
                      )

                episode_rewards.append(total_reward)
                episode_SOCs.append(env.SOC)
                episode_FCs.append(env.fuel_consumption)
                episode_losses.append(avg_loss)
                break

            state = next_state

        # Evaluate the greedy policy every 10 episodes.
        if (episode + 1) % 10 == 0:
            history = test_agent(primary_network, reward_factor, -1)
            episode_test_history.append(history)
            episode_num_test.append(episode + 1)

    root = "DDQN4_trial{}".format(trial+1)
    save_weights(primary_network, target_network, root)

    results_dict[reward_factor] = {
        "rewards": episode_rewards,
        "SOCs": episode_SOCs,
        "FCs": episode_FCs,
        "avg_losses": episode_losses,
        "test_history": episode_test_history,
        "test_episode_num": episode_num_test,
    }
            
    

environment version: 1
../data/driving_cycles/city\VITO_RW_BUS_TMB_Line24N_1.mat
maximum steps, simulation is done ... 
Episode: 0 Total reward: -11511.766973210815 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 1119.1962 Fuel Consumption: 319.8054
../data/driving_cycles/city\VITO_RW_Decade_Octavia_BCN_City1.mat
maximum steps, simulation is done ... 
Episode: 1 Total reward: -6026.5162737234405 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 584.7631 Fuel Consumption: 178.8854
../data/driving_cycles/city\VITO_RW_Decade_Octavia_MOL_City1.mat
maximum steps, simulation is done ... 
Episode: 2 Total reward: -5439.9474679210325 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 527.6911 Fuel Consumption: 163.0367
../data/driving_cycles/city\VITO_RW_Decade_Jumper_MOL_City1.mat
maximum steps, simulation is done ... 
Episode: 3 Total reward: -7623.598032336902 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 740.3942 Fuel Consumption: 219.6565
../data/driving_cycle

maximum steps, simulation is done ... 
Episode: 27 Total reward: -9373.247546830182 Explore P: 0.4919 SOC: 1.0000 Cumulative_SOC_deviation: 910.1628 Fuel Consumption: 271.6192
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Empty_1.mat
maximum steps, simulation is done ... 
Episode: 28 Total reward: -7119.410276335128 Explore P: 0.4718 SOC: 1.0000 Cumulative_SOC_deviation: 695.2194 Fuel Consumption: 167.2162
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Empty_1.mat
maximum steps, simulation is done ... 
Episode: 29 Total reward: -6986.437769575338 Explore P: 0.4526 SOC: 1.0000 Cumulative_SOC_deviation: 682.0852 Fuel Consumption: 165.5859
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -551.3217503440917 SOC: 0.5551 Cumulative_SOC_deviation: 54.1167 Fuel Consumption: 10.1546
******************* Test is done *****************

../data/driving_cycles/city\VITO_RW_Decade_Jumper_BCN_Cit

maximum steps, simulation is done ... 
Episode: 59 Total reward: -4041.289098973987 Explore P: 0.1862 SOC: 1.0000 Cumulative_SOC_deviation: 393.4915 Fuel Consumption: 106.3737
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -266.64420144476327 SOC: 0.6172 Cumulative_SOC_deviation: 25.2803 Fuel Consumption: 13.8412
******************* Test is done *****************

../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Empty_1.mat
maximum steps, simulation is done ... 
Episode: 60 Total reward: -4808.765710468003 Explore P: 0.1788 SOC: 1.0000 Cumulative_SOC_deviation: 469.3218 Fuel Consumption: 115.5474
../data/driving_cycles/city\ny_city_traffic.mat
maximum steps, simulation is done ... 
Episode: 61 Total reward: -368.4692501129993 Explore P: 0.1768 SOC: 0.7320 Cumulative_SOC_deviation: 35.3001 Fuel Consumption: 15.4687
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is

maximum steps, simulation is done ... 
Episode: 90 Total reward: -235.71704460276288 Explore P: 0.0825 SOC: 0.6209 Cumulative_SOC_deviation: 21.0423 Fuel Consumption: 25.2945
../data/driving_cycles/city\VITO_RW_BUS_TMB_Line24N_1.mat
maximum steps, simulation is done ... 
Episode: 91 Total reward: -8129.922925237687 Explore P: 0.0783 SOC: 1.0000 Cumulative_SOC_deviation: 793.6496 Fuel Consumption: 193.4266
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 92 Total reward: -776.9497670902358 Explore P: 0.0769 SOC: 0.6989 Cumulative_SOC_deviation: 75.8337 Fuel Consumption: 18.6127
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 93 Total reward: -309.0975322300255 Explore P: 0.0751 SOC: 0.5972 Cumulative_SOC_deviation: 26.4069 Fuel Consumption: 45.0284
../data/driving_cycles/city\ny_city_composite_truck.mat
maximum steps, simulation is done ... 
Episode: 94 Total reward: -303.389908370895 Explore P: 0

maximum steps, simulation is done ... 
Episode: 123 Total reward: -100.01595503986627 Explore P: 0.0381 SOC: 0.5909 Cumulative_SOC_deviation: 8.3376 Fuel Consumption: 16.6401
../data/driving_cycles/city\FTP_75_cycle.mat
maximum steps, simulation is done ... 
Episode: 124 Total reward: -556.4768515687007 Explore P: 0.0371 SOC: 0.5802 Cumulative_SOC_deviation: 48.3990 Fuel Consumption: 72.4872
../data/driving_cycles/city\ny_city_traffic.mat
maximum steps, simulation is done ... 
Episode: 125 Total reward: -30.463039007773954 Explore P: 0.0368 SOC: 0.6035 Cumulative_SOC_deviation: 2.4646 Fuel Consumption: 5.8173
../data/driving_cycles/city\ny_city_traffic.mat
maximum steps, simulation is done ... 
Episode: 126 Total reward: -33.16837640713598 Explore P: 0.0364 SOC: 0.5976 Cumulative_SOC_deviation: 2.7988 Fuel Consumption: 5.1804
../data/driving_cycles/city\VITO_RW_Antwerp1_May19c.mat
maximum steps, simulation is done ... 
Episode: 127 Total reward: -100.06076477319073 Explore P: 0.0359 SO

maximum steps, simulation is done ... 
Episode: 156 Total reward: -276.6620434043846 Explore P: 0.0208 SOC: 0.6145 Cumulative_SOC_deviation: 24.5409 Fuel Consumption: 31.2527
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Full_1.mat
maximum steps, simulation is done ... 
Episode: 157 Total reward: -399.59156443789635 Explore P: 0.0202 SOC: 0.6493 Cumulative_SOC_deviation: 36.6735 Fuel Consumption: 32.8569
../data/driving_cycles/city\VITO_RW_Antwerp1_May19c.mat
maximum steps, simulation is done ... 
Episode: 158 Total reward: -63.30447661612456 Explore P: 0.0200 SOC: 0.6100 Cumulative_SOC_deviation: 5.2762 Fuel Consumption: 10.5421
../data/driving_cycles/city\VITO_RW_Antwerp1_May19c.mat
maximum steps, simulation is done ... 
Episode: 159 Total reward: -60.33917693961939 Explore P: 0.0198 SOC: 0.6036 Cumulative_SOC_deviation: 5.0177 Fuel Consumption: 10.1624
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.

maximum steps, simulation is done ... 
Episode: 189 Total reward: -73.58910082661461 Explore P: 0.0143 SOC: 0.5956 Cumulative_SOC_deviation: 6.5809 Fuel Consumption: 7.7798
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -74.32649064269073 SOC: 0.5976 Cumulative_SOC_deviation: 6.1204 Fuel Consumption: 13.1227
******************* Test is done *****************

../data/driving_cycles/city\ny_city_traffic.mat
maximum steps, simulation is done ... 
Episode: 190 Total reward: -32.840475305716886 Explore P: 0.0142 SOC: 0.5998 Cumulative_SOC_deviation: 2.7746 Fuel Consumption: 5.0947
../data/driving_cycles/city\VITO_RW_Decade_Jumper_BCN_City1.mat
maximum steps, simulation is done ... 
Episode: 191 Total reward: -109.91596949519895 Explore P: 0.0141 SOC: 0.5958 Cumulative_SOC_deviation: 9.1523 Fuel Consumption: 18.3933
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Medium_1.mat
maximum steps, 

maximum steps, simulation is done ... 
Episode: 220 Total reward: -199.51511310706286 Explore P: 0.0118 SOC: 0.6014 Cumulative_SOC_deviation: 17.2846 Fuel Consumption: 26.6689
../data/driving_cycles/city\ny_city_composite_truck.mat
maximum steps, simulation is done ... 
Episode: 221 Total reward: -77.23956385963405 Explore P: 0.0118 SOC: 0.6015 Cumulative_SOC_deviation: 6.7519 Fuel Consumption: 9.7209
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 222 Total reward: -63.455056322757336 Explore P: 0.0117 SOC: 0.5937 Cumulative_SOC_deviation: 5.5663 Fuel Consumption: 7.7921
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 223 Total reward: -51.17334849062308 Explore P: 0.0117 SOC: 0.6063 Cumulative_SOC_deviation: 3.9628 Fuel Consumption: 11.5456
../data/driving_cycles/city\VITO_DUBDC.mat
maximum steps, simulation is done ... 
Episode: 224 Total reward: -66.19109491235733 Explore P: 0.0117 SOC: 0.604

maximum steps, simulation is done ... 
Episode: 253 Total reward: -114.70006480516523 Explore P: 0.0108 SOC: 0.6086 Cumulative_SOC_deviation: 9.6437 Fuel Consumption: 18.2626
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 254 Total reward: -88.07261349895089 Explore P: 0.0107 SOC: 0.6044 Cumulative_SOC_deviation: 7.6859 Fuel Consumption: 11.2136
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 255 Total reward: -1577.144078749043 Explore P: 0.0107 SOC: 1.0000 Cumulative_SOC_deviation: 151.4463 Fuel Consumption: 62.6808
../data/driving_cycles/city\VITO_RW_Decade_Octavia_BCN_City1.mat
maximum steps, simulation is done ... 
Episode: 256 Total reward: -101.09126311713958 Explore P: 0.0107 SOC: 0.6034 Cumulative_SOC_deviation: 8.3615 Fuel Consumption: 17.4759
../data/driving_cycles/city\VITO_RW_Decade_Octavia_BCN_City1.mat
maximum steps, simulation is done ... 
Episode: 257 Total reward: -140.86163469095

maximum steps, simulation is done ... 
Episode: 285 Total reward: -239.09967540329245 Explore P: 0.0103 SOC: 0.6662 Cumulative_SOC_deviation: 22.6114 Fuel Consumption: 12.9852
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 286 Total reward: -142.23894417058796 Explore P: 0.0103 SOC: 0.6214 Cumulative_SOC_deviation: 13.2891 Fuel Consumption: 9.3476
../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Empty_1.mat
maximum steps, simulation is done ... 
Episode: 287 Total reward: -215.8868498475817 Explore P: 0.0103 SOC: 0.6091 Cumulative_SOC_deviation: 18.4532 Fuel Consumption: 31.3547
../data/driving_cycles/city\ny_city_traffic.mat
maximum steps, simulation is done ... 
Episode: 288 Total reward: -81.11716623505377 Explore P: 0.0103 SOC: 0.6193 Cumulative_SOC_deviation: 7.4626 Fuel Consumption: 6.4909
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 289 Total reward: -141.59224545397615 Explore P: 0.010

maximum steps, simulation is done ... 
Episode: 318 Total reward: -113.1636655617954 Explore P: 0.0101 SOC: 0.6009 Cumulative_SOC_deviation: 8.1933 Fuel Consumption: 31.2306
../data/driving_cycles/city\manhattan.mat
maximum steps, simulation is done ... 
Episode: 319 Total reward: -180.09440094109814 Explore P: 0.0101 SOC: 0.6165 Cumulative_SOC_deviation: 17.0882 Fuel Consumption: 9.2128
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -231.41364122434032 SOC: 0.6230 Cumulative_SOC_deviation: 21.7070 Fuel Consumption: 14.3434
******************* Test is done *****************

../data/driving_cycles/city\VITO_RW_Decade_Jumper_BCN_City1.mat
maximum steps, simulation is done ... 
Episode: 320 Total reward: -264.19039752166816 Explore P: 0.0101 SOC: 0.6217 Cumulative_SOC_deviation: 24.5147 Fuel Consumption: 19.0438
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done .

maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -161.90150560624213 SOC: 0.6157 Cumulative_SOC_deviation: 14.7608 Fuel Consumption: 14.2930
******************* Test is done *****************

../data/driving_cycles/city\VITO_RW_BUS_VH_Brussels_Empty_1.mat
maximum steps, simulation is done ... 
Episode: 350 Total reward: -114.1393463261103 Explore P: 0.0100 SOC: 0.6120 Cumulative_SOC_deviation: 8.1279 Fuel Consumption: 32.8599
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 351 Total reward: -92.87566384919394 Explore P: 0.0100 SOC: 0.6086 Cumulative_SOC_deviation: 8.4153 Fuel Consumption: 8.7229
../data/driving_cycles/city\VITO_RW_BUS_TMB_Line24N_1.mat
maximum steps, simulation is done ... 
Episode: 352 Total reward: -273.34604904522513 Explore P: 0.0100 SOC: 0.6072 Cumulative_SOC_deviation: 25.5028 Fuel Consumption: 18.3184
../data/driving_cycles

maximum steps, simulation is done ... 
Episode: 381 Total reward: -228.75956066851614 Explore P: 0.0100 SOC: 0.6123 Cumulative_SOC_deviation: 21.0878 Fuel Consumption: 17.8814
../data/driving_cycles/city\06_udds_truck.mat
maximum steps, simulation is done ... 
Episode: 382 Total reward: -238.12077884320198 Explore P: 0.0100 SOC: 0.6245 Cumulative_SOC_deviation: 19.4537 Fuel Consumption: 43.5833
../data/driving_cycles/city\07_manhattan.mat
maximum steps, simulation is done ... 
Episode: 383 Total reward: -203.8111517407418 Explore P: 0.0100 SOC: 0.6206 Cumulative_SOC_deviation: 19.4010 Fuel Consumption: 9.8007
../data/driving_cycles/city\nuremberg_r36.mat
maximum steps, simulation is done ... 
Episode: 384 Total reward: -164.82635276408092 Explore P: 0.0100 SOC: 0.6085 Cumulative_SOC_deviation: 15.3590 Fuel Consumption: 11.2367
../data/driving_cycles/city\VITO_RW_Decade_Octavia_MOL_City1.mat
maximum steps, simulation is done ... 
Episode: 385 Total reward: -133.2235998011374 Explore P: 

In [14]:
# Persist the per-reward-factor training results for later analysis.
with open("DDQN4.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [15]:
# with open("results/replay_memory_size_effect.pkl", "rb") as f: 
#     data = pickle.load(f)
    
# data