In [1]:
import tensorflow as tf 
import numpy as np 
from tensorflow import keras 
import os 
import math 
import random 
import pickle 
import matplotlib.pyplot as plt 
from collections import deque 
import glob 

from vehicle_model_DDQN1 import Environment 
from cell_model import CellModel 

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Data files handed to Environment: the drive cycle plus the battery and motor data sets.
driving_cycle = '../../OC_SIM_DB/OC_SIM_DB_Cycles/Highway/01_FTP72_fuds.mat'
drving_cycle = driving_cycle  # backward-compatible alias for the original, misspelled name
battery_path = "../../OC_SIM_DB/OC_SIM_DB_Bat/OC_SIM_DB_Bat_nimh_6_240_panasonic_MY01_Prius.mat"
motor_path = "../../OC_SIM_DB/OC_SIM_DB_Mot/OC_SIM_DB_Mot_pm_95_145_X2.mat"

cell_model = CellModel()
# Bootstrap environment built with a reward factor of 1. The following cells read
# `env.calculation_comp["action_size"]` and `env.version` from it; the training loop
# rebinds `env` to a fresh Environment for every episode.
env = Environment(cell_model, driving_cycle, battery_path, motor_path, 1)


In [3]:
# --- Reproducibility ---------------------------------------------------------
# Exploration (random), test-cycle choice (np.random) and weight initialisation
# (TensorFlow) are all stochastic, so seed every generator used by the notebook.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Problem dimensions ------------------------------------------------------
# STATE_SIZE = env.calculation_comp["state_size"]
STATE_SIZE = 4  # hard-coded width of the state vector fed to the Q-networks
ACTION_SIZE = env.calculation_comp["action_size"]  # number of discrete actions, taken from the environment

# --- Optimisation ------------------------------------------------------------
LEARNING_RATE = 0.00025  # learning rate handed to the Adam optimizer
BATCH_SIZE = 32  # number of transitions requested from the replay memory per update
GAMMA = 0.95  # discount applied to the bootstrapped next-state value
TAU = 0.001  # blend factor of the soft target-network update

# --- Schedule ----------------------------------------------------------------
TOTAL_EPISODES = 400  # training episodes per reward factor
MAX_STEPS = 50000  # NOTE(review): not referenced by any cell shown in this notebook
DELAY_TRAINING = 10000  # training starts once the global step counter exceeds this value
EPSILON_MIN_ITER = 5000  # NOTE(review): not referenced by any cell shown in this notebook

# --- Exploration (epsilon-greedy) ---------------------------------------------
# After the warm-up: eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * exp(-DECAY_RATE * (steps - DELAY_TRAINING))
MAX_EPSILON = 1
MIN_EPSILON = 0.01
DECAY_RATE = 0.00002

In [4]:
# Module-level primary / target Q-networks: two hidden ReLU layers of 30 units and a
# linear output with one Q-value per action. The training cell rebinds both names to
# the networks returned by initialization(), so these instances only serve interactive
# inspection. Declaring `input_shape` (as initialization() does) builds the models
# immediately, which is what makes the weight copy below possible.
primary_network = keras.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=[STATE_SIZE],
                       kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(ACTION_SIZE),
])
target_network = keras.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=[STATE_SIZE],
                       kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(30, activation="relu", kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(ACTION_SIZE),
])

# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
primary_network.compile(
    loss="mse",
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE)
)

# Start the target network as an exact copy of the primary network.
target_network.set_weights(primary_network.get_weights())

In [5]:
def update_network(primary_network, target_network):
    """Soft-update the target network towards the primary network.

    Every trainable variable of ``target_network`` is overwritten in place with
    ``target * (1 - TAU) + primary * TAU``, pairing variables by position.
    """
    variable_pairs = zip(target_network.trainable_variables,
                         primary_network.trainable_variables)
    for target_var, primary_var in variable_pairs:
        blended = target_var * (1 - TAU) + primary_var * TAU
        target_var.assign(blended)

In [6]:
class Memory:
    """Bounded first-in-first-out store of experience samples.

    Holds at most ``max_memory`` entries; adding beyond that discards the oldest one.
    """

    def __init__(self, max_memory):
        self.max_memory = max_memory
        self._samples = []

    def add_sample(self, sample):
        """Append ``sample`` and evict the oldest entry when over capacity."""
        self._samples.append(sample)
        if self.num_samples > self.max_memory:
            del self._samples[0]

    def sample(self, no_samples):
        """Return up to ``no_samples`` stored entries drawn without replacement.

        When fewer entries are stored than requested, all of them are returned
        (in random order).
        """
        draw_count = min(no_samples, len(self._samples))
        return random.sample(self._samples, draw_count)

    @property
    def num_samples(self):
        """Number of entries currently stored."""
        return len(self._samples)
    

# memory = Memory(10000)

In [7]:
def choose_action(state, primary_network, eps):
    """Pick an action epsilon-greedily.

    With probability ``eps`` a uniformly random action index in
    ``[0, ACTION_SIZE - 1]`` is returned; otherwise the index of the largest
    output of ``primary_network`` for ``state`` (reshaped to a single-row batch).
    """
    explore = random.random() < eps
    if explore:
        return random.randint(0, ACTION_SIZE - 1)

    single_row_batch = np.array(state).reshape(1, -1)
    q_values = primary_network(single_row_batch)
    return np.argmax(q_values)

In [8]:
def train(primary_network, target_network, memory):
    """Run one Double-DQN update of ``primary_network`` on a sampled mini-batch.

    Each stored transition is a tuple ``(state, action, reward, next_state)`` where
    ``next_state`` is ``None`` for a terminal transition. For non-terminal
    transitions the regression target of the taken action is
    ``reward + GAMMA * Q_target(next_state, argmax_a Q_primary(next_state, a))``;
    for terminal ones it is just ``reward``. The Q-values of all other actions are
    left at the primary network's current predictions.

    Args:
        primary_network: compiled Keras model being trained.
        target_network: model used to evaluate the action selected by the primary network.
        memory: replay memory exposing ``sample(n)``.

    Returns:
        The loss reported by ``primary_network.train_on_batch``.
    """
    batch = memory.sample(BATCH_SIZE)
    # The memory hands back fewer than BATCH_SIZE transitions while it is still
    # filling up, so index by the real batch length.
    batch_size = len(batch)

    states = np.array([transition[0] for transition in batch])
    actions = np.array([transition[1] for transition in batch])
    # Explicit float dtype: the bootstrapped value is added in place below, which
    # would fail on an integer array.
    updates = np.array([transition[2] for transition in batch], dtype=np.float64)
    # Terminal flag taken from the stored `None` marker rather than from the
    # contents of the zero placeholder, so a genuine next state whose components
    # happen to sum to zero is still bootstrapped.
    is_terminal = np.array([transition[3] is None for transition in batch], dtype=bool)
    next_states = np.array([np.zeros(STATE_SIZE) if transition[3] is None else transition[3]
                            for transition in batch])

    prim_qt = primary_network(states)
    prim_qtp1 = primary_network(next_states)
    target_q = prim_qt.numpy()

    valid_idxs = ~is_terminal
    batch_idxs = np.arange(batch_size)
    # Double DQN: the primary network selects the next action, the target network scores it.
    prim_action_tp1 = np.argmax(prim_qtp1.numpy(), axis=1)
    q_from_target = target_network(next_states).numpy()
    updates[valid_idxs] += GAMMA * q_from_target[batch_idxs[valid_idxs],
                                                 prim_action_tp1[valid_idxs]]

    target_q[batch_idxs, actions] = updates
    loss = primary_network.train_on_batch(states, target_q)
    return loss

In [9]:
def initialization():
    """Create a fresh replay memory and a primary / target Q-network pair.

    Both networks share one architecture: two hidden ReLU layers of 30 units with
    He-normal initialisation and a linear output with ``ACTION_SIZE`` units. Only
    the primary network is compiled (MSE loss, Adam), since it is the only one
    trained by gradient descent. The target network starts as an exact copy of the
    primary network so that early bootstrapped targets are not computed from an
    unrelated random initialisation.

    Returns:
        Tuple ``(memory, primary_network, target_network)``.
    """
    def build_q_network():
        # One Q-value per action for a STATE_SIZE-dimensional input.
        return keras.Sequential([
            keras.layers.Dense(30, activation="relu", input_shape=[STATE_SIZE],
                               kernel_initializer=keras.initializers.he_normal()),
            keras.layers.Dense(30, activation="relu",
                               kernel_initializer=keras.initializers.he_normal()),
            keras.layers.Dense(ACTION_SIZE),
        ])

    memory = Memory(10000)

    primary_network = build_q_network()
    target_network = build_q_network()

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    primary_network.compile(
        loss="mse",
        optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    )

    # Hard-copy the weights once; afterwards the target follows via soft updates.
    target_network.set_weights(primary_network.get_weights())
    return memory, primary_network, target_network
    

In [10]:
def initialization_env(driving_path, reward_factor):
    """Build an Environment for ``driving_path`` with the given ``reward_factor``.

    The cell model, battery file and motor file come from the notebook-level
    ``cell_model``, ``battery_path`` and ``motor_path``.
    """
    return Environment(cell_model, driving_path, battery_path, motor_path, reward_factor)

In [11]:
def save_weights(primary_net, target_net, root):
    """Save the weights of both networks under ``./<root>/``.

    Args:
        primary_net: object exposing ``save_weights(path)``; written to
            ``./<root>/primary_net_checkpoint``.
        target_net: object exposing ``save_weights(path)``; written to
            ``./<root>/target_net_checkpoint``.
        root: name of the directory (relative to the working directory) that
            receives the checkpoints. It is created if it does not exist.
    """
    # Create the output directory up front instead of relying on the weight
    # writer to do it; a missing directory would otherwise lose a finished run.
    os.makedirs("./{}".format(root), exist_ok=True)
    primary_net.save_weights("./{}/primary_net_checkpoint".format(root))
    target_net.save_weights("./{}/target_net_checkpoint".format(root))
    print("model is saved..")

In [12]:
def test_agent(primary_network, reward_factor, test_path_start):
    """Roll out the greedy policy of ``primary_network`` on one test cycle.

    A cycle is drawn at random from the ``*.mat`` files under
    ``../data/driving_cycles/city/``, starting at index ``test_path_start`` of the
    glob result. The episode is run without exploration until the environment
    reports ``done``; a summary is printed and the environment's ``history`` is
    returned.
    """
    banner = "******************* Test is done *****************"

    candidate_cycles = glob.glob("../data/driving_cycles/city/*.mat")[test_path_start:]
    chosen_cycle = np.random.choice(candidate_cycles)
    test_env = initialization_env(chosen_cycle, reward_factor)

    episode_return = 0
    observation = test_env.reset()
    finished = False
    while not finished:
        # Always take the action with the highest predicted Q-value.
        q_values = primary_network(np.array(observation).reshape(1, -1))
        greedy_action = np.argmax(q_values)
        observation, step_reward, finished = test_env.step(greedy_action)
        episode_return += step_reward

    # Sum of absolute differences between the recorded SOC values and 0.6.
    soc_trace = np.array(test_env.history["SOC"])
    soc_deviation = np.sum(np.abs(soc_trace - 0.6))

    print(banner)
    print(chosen_cycle)
    print('Total reward: {}'.format(episode_return),
          "SOC: {:.4f}".format(test_env.SOC),
          "Cumulative_SOC_deviation: {:.4f}".format(soc_deviation),
          "Fuel Consumption: {:.4f}".format(test_env.fuel_consumption))
    print(banner)
    print("")
    return test_env.history
    

In [13]:
print("environment version: {}".format(env.version))

# One training trial per reward factor; results are collected per factor.
reward_factors = [10]
results_dict = {}
# Training uses the first cycle of the glob result; test_agent is pointed at the last one.
driving_cycle_paths = glob.glob("../data/driving_cycles/city/*.mat")[:1]
if not driving_cycle_paths:
    # Fail with a clear message instead of a ZeroDivisionError in the modulo below.
    raise FileNotFoundError("no driving cycles found under ../data/driving_cycles/city/")

for trial, reward_factor in enumerate(reward_factors):
    eps = MAX_EPSILON
    steps = 0  # environment steps taken so far in this trial (across episodes)
    episode_rewards = []
    episode_SOCs = []
    episode_FCs = []
    episode_losses = []
    episode_test_history = []
    episode_num_test = []

    memory, primary_network, target_network = initialization()
    for episode in range(TOTAL_EPISODES):
        # Cycle through the training drive cycles in order.
        k = episode % len(driving_cycle_paths)
        driving_cycle_path = driving_cycle_paths[k]
        print(driving_cycle_path)
        env = initialization_env(driving_cycle_path, reward_factor)
        state = env.reset()

        avg_loss = float("nan")  # stays NaN for episodes without any training step
        loss_sum = 0.0
        trained_steps = 0
        total_reward = 0
        cnt = 1  # number of environment steps taken in this episode

        while True:
            action = choose_action(state, primary_network, eps)
            next_state, reward, done = env.step(action)
            total_reward += reward
            if done:
                # Terminal transitions are stored with next_state=None.
                next_state = None
            memory.add_sample((state, action, reward, next_state))

            if steps > DELAY_TRAINING:
                loss = train(primary_network, target_network, memory)
                update_network(primary_network, target_network)
                # Exponential decay from MAX_EPSILON towards MIN_EPSILON, counted from the end of the warm-up.
                eps = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * np.exp(-DECAY_RATE * (steps -
                                                                        DELAY_TRAINING))
                # Only real training losses enter the episode average.
                loss_sum += loss
                trained_steps += 1
            else:
                loss = -1  # sentinel: no training happened on this step

            steps += 1

            if done:
                # Sum of absolute differences between the recorded SOC values and 0.6.
                SOC_deviation_history = np.sum(np.abs(np.array(env.history["SOC"]) - 0.6))
                if trained_steps:
                    avg_loss = loss_sum / trained_steps
                print('Episode: {}'.format(episode),
                      'Total reward: {}'.format(total_reward),
                      'Explore P: {:.4f}'.format(eps),
                      "SOC: {:.4f}".format(env.SOC),
                      "Cumulative_SOC_deviation: {:.4f}".format(SOC_deviation_history),
                      "Fuel Consumption: {:.4f}".format(env.fuel_consumption),
                      )

                episode_rewards.append(total_reward)
                episode_SOCs.append(env.SOC)
                episode_FCs.append(env.fuel_consumption)
                episode_losses.append(avg_loss)
                break

            state = next_state
            cnt += 1

        # Evaluate the greedy policy every 10 episodes.
        if (episode + 1) % 10 == 0:
            history = test_agent(primary_network, reward_factor, -1)
            episode_test_history.append(history)
            episode_num_test.append(episode + 1)

    root = "DDQN1_trial{}".format(trial+1)
    save_weights(primary_network, target_network, root)

    results_dict[reward_factor] = {
        "rewards": episode_rewards,
        "SOCs": episode_SOCs,
        "FCs": episode_FCs,
        "avg_losses": episode_losses,
        "test_history": episode_test_history,
        "test_episode_num": episode_num_test,
    }
            
    

environment version: 1
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 0 Total reward: -4616.119552085202 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 446.2460 Fuel Consumption: 153.6599
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 1 Total reward: -4634.81117890356 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 447.7157 Fuel Consumption: 157.6542
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 2 Total reward: -4670.034271843214 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 451.9287 Fuel Consumption: 150.7476
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 3 Total reward: -4585.995891921205 Explore P: 1.0000 SOC: 1.0000 Cumulative_SOC_deviation: 443.1049 Fuel Consumption: 154.9473
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 

maximum steps, simulation is done ... 
Episode: 28 Total reward: -3612.8067888503406 Explore P: 0.5553 SOC: 1.0000 Cumulative_SOC_deviation: 348.5735 Fuel Consumption: 127.0723
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 29 Total reward: -3678.603745065231 Explore P: 0.5406 SOC: 1.0000 Cumulative_SOC_deviation: 354.5548 Fuel Consumption: 133.0561
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -213.17419056942995 SOC: 0.6206 Cumulative_SOC_deviation: 19.8940 Fuel Consumption: 14.2345
******************* Test is done *****************

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 30 Total reward: -3550.560430529476 Explore P: 0.5262 SOC: 1.0000 Cumulative_SOC_deviation: 343.0841 Fuel Consumption: 119.7194
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episod

maximum steps, simulation is done ... 
Episode: 60 Total reward: -700.8976595642315 Explore P: 0.2365 SOC: 0.7452 Cumulative_SOC_deviation: 64.3289 Fuel Consumption: 57.6085
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 61 Total reward: -1562.9048397383117 Explore P: 0.2303 SOC: 1.0000 Cumulative_SOC_deviation: 148.3443 Fuel Consumption: 79.4616
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 62 Total reward: -1741.780126918775 Explore P: 0.2244 SOC: 1.0000 Cumulative_SOC_deviation: 164.3107 Fuel Consumption: 98.6733
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 63 Total reward: -581.7862050265718 Explore P: 0.2186 SOC: 0.7962 Cumulative_SOC_deviation: 52.0703 Fuel Consumption: 61.0828
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 64 Total reward: -1847.40369184373 Explore P: 0.2129 SOC: 1.0000 Cumu

maximum steps, simulation is done ... 
Episode: 94 Total reward: -450.23407437897646 Explore P: 0.0990 SOC: 0.5932 Cumulative_SOC_deviation: 40.3894 Fuel Consumption: 46.3403
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 95 Total reward: -438.8614778486552 Explore P: 0.0966 SOC: 0.6180 Cumulative_SOC_deviation: 39.0857 Fuel Consumption: 48.0047
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 96 Total reward: -458.20493475483374 Explore P: 0.0943 SOC: 0.6117 Cumulative_SOC_deviation: 41.0309 Fuel Consumption: 47.8958
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 97 Total reward: -430.9074632519055 Explore P: 0.0920 SOC: 0.5982 Cumulative_SOC_deviation: 38.4926 Fuel Consumption: 45.9811
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 98 Total reward: -379.5519345936831 Explore P: 0.0898 SOC: 0.5858 Cumu

maximum steps, simulation is done ... 
Episode: 128 Total reward: -329.7086538890124 Explore P: 0.0450 SOC: 0.6469 Cumulative_SOC_deviation: 28.2699 Fuel Consumption: 47.0099
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 129 Total reward: -255.3754431796916 Explore P: 0.0441 SOC: 0.6191 Cumulative_SOC_deviation: 21.0717 Fuel Consumption: 44.6583
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -116.04647087485215 SOC: 0.6149 Cumulative_SOC_deviation: 10.2471 Fuel Consumption: 13.5758
******************* Test is done *****************

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 130 Total reward: -264.6441731001121 Explore P: 0.0431 SOC: 0.6216 Cumulative_SOC_deviation: 21.9802 Fuel Consumption: 44.8420
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 1

maximum steps, simulation is done ... 
Episode: 160 Total reward: -199.47228803851834 Explore P: 0.0245 SOC: 0.6192 Cumulative_SOC_deviation: 15.5132 Fuel Consumption: 44.3398
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 161 Total reward: -217.39402776116927 Explore P: 0.0241 SOC: 0.6180 Cumulative_SOC_deviation: 17.2895 Fuel Consumption: 44.4991
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 162 Total reward: -184.0993826751701 Explore P: 0.0238 SOC: 0.6125 Cumulative_SOC_deviation: 14.0176 Fuel Consumption: 43.9234
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 163 Total reward: -211.6625709227874 Explore P: 0.0234 SOC: 0.6136 Cumulative_SOC_deviation: 16.7225 Fuel Consumption: 44.4378
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 164 Total reward: -193.46873603808348 Explore P: 0.0230 SOC: 0.617

maximum steps, simulation is done ... 
Episode: 194 Total reward: -162.53457831786721 Explore P: 0.0157 SOC: 0.6091 Cumulative_SOC_deviation: 11.9224 Fuel Consumption: 43.3110
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 195 Total reward: -148.74867677119073 Explore P: 0.0156 SOC: 0.6141 Cumulative_SOC_deviation: 10.4825 Fuel Consumption: 43.9234
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 196 Total reward: -207.2837978643289 Explore P: 0.0154 SOC: 0.6132 Cumulative_SOC_deviation: 16.2650 Fuel Consumption: 44.6338
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 197 Total reward: -180.53227997354318 Explore P: 0.0153 SOC: 0.6143 Cumulative_SOC_deviation: 13.6646 Fuel Consumption: 43.8867
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 198 Total reward: -145.21824356496884 Explore P: 0.0151 SOC: 0.61

maximum steps, simulation is done ... 
Episode: 228 Total reward: -142.7468852357794 Explore P: 0.0122 SOC: 0.6133 Cumulative_SOC_deviation: 9.9191 Fuel Consumption: 43.5560
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 229 Total reward: -119.91307621909138 Explore P: 0.0122 SOC: 0.6090 Cumulative_SOC_deviation: 7.6933 Fuel Consumption: 42.9803
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -238.44008838798354 SOC: 0.6241 Cumulative_SOC_deviation: 22.4203 Fuel Consumption: 14.2372
******************* Test is done *****************

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 230 Total reward: -137.91747113506818 Explore P: 0.0121 SOC: 0.6037 Cumulative_SOC_deviation: 9.5096 Fuel Consumption: 42.8211
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 23

maximum steps, simulation is done ... 
Episode: 260 Total reward: -169.04144464905627 Explore P: 0.0109 SOC: 0.6093 Cumulative_SOC_deviation: 12.5681 Fuel Consumption: 43.3600
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 261 Total reward: -153.7612533379368 Explore P: 0.0109 SOC: 0.6104 Cumulative_SOC_deviation: 11.1001 Fuel Consumption: 42.7598
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 262 Total reward: -163.7860601739984 Explore P: 0.0109 SOC: 0.6100 Cumulative_SOC_deviation: 11.9814 Fuel Consumption: 43.9724
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 263 Total reward: -152.40639376824916 Explore P: 0.0109 SOC: 0.6132 Cumulative_SOC_deviation: 10.9328 Fuel Consumption: 43.0783
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 264 Total reward: -141.41350900648533 Explore P: 0.0108 SOC: 0.612

maximum steps, simulation is done ... 
Episode: 294 Total reward: -116.04746509833971 Explore P: 0.0104 SOC: 0.6039 Cumulative_SOC_deviation: 7.3569 Fuel Consumption: 42.4781
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 295 Total reward: -132.98753844764593 Explore P: 0.0104 SOC: 0.6070 Cumulative_SOC_deviation: 8.9970 Fuel Consumption: 43.0170
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 296 Total reward: -121.1114664119446 Explore P: 0.0103 SOC: 0.6091 Cumulative_SOC_deviation: 7.7849 Fuel Consumption: 43.2620
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 297 Total reward: -126.82489410961612 Explore P: 0.0103 SOC: 0.6078 Cumulative_SOC_deviation: 8.3318 Fuel Consumption: 43.5070
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 298 Total reward: -123.2756457561794 Explore P: 0.0103 SOC: 0.6144 Cu

maximum steps, simulation is done ... 
Episode: 328 Total reward: -114.34618373726508 Explore P: 0.0101 SOC: 0.6095 Cumulative_SOC_deviation: 7.1035 Fuel Consumption: 43.3110
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 329 Total reward: -115.77301244075399 Explore P: 0.0101 SOC: 0.6081 Cumulative_SOC_deviation: 7.2695 Fuel Consumption: 43.0783
maximum steps, simulation is done ... 
******************* Test is done *****************
../data/driving_cycles/city\wvucity.mat
Total reward: -98.97798829102538 SOC: 0.6083 Cumulative_SOC_deviation: 8.5806 Fuel Consumption: 13.1717
******************* Test is done *****************

../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 330 Total reward: -120.75479898225133 Explore P: 0.0101 SOC: 0.6087 Cumulative_SOC_deviation: 7.7554 Fuel Consumption: 43.2008
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 331

maximum steps, simulation is done ... 
Episode: 360 Total reward: -121.84988350592273 Explore P: 0.0101 SOC: 0.6092 Cumulative_SOC_deviation: 7.7975 Fuel Consumption: 43.8744
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 361 Total reward: -118.80644310492842 Explore P: 0.0101 SOC: 0.6081 Cumulative_SOC_deviation: 7.4405 Fuel Consumption: 44.4011
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 362 Total reward: -112.43427281081816 Explore P: 0.0101 SOC: 0.6030 Cumulative_SOC_deviation: 6.9944 Fuel Consumption: 42.4904
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 363 Total reward: -113.58825002192279 Explore P: 0.0101 SOC: 0.6115 Cumulative_SOC_deviation: 6.9677 Fuel Consumption: 43.9111
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 364 Total reward: -118.83136264635405 Explore P: 0.0101 SOC: 0.6066 

maximum steps, simulation is done ... 
Episode: 394 Total reward: -127.8067506789067 Explore P: 0.0100 SOC: 0.6106 Cumulative_SOC_deviation: 8.4275 Fuel Consumption: 43.5315
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 395 Total reward: -121.29093349907022 Explore P: 0.0100 SOC: 0.6046 Cumulative_SOC_deviation: 7.8433 Fuel Consumption: 42.8578
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 396 Total reward: -130.91395740689336 Explore P: 0.0100 SOC: 0.6117 Cumulative_SOC_deviation: 8.7554 Fuel Consumption: 43.3600
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 397 Total reward: -123.18624748552422 Explore P: 0.0100 SOC: 0.6085 Cumulative_SOC_deviation: 8.0083 Fuel Consumption: 43.1028
../data/driving_cycles/city\01_FTP72_fuds.mat
maximum steps, simulation is done ... 
Episode: 398 Total reward: -122.50897432361236 Explore P: 0.0100 SOC: 0.6057 C

In [14]:
# Persist the per-reward-factor results to disk with the highest pickle protocol.
with open("DDQN1.pkl", "wb") as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [15]:
# with open("results/replay_memory_size_effect.pkl", "rb") as f: 
#     data = pickle.load(f)
    
# data