In [None]:
!pip install -r requirements.txt



In [None]:
# Mount Google Drive (Colab only) so files under /content/drive/MyDrive are readable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import dpkt
import socket
import numpy as np
import pandas as pd
import random
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import regularizers, layers
import matplotlib.pyplot as plt

# Hyperparameters
# -- Training schedule --
EPISODES = 1_000               # Number of training episodes (raised to allow more training)
MAX_STEPS_PER_EPISODE = 30     # Hard cap on steps taken within one episode
BATCH_SIZE = 16                # Transitions sampled from replay memory per update
REPLAY_MEMORY_SIZE = 5_000     # Cap on the replay buffer so memory use stays bounded

# -- Exploration (epsilon-greedy) --
EPSILON_START = 1.0            # Initial exploration probability
EPSILON_MIN = 0.01             # Floor for the exploration probability
EPSILON_DECAY = 0.995          # Multiplicative decay applied once per episode

# -- Learning --
GAMMA = 0.95                   # Discount factor (gamma) for future rewards
LEARNING_RATE = 1e-4           # Adam step size

In [None]:
# Load one day of CICIDS2017 flow records from the mounted Drive.
dataset = pd.read_csv("/content/drive/MyDrive/CICIDS2017/Friday-WorkingHours-Morning.pcap_ISCX.csv")

# Remove leading/trailing spaces from column names (the raw CSV header has them,
# e.g. " Label"), so the plain names below resolve.
dataset.columns = dataset.columns.str.strip()

# Feature selection: the 10 flow statistics that form the agent's state vector.
features = dataset[['Flow Duration', 'Total Fwd Packets', 'Total Backward Packets',
                    'Total Length of Fwd Packets', 'Total Length of Bwd Packets',
                    'Fwd Packet Length Mean', 'Bwd Packet Length Mean',
                    'Flow IAT Mean', 'Flow IAT Std', 'Fwd IAT Mean']]

# Label selection (malicious or benign traffic)
labels = dataset['Label']

# Feature normalization (zero mean / unit variance per column); returns a NumPy array.
scaler = StandardScaler()
features = scaler.fit_transform(features)

# Convert labels to binary format (benign = 0, malicious = 1).
# CICIDS2017 never uses the word "malicious": normal traffic is labelled "BENIGN"
# and attack flows carry the attack's name (for example "Bot"). Testing for the
# substring 'malicious' therefore mapped *every* row to 0; anything that is not
# BENIGN must be treated as an attack instead.
labels = (labels.str.strip().str.upper() != 'BENIGN').astype(int)

# Combine features and labels to form the dataset: a list of (feature_vector, label) pairs.
data = list(zip(features, labels))

In [None]:
# Exploration schedule: exponentially decayed epsilon with a lower bound
def epsilon_decay(episode, initial_epsilon=EPSILON_START, min_epsilon=EPSILON_MIN, decay_rate=EPSILON_DECAY):
    """Return the exploration rate to use for the given episode.

    Args:
        episode: Episode index used as the exponent of the decay.
        initial_epsilon: Exploration rate at episode 0.
        min_epsilon: Lower bound; the result is never below this value.
        decay_rate: Per-episode multiplicative decay factor.

    Returns:
        ``initial_epsilon * decay_rate ** episode``, clamped from below at
        ``min_epsilon``.
    """
    decayed = initial_epsilon * (decay_rate ** episode)
    return decayed if decayed > min_epsilon else min_epsilon

# Q-Network Definition ====================================

# Build the Q-network: state vector in, one Q-value per action out
def create_q_model():
    """Construct the (uncompiled) Keras Q-network.

    Reads the module-level ``state_size`` and ``num_actions`` at call time.
    Architecture: Dense(256, relu, L2=0.01) -> Dense(256, relu) ->
    Dense(128, relu) -> Dense(num_actions, linear).

    Returns:
        A ``tf.keras.Model`` mapping a ``(state_size,)`` input to
        ``num_actions`` linear outputs.
    """
    state_input = layers.Input(shape=(state_size,))

    # Only the first hidden layer carries the L2 weight penalty.
    hidden = layers.Dense(256, activation="relu", kernel_regularizer=regularizers.l2(0.01))(state_input)
    for units in (256, 128):
        hidden = layers.Dense(units, activation="relu")(hidden)

    # Linear head: raw (unbounded) Q-value estimates.
    q_output = layers.Dense(num_actions, activation="linear")(hidden)
    return tf.keras.Model(inputs=state_input, outputs=q_output)

# Dimensions of the Q-network (read by create_q_model when it is called)
state_size = 10  # Based on the number of features
num_actions = 3  # Actions: Allow, Block, Flag

# Build the Q-network, then compile it with Adam and a mean-squared-error loss
model = create_q_model()
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(optimizer=adam_optimizer, loss='mse')

# Action Selection and Environment Interaction ====================================

# Epsilon-greedy policy over the Q-network's outputs
def choose_action(state, epsilon):
    """Pick an action index for ``state`` using an epsilon-greedy rule.

    Args:
        state: A single state vector (a leading batch axis is added before
            it is passed to the module-level ``model``).
        epsilon: Probability of taking a uniformly random action.

    Returns:
        An action index in ``range(num_actions)``: random with probability
        ``epsilon``, otherwise the argmax of the predicted Q-values.
    """
    exploring = np.random.random() < epsilon
    if exploring:
        # Exploration: uniform draw over the available actions.
        return np.random.choice(num_actions)

    # Exploitation: score the state and take the best-valued action.
    single_batch = np.expand_dims(state, axis=0)
    predicted_q = model.predict(single_batch, verbose=0)
    return np.argmax(predicted_q)

# Fetch one (state, label) sample from the prepared dataset
def get_initial_state(index=None):
    """Return a ``(state, label)`` pair taken from the module-level ``data``.

    Args:
        index: Position in ``data`` to read. When ``None`` (the default) a
            position is drawn uniformly at random.

    Returns:
        Tuple ``(state, label)`` for the selected entry.
    """
    chosen = np.random.randint(0, len(data)) if index is None else index
    sample_state, sample_label = data[chosen]
    return sample_state, sample_label


In [None]:
# Apply an action: sample the next flow, score the action, decide whether the episode ends
def execute_action(action, current_state, step):
    """Simulate one environment step.

    Args:
        action: 0 = Allow, 1 = Block, any other value = Flag.
        current_state: Accepted for interface symmetry; not read by this function.
        step: 1-based step counter within the current episode.

    Returns:
        Tuple ``(next_state, reward, done)``.

    NOTE(review): the reward is computed from the label of the *newly sampled*
    flow returned as ``next_state``, not from ``current_state`` (the flow the
    agent actually observed when choosing ``action``). Confirm whether that is
    intended; if not, the label of the current flow needs to be passed in.
    """
    # Draw a random flow from the dataset; it becomes the next state.
    next_state, label = get_initial_state()

    # Reward table as (benign_reward, malicious_reward) per action.
    if action == 0:      # Allow: good for benign traffic, costly for an attack
        benign_reward, malicious_reward = 2, -3
    elif action == 1:    # Block: good for an attack, mild false-positive penalty
        benign_reward, malicious_reward = -1, 3
    else:                # Flag: small credit for an attack, neutral otherwise
        benign_reward, malicious_reward = 0, 0.5
    reward = malicious_reward if label == 1 else benign_reward

    # Episode ends at the step cap, or otherwise at random with ~10% probability per step.
    if step >= MAX_STEPS_PER_EPISODE:
        done = True
    else:
        done = np.random.rand() > 0.90
    return next_state, reward, done

In [None]:
# One vectorised Q-learning update on a minibatch drawn from replay memory
def _replay_update(memory):
    """Sample ``BATCH_SIZE`` transitions from ``memory`` and fit the Q-network once.

    The whole minibatch goes through the network in two forward passes and one
    gradient step, instead of two ``predict`` calls and one ``fit`` call per
    sample. The caller must ensure ``len(memory) >= BATCH_SIZE``.

    Args:
        memory: Sequence of ``(state, action, reward, next_state, done)`` tuples.
    """
    batch = random.sample(memory, BATCH_SIZE)
    states_b, actions_b, rewards_b, next_states_b, dones_b = zip(*batch)

    states_b = np.array(states_b)
    actions_b = np.array(actions_b)
    rewards_b = np.array(rewards_b, dtype=np.float32)
    next_states_b = np.array(next_states_b)
    dones_b = np.array(dones_b, dtype=bool)

    # Q(s', .) for the bootstrap term and Q(s, .) as the base for the targets.
    next_q_values = model.predict(next_states_b, verbose=0)
    target_q_values = model.predict(states_b, verbose=0)

    # Bellman target r + gamma * max_a' Q(s', a'); no bootstrap on terminal transitions.
    bellman_targets = rewards_b + GAMMA * next_q_values.max(axis=1) * np.logical_not(dones_b)

    # Only the taken action's Q-value is moved; the other outputs keep their
    # current prediction so they contribute zero error to the MSE loss.
    target_q_values[np.arange(BATCH_SIZE), actions_b] = bellman_targets

    model.train_on_batch(states_b, target_q_values)


# Function to train the agent and visualize rewards
def train_and_visualize(episodes):
    """Train the Q-network with experience replay and plot reward per episode.

    For each episode the agent starts from a random flow, acts epsilon-greedily
    (epsilon from ``epsilon_decay(episode)``), stores every transition in a
    replay buffer capped at ``REPLAY_MEMORY_SIZE`` and, once the buffer holds at
    least ``BATCH_SIZE`` transitions, performs one minibatch update per step.

    Args:
        episodes: Number of episodes to run.

    Returns:
        None. Progress is printed per episode and a reward curve is shown.
    """
    rewards_per_episode = []
    memory = []  # Replay memory to store experiences

    for episode in range(episodes):
        state, _ = get_initial_state()
        total_reward = 0
        done = False
        step = 0
        epsilon = epsilon_decay(episode)

        print(f"\n==== Starting Episode {episode} ====")

        while not done and step < MAX_STEPS_PER_EPISODE:  # Limit the number of steps per episode
            step += 1
            action = choose_action(state, epsilon)
            next_state, reward, done = execute_action(action, state, step)
            total_reward += reward

            # Store transition in memory (state, action, reward, next_state, done)
            memory.append((state, action, reward, next_state, done))

            # Limit memory size to avoid infinite growth (drop the oldest transition)
            if len(memory) > REPLAY_MEMORY_SIZE:
                memory.pop(0)

            # Train the model if we have enough experiences in memory
            if len(memory) >= BATCH_SIZE:
                _replay_update(memory)

            # Move to the next state
            state = next_state

        # Episode finished (done flag set or step cap reached): record its return
        rewards_per_episode.append(total_reward)
        print(f"Total Reward for Episode {episode}: {total_reward}")
        print(f"==== End of Episode {episode} ====\n")

    # Plot the reward progress over episodes
    plt.plot(rewards_per_episode)
    plt.xlabel("Episode")
    plt.ylabel("Total Reward")
    plt.title("Reward Progress Over Episodes")
    plt.show()

In [None]:
# Run the training process with batch learning for EPISODES episodes.
# Prints the total reward of every episode and shows the reward curve when finished.
train_and_visualize(EPISODES)


==== Starting Episode 0 ====
Total Reward for Episode 0: 0
==== End of Episode 0 ====


==== Starting Episode 1 ====
Total Reward for Episode 1: 5
==== End of Episode 1 ====


==== Starting Episode 2 ====
Total Reward for Episode 2: -4
==== End of Episode 2 ====


==== Starting Episode 3 ====
Total Reward for Episode 3: 1
==== End of Episode 3 ====


==== Starting Episode 4 ====
Total Reward for Episode 4: 2
==== End of Episode 4 ====


==== Starting Episode 5 ====
Total Reward for Episode 5: 5
==== End of Episode 5 ====


==== Starting Episode 6 ====
Total Reward for Episode 6: 5
==== End of Episode 6 ====


==== Starting Episode 7 ====
Total Reward for Episode 7: 5
==== End of Episode 7 ====


==== Starting Episode 8 ====
Total Reward for Episode 8: 4
==== End of Episode 8 ====


==== Starting Episode 9 ====
Total Reward for Episode 9: 4
==== End of Episode 9 ====


==== Starting Episode 10 ====
Total Reward for Episode 10: 5
==== End of Episode 10 ====


==== Starting Episode 11 ==