In [1]:
import joblib
import numpy as np
import pandas as pd

In [2]:
# Load the three persisted inference artifacts. Paths are relative, so the
# files are resolved against the kernel's current working directory.
# NOTE(review): joblib.load unpickles its input and can execute arbitrary code;
# only load artifact files that come from a trusted source.
# Classifier: used below through .predict() and .predict_proba().
rf_model = joblib.load("rf_intrusion_model.pkl")
# Feature scaler: applied via .transform() before the classifier sees the data.
scaler = joblib.load("scaler.pkl")
# Label encoder: .inverse_transform() turns predicted class ids into label names.
le = joblib.load("label_encoder.pkl")

In [3]:
# Ordered feature names consumed by predict_intrusion().
# The order is significant: positional (list / array) input is interpreted in
# exactly this order, and DataFrame input is sliced to these columns in this order.
FEATURE_COLUMNS = [
    # Flow basics
    "Destination Port",
    "Flow Duration",
    "Total Fwd Packets",
    "Total Length of Fwd Packets",
    # Forward packet length
    "Fwd Packet Length Max",
    "Fwd Packet Length Min",
    "Fwd Packet Length Mean",
    "Fwd Packet Length Std",
    # Backward packet length
    "Bwd Packet Length Max",
    "Bwd Packet Length Min",
    "Bwd Packet Length Mean",
    "Bwd Packet Length Std",
    # Flow rates
    "Flow Bytes/s",
    "Flow Packets/s",
    # Flow inter-arrival time (IAT)
    "Flow IAT Mean",
    "Flow IAT Std",
    "Flow IAT Max",
    "Flow IAT Min",
    # Forward IAT
    "Fwd IAT Total",
    "Fwd IAT Mean",
    "Fwd IAT Std",
    "Fwd IAT Max",
    "Fwd IAT Min",
    # Backward IAT
    "Bwd IAT Total",
    "Bwd IAT Mean",
    "Bwd IAT Std",
    "Bwd IAT Max",
    "Bwd IAT Min",
    # Header lengths and per-direction packet rates
    "Fwd Header Length",
    "Bwd Header Length",
    "Fwd Packets/s",
    "Bwd Packets/s",
    # Overall packet length
    "Min Packet Length",
    "Max Packet Length",
    "Packet Length Mean",
    "Packet Length Std",
    "Packet Length Variance",
    # Flag counts
    "FIN Flag Count",
    "PSH Flag Count",
    "ACK Flag Count",
    # Size / subflow / window
    "Average Packet Size",
    "Subflow Fwd Bytes",
    "Init_Win_bytes_forward",
    "Init_Win_bytes_backward",
    "act_data_pkt_fwd",
    "min_seg_size_forward",
    # Active / idle periods
    "Active Mean",
    "Active Max",
    "Active Min",
    "Idle Mean",
    "Idle Max",
    "Idle Min",
]



In [4]:
def predict_intrusion(input_data):
    """
    Predict the traffic / attack class for one or more flow records.

    Uses the module-level ``scaler``, ``rf_model`` and ``le`` objects and the
    ``FEATURE_COLUMNS`` ordering.

    Parameters
    ----------
    input_data : list, numpy.ndarray or pandas.DataFrame
        - list / 1D array: a single record whose values follow the order of
          ``FEATURE_COLUMNS``.
        - nested list / 2D array of shape ``(n_records, n_features)``:
          several records, one per row.
        - DataFrame: must contain every column in ``FEATURE_COLUMNS``; extra
          columns are ignored and the columns are re-ordered to match.

    Returns
    -------
    pandas.DataFrame
        One row per input record (fresh 0..n-1 index) with the columns
        ``"Predicted Attack Type"`` (decoded label) and ``"Confidence (%)"``
        (highest class probability, in percent, rounded to 2 decimals).

    Raises
    ------
    ValueError
        If ``input_data`` is of an unsupported type, or a list/array does not
        provide ``len(FEATURE_COLUMNS)`` values per record.
    KeyError
        If a DataFrame is missing one or more required feature columns.
    """
    n_features = len(FEATURE_COLUMNS)

    if isinstance(input_data, pd.DataFrame):
        missing = [col for col in FEATURE_COLUMNS if col not in input_data.columns]
        if missing:
            raise KeyError(
                f"Input DataFrame is missing required feature columns: {missing}"
            )
        X = input_data[FEATURE_COLUMNS]
    elif isinstance(input_data, (list, np.ndarray)):
        X = np.asarray(input_data)
        if X.ndim == 2 and X.shape[1] == n_features:
            # Already a (n_records, n_features) batch: keep one row per record
            # instead of flattening everything into a single bogus row.
            pass
        elif X.size == n_features:
            # Exactly one record's worth of values (1D list/array, column
            # vector, ...): treat it as a single sample, as before.
            X = X.reshape(1, -1)
        else:
            raise ValueError(
                f"Expected {n_features} feature values per record "
                f"(a 1D record or a 2D array with {n_features} columns), "
                f"got input of shape {X.shape}."
            )
    else:
        raise ValueError("Input must be a list, numpy array, or pandas DataFrame.")

    X_scaled = scaler.transform(X)

    # Run the model once: the predicted class is the arg-max of the class
    # probabilities (this is how a single-output scikit-learn forest derives
    # predict() from predict_proba()), so a separate predict() call would only
    # repeat the whole forest evaluation.
    pred_prob = np.asarray(rf_model.predict_proba(X_scaled))
    best_idx = np.argmax(pred_prob, axis=1)
    pred_class = np.asarray(rf_model.classes_)[best_idx]

    # Decode label(s)
    decoded = le.inverse_transform(pred_class)

    # Confidence scores (max probability per sample), expressed in percent
    confidences = np.max(pred_prob, axis=1) * 100

    # Combine results
    results = pd.DataFrame({
        "Predicted Attack Type": decoded,
        "Confidence (%)": confidences.round(2)
    })
    return results

In [5]:
if __name__ == "__main__":
    # Demo: classify one hand-written flow record.
    # The values are positional and must follow the order of FEATURE_COLUMNS
    # (52 values, laid out here as 4 rows of 13).
    sample = [
        443, 123456, 20, 1200, 1500, 60, 300, 100, 200, 10, 100, 50, 12000,
        0.4, 50, 10, 200, 20, 400, 200, 100, 20, 200, 20, 100, 10,
        200, 50, 20, 40, 200, 100, 50, 100, 60, 300, 1, 1, 1,
        300, 1200, 100, 200, 10, 50, 10, 50, 20, 20, 10, 10, 10,
    ]

    # Header first, then the one-row result frame returned by predict_intrusion.
    print("Single Sample Prediction:")
    print(predict_intrusion(sample))

Single Sample Prediction:
  Predicted Attack Type  Confidence (%)
0        Normal Traffic           80.32


[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done  26 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done  26 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.0s finished
