# In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVR, SVC
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.utils import resample
import joblib
import pyshark
import nest_asyncio

# nest_asyncio is applied up front to work around async issues that show up
# during live packet capture.
nest_asyncio.apply()

# Load dataset.  The CSV is read with header=None, so the names below are
# assigned to its columns positionally.
dataset_path = "C:/Users/Shreshth Arora/Desktop/nslkdd.csv"
columns = [
    "duration", "protocol_type", "service", "flag",
    "src_bytes", "dst_bytes", "land", "wrong_fragment",
    "urgent", "hot", "num_failed_logins", "logged_in",
    "num_compromised", "root_shell", "su_attempted", "num_root",
    "num_file_creations", "num_shells", "num_access_files", "num_outbound_cmds",
    "is_host_login", "is_guest_login", "count", "srv_count",
    "serror_rate", "srv_serror_rate", "rerror_rate", "srv_rerror_rate",
    "same_srv_rate", "diff_srv_rate", "srv_diff_host_rate", "dst_host_count",
    "dst_host_srv_count", "dst_host_same_srv_rate", "dst_host_diff_srv_rate",
    "dst_host_same_src_port_rate", "dst_host_srv_diff_host_rate",
    "dst_host_serror_rate", "dst_host_srv_serror_rate", "dst_host_rerror_rate",
    "dst_host_srv_rerror_rate", "label",
]

data = pd.read_csv(dataset_path, header=None, names=columns)

# Encode categorical features.  Each fitted encoder is stored under its
# column name so the same string -> integer mapping can be reused later
# (it is saved to disk and applied to live packets further down).
categorical_cols = ["protocol_type", "service", "flag"]
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    le.fit(data[col])
    data[col] = le.transform(data[col])
    label_encoders[col] = le

# Encode labels (1 for Normal, 0 for Intrusion): anything that is not the
# exact string 'normal' counts as an intrusion.
data['label'] = (data['label'] == 'normal').astype('int64')

# Balance dataset: draw, with replacement, as many normal rows as there are
# intrusion rows, so both classes end up with the same number of samples.
normal = data.loc[data['label'] == 1]
intrusion = data.loc[data['label'] == 0]
normal_upsampled = resample(
    normal,
    n_samples=len(intrusion),
    replace=True,
    random_state=42,
)
data = pd.concat([intrusion, normal_upsampled])

# Feature selection (remove highly correlated features).
# Only the strict upper triangle of the correlation matrix is kept, so every
# pair of columns is inspected once and, of two correlated columns, the one
# that appears later in the frame is the candidate for removal.
corr_matrix = data.corr()
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
# 'label' is the prediction target, not a feature.  It is part of the
# correlation matrix, so a feature that tracks it closely would otherwise put
# the target itself on the drop list and break the features/labels split that
# follows.
# NOTE(review): only positive correlations above 0.95 are treated as
# redundant; strongly negative ones are kept -- confirm that is intended.
to_drop = [
    column
    for column in upper.columns
    if column != 'label' and (upper[column] > 0.95).any()
]
data.drop(columns=to_drop, inplace=True)

# Split features and labels
X = data.drop(columns=['label'])
y = data['label']

# Train-test split.  The split is made on the unscaled features so that the
# scaler can be fitted on the training portion only.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features.  Fitting on the training rows alone keeps test-set
# statistics out of the preprocessing step.  Fitting on a DataFrame (rather
# than an array) also records the column names in scaler.feature_names_in_,
# which the live-capture preprocessing reads.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fully scaled feature matrix, kept under its original name for any code that
# still refers to it.
X_scaled = scaler.transform(X)

# Train SVC model instead of SVR (the target is a binary class label)
svc = SVC(kernel='rbf', C=100, gamma='scale')
svc.fit(X_train, y_train)

# Predictions on the held-out rows
y_pred = svc.predict(X_test)

# Save trained model and encoders: one file for the classifier, one for the
# scaler, and one "<column>_encoder.pkl" per categorical column.
joblib.dump(svc, "svc_model.pkl")
joblib.dump(scaler, "scaler.pkl")
for col, encoder in label_encoders.items():
    joblib.dump(encoder, f"{col}_encoder.pkl")

# Live Packet Capture

def _encode_category(encoder, value, default=0):
    """Return the integer code ``encoder`` assigns to ``value``.

    The value is looked up as given first and then in lower case, so that an
    upper-case name coming from a packet (e.g. "TCP") still matches an encoder
    that was fitted on lower-case strings.  ``default`` is returned when
    neither spelling is one of the encoder's known classes.
    """
    for candidate in (value, value.lower()):
        if candidate in encoder.classes_:
            return encoder.transform([candidate])[0]
    return default


def extract_features(packet):
    """Build the feature dict for a single captured packet.

    Args:
        packet: a captured packet exposing ``transport_layer``, ``length`` and,
            for TCP traffic, a ``tcp`` layer with a ``flags`` field.

    Returns:
        A dict with the keys ``protocol_type``, ``src_bytes``, ``dst_bytes``,
        ``flag``, ``count``, ``srv_count``, ``same_srv_rate`` and
        ``diff_srv_rate``, or ``None`` when anything goes wrong while reading
        the packet (the error is printed, not raised).
    """
    try:
        protocol = packet.transport_layer or "UNKNOWN"

        # Packets without a TCP layer have no flags to read.
        tcp_layer = getattr(packet, 'tcp', None)
        flags = tcp_layer.flags if tcp_layer else "UNKNOWN"

        # A single packet only has one length, so it is used for both byte
        # counters.
        length = int(packet.length)

        # NOTE(review): values the encoders do not know are mapped to 0, which
        # is also the code of each encoder's first class, so "unknown" cannot
        # be told apart from that class.  Presumably the raw TCP flags value
        # is rarely one of the dataset's "flag" categories -- verify.
        return {
            "protocol_type": _encode_category(label_encoders["protocol_type"], protocol),
            "src_bytes": length,
            "dst_bytes": length,
            "flag": _encode_category(label_encoders["flag"], flags),
            # Fixed placeholder values: these statistics are not computed
            # from the captured traffic.
            "count": 1,
            "srv_count": 1,
            "same_srv_rate": 0.5,
            "diff_srv_rate": 0.5,
        }
    except Exception as e:
        # Best effort: one unreadable packet must not stop the capture loop.
        print("Error extracting packet features:", e)
        return None

def preprocess_real_time_data(df):
    """Align a live-feature frame with the columns the scaler was fitted on.

    Args:
        df: DataFrame of extracted packet features; it is not modified.

    Returns:
        A new DataFrame whose columns are exactly ``scaler.feature_names_in_``
        in that order.  Columns missing from ``df`` are filled with 0 and
        columns the scaler does not know are left out.
    """
    # A single reindex adds, drops and reorders columns in one step without
    # touching the caller's frame.
    return df.reindex(columns=scaler.feature_names_in_, fill_value=0)

def live_packet_capture(interface="Wi-Fi", packet_count=10):
    """Capture packets from a network interface and print a prediction for each.

    Args:
        interface: name of the network interface to sniff on.
        packet_count: number of packets to capture before stopping.

    Every packet whose features can be extracted is aligned to the scaler's
    columns, scaled, classified with the trained SVC and reported as
    "Normal" (class 1) or "Intrusion" (any other class).  Packets whose
    features could not be extracted are skipped.
    """
    capture = pyshark.LiveCapture(interface=interface)
    try:
        for packet in capture.sniff_continuously(packet_count=packet_count):
            features = extract_features(packet)
            if features is None:
                # Extraction failed and already reported the error.
                continue
            df = preprocess_real_time_data(pd.DataFrame([features]))
            df_scaled = scaler.transform(df)
            pred = svc.predict(df_scaled)
            print(f"Predicted: {'Normal' if pred[0] == 1 else 'Intrusion'}")
    finally:
        # Always release the capture, even if classification raises midway.
        capture.close()

# Run live packet capture.  The guard keeps a capture from starting when this
# file is imported; it still runs when executed as a script or notebook cell.
if __name__ == "__main__":
    live_packet_capture()

# Output:
# Predicted: Normal
# Predicted: Normal
# Predicted: Normal
# Predicted: Normal
# Predicted: Normal
# Predicted: Normal
# Predicted: Normal
# Predicted: Normal
# Predicted: Normal
# Predicted: Normal
