# Intrusion Detection Model Training (CICIDS 2017)
This notebook trains a machine learning model to classify network traffic as benign or malicious using the CICIDS 2017 dataset.

In [None]:
# Install imbalanced-learn if not already installed
!pip install -q imbalanced-learn

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from imblearn.over_sampling import SMOTE
import pickle

In [3]:
# Read the feature log from disk. The filename below is only an example --
# replace it with the path to your own dataset.
MAX_ROWS = 500000  # upper bound on the number of rows kept for training

# Rows containing any missing value are discarded straight after loading.
df = pd.read_csv('http_features_log.csv').dropna()

if len(df) > MAX_ROWS:
    # Down-sample with a fixed seed so the same subset is drawn on every run.
    df = df.sample(n=MAX_ROWS, random_state=42)

In [4]:
# Encode labels
# Fit the encoder on the raw label column and keep the integer codes in a
# separate Series instead of writing them back into df. Overwriting
# df['Label'] makes this cell non-idempotent: on a second run the encoder
# would be fitted on the integer codes, so le.classes_ (and the pickled
# encoder) could no longer translate predictions back to the original names.
le = LabelEncoder()
y = pd.Series(le.fit_transform(df['Label']), index=df.index, name='Label')

# Feature matrix: every column except the target.
X = df.drop(columns='Label')

In [8]:
# Normalize features
# StandardScaler only accepts numeric input, but X still contains text columns.
# Scaling the whole frame previously failed with:
#   ValueError: could not convert string to float: '192.168.9.85-192.168.4.247-19766-24761'
# so only the numeric columns are kept as model features.
X_numeric = X.select_dtypes(include=[np.number])
if X_numeric.shape[1] == 0:
    raise ValueError("No numeric feature columns found to scale")

# Report what was left out so the reduction in features is visible to the reader.
dropped_columns = [col for col in X.columns if col not in X_numeric.columns]
print(f"Dropped non-numeric columns: {dropped_columns}")

# Fitting on a DataFrame lets the scaler record the feature names it expects.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_numeric)

In [None]:
# Balance dataset with SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

In [None]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42)

# Train model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate
y_pred = clf.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
print(classification_report(y_test, y_pred))

In [None]:
# Persist the fitted objects to disk with pickle: the classifier, the feature
# scaler and the label encoder, each in its own file.
artifacts = {
    'ids_model.pkl': clf,
    'scaler.pkl': scaler,
    'label_encoder.pkl': le,
}

for filename, fitted_object in artifacts.items():
    with open(filename, 'wb') as out_file:
        pickle.dump(fitted_object, out_file)