In [1]:
!pip install pandas numpy scikit-learn tensorflow keras matplotlib seaborn flask requests
!pip install elasticsearch kafka-python mysql-connector-python




In [2]:
pip install --upgrade pip

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, classification_report

# Define column names (passed to read_csv via `names=`, in file order)
columns = ['duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 
           'land', 'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in',
           'num_compromised', 'root_shell', 'su_attempted', 'num_root', 'num_file_creations',
           'num_shells', 'num_access_files', 'num_outbound_cmds', 'is_host_login', 
           'is_guest_login', 'count', 'srv_count', 'serror_rate', 'srv_serror_rate',
           'rerror_rate', 'srv_rerror_rate', 'same_srv_rate', 'diff_srv_rate', 
           'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate',
           'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate',
           'dst_host_serror_rate', 'dst_host_srv_serror_rate', 'dst_host_rerror_rate',
           'dst_host_srv_rerror_rate', 'attack', 'last_flag']

# Load dataset
try:
    df = pd.read_csv("KDDTrain+.txt", names=columns)
except FileNotFoundError as exc:
    # Re-raise instead of calling exit(): in a notebook exit() asks the kernel
    # to shut down, which throws away all state instead of just stopping this
    # cell with a readable error.
    raise FileNotFoundError(
        "Error: File 'KDDTrain+.txt' not found. Please check the file path."
    ) from exc

# Encode categorical variables.
# Each column gets its own LabelEncoder: a single shared encoder is re-fitted
# on every column, so only the last mapping ('flag') would survive and the
# other two could not be reproduced for new data.
encoders = {}
for col in ['protocol_type', 'service', 'flag']:
    encoders[col] = LabelEncoder()
    df[col] = encoders[col].fit_transform(df[col])
encoder = encoders['flag']  # same object the name `encoder` ended up holding before

# Encode attack labels (1 = attack, 0 = normal)
df['attack'] = (df['attack'] != 'normal').astype(int)

# NOTE(review): 'last_flag' stays in the feature set as before. It looks like
# the dataset's per-record difficulty score rather than a traffic feature —
# confirm whether it should be dropped before training.
features = df.drop(columns=['attack'])
labels = df['attack']

# Split data first so that held-out rows cannot influence the scaler statistics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Normalize numeric features: fit on the training rows only, then apply the
# same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the whole feature table, kept for code that still refers to it.
features_scaled = scaler.transform(features)

print(f"Training set size: {X_train.shape}, Testing set size: {X_test.shape}")

# Fit the Isolation Forest on the training split (fit() returns the estimator).
# NOTE(review): contamination=0.1 is kept as-is, but the test split printed
# below is roughly 47% attacks (11773 of 25195) — confirm this setting.
model = IsolationForest(
    n_estimators=100,
    contamination=0.1,
    random_state=42,
).fit(X_train)

# predict() marks anomalies with -1; map those to 1 (attack) and the rest to 0.
raw_pred = model.predict(X_test)
y_pred = (raw_pred == -1).astype(int)

# Evaluate model
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")


Training set size: (100778, 42), Testing set size: (25195, 42)
Classification Report:
              precision    recall  f1-score   support

           0       0.55      0.93      0.69     13422
           1       0.64      0.14      0.22     11773

    accuracy                           0.56     25195
   macro avg       0.59      0.53      0.46     25195
weighted avg       0.59      0.56      0.47     25195



In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
# Column labels, in file order, for the dataset read below.
columns = """
    duration protocol_type service flag src_bytes dst_bytes
    land wrong_fragment urgent hot num_failed_logins logged_in
    num_compromised root_shell su_attempted num_root num_file_creations
    num_shells num_access_files num_outbound_cmds is_host_login
    is_guest_login count srv_count serror_rate srv_serror_rate
    rerror_rate srv_rerror_rate same_srv_rate diff_srv_rate
    srv_diff_host_rate dst_host_count dst_host_srv_count dst_host_same_srv_rate
    dst_host_diff_srv_rate dst_host_same_src_port_rate dst_host_srv_diff_host_rate
    dst_host_serror_rate dst_host_srv_serror_rate dst_host_rerror_rate
    dst_host_srv_rerror_rate attack last_flag
""".split()

# Read the dataset, labelling its columns with the names above.
df = pd.read_csv("KDDTrain+.txt", names=columns)

# Encode categorical variables.
# Each column gets its own LabelEncoder: re-fitting one shared encoder three
# times keeps only the last mapping ('flag'), so the 'protocol_type' and
# 'service' mappings could not be reproduced for new data.
encoders = {}
for col in ['protocol_type', 'service', 'flag']:
    encoders[col] = LabelEncoder()
    df[col] = encoders[col].fit_transform(df[col])
encoder = encoders['flag']  # same object the name `encoder` ended up holding before

# Encode attack labels (1 = attack, 0 = normal)
df['attack'] = (df['attack'] != 'normal').astype(int)

# NOTE(review): 'last_flag' stays in the feature set as before. It looks like
# the dataset's per-record difficulty score rather than a traffic feature —
# confirm whether it should be dropped before training.
features = df.drop(columns=['attack'])
labels = df['attack']

# Split data first so that held-out rows cannot influence the scaler statistics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Normalize numeric features: fit on the training rows only, then apply the
# same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the whole feature table, kept for code that still refers to it.
features_scaled = scaler.transform(features)

print(f"Training set size: {X_train.shape}, Testing set size: {X_test.shape}")


Training set size: (100778, 42), Testing set size: (25195, 42)


In [5]:
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, classification_report

# Fit the Isolation Forest on the training split (fit() returns the estimator).
# NOTE(review): contamination=0.1 is kept as-is, but the test split printed
# below is roughly 47% attacks (11773 of 25195) — confirm this setting.
model = IsolationForest(
    n_estimators=100,
    contamination=0.1,
    random_state=42,
).fit(X_train)

# predict() marks anomalies with -1; map those to 1 (attack) and the rest to 0.
raw_pred = model.predict(X_test)
y_pred = (raw_pred == -1).astype(int)

# Evaluate model
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")


Classification Report:
              precision    recall  f1-score   support

           0       0.55      0.93      0.69     13422
           1       0.64      0.14      0.22     11773

    accuracy                           0.56     25195
   macro avg       0.59      0.53      0.46     25195
weighted avg       0.59      0.56      0.47     25195



In [6]:
from collections.abc import Sequence


In [7]:
import ipaddress
import os
import subprocess

def block_ip(ip_address):
    """Append a firewall rule that drops inbound traffic from ``ip_address``.

    The address is validated before anything is executed, and the command is
    run as an argument list (no shell), so a crafted value such as
    ``"1.2.3.4; rm -rf /"`` cannot inject extra commands.

    Args:
        ip_address: IPv4 or IPv6 address to block (string or
            ``ipaddress`` address object).

    Raises:
        ValueError: if ``ip_address`` is not a valid IP address.
    """
    try:
        address = ipaddress.ip_address(str(ip_address).strip())
    except ValueError as exc:
        raise ValueError(f"Refusing to block invalid IP address: {ip_address!r}") from exc

    print(f"Blocking IP: {address}")

    # iptables only understands IPv4 sources; IPv6 rules go through ip6tables.
    firewall = "ip6tables" if address.version == 6 else "iptables"
    command = ["sudo", firewall, "-A", "INPUT", "-s", str(address), "-j", "DROP"]  # Linux Firewall Rule

    try:
        result = subprocess.run(command, check=False)
    except OSError as exc:
        # e.g. sudo is not installed on this machine.
        print(f"Failed to run {firewall}: {exc}")
        return

    if result.returncode != 0:
        print(f"{firewall} exited with status {result.returncode}; IP {address} was not blocked")


In [8]:
import os
import smtplib
import ssl
from email.mime.text import MIMEText

def send_alert(ip):
    """Email an intrusion alert about ``ip`` via Gmail's SMTP server.

    No secret is stored in the notebook; configuration comes from environment
    variables:

    * ``ALERT_SMTP_PASSWORD`` (required): password used for the SMTP login.
    * ``ALERT_SENDER_EMAIL`` (optional): sender address / SMTP login name.
    * ``ALERT_RECEIVER_EMAIL`` (optional): address that receives the alert.

    Args:
        ip: Address reported in the alert body.

    Raises:
        RuntimeError: if ``ALERT_SMTP_PASSWORD`` is not set.
    """
    sender_email = os.environ.get("ALERT_SENDER_EMAIL", "urmishikhadash2004@gmail.com")
    receiver_email = os.environ.get("ALERT_RECEIVER_EMAIL", "integrasbiotek@gmail.com")
    password = os.environ.get("ALERT_SMTP_PASSWORD")
    if not password:
        raise RuntimeError(
            "ALERT_SMTP_PASSWORD is not set; export the SMTP password before calling send_alert()"
        )

    subject = "⚠ Intrusion Alert ⚠"
    body = f"Anomaly detected from IP: {ip}. Immediate action required!"

    msg = MIMEText(body)
    msg['Subject'] = subject
    msg['From'] = sender_email
    msg['To'] = receiver_email

    # The timeout keeps the cell from hanging if the server is unreachable;
    # the default SSL context verifies the server certificate during STARTTLS.
    with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
        server.starttls(context=ssl.create_default_context())
        server.login(sender_email, password)
        server.sendmail(sender_email, receiver_email, msg.as_string())

    print(f"Alert sent to {receiver_email}")

# Send one alert for a sample address. Note: this calls send_alert for real,
# so every run of this cell emails the receiver configured inside send_alert.
send_alert("192.168.1.100")

Alert sent to integrasbiotek@gmail.com


In [9]:
pip install flask flask-sqlalchemy


Note: you may need to restart the kernel to use updated packages.


In [10]:
import joblib

# Persist the fitted model and scaler with joblib, announcing each step.
artifacts = (
    ("Saving model...", model, 'anomaly_model.pkl'),
    ("Saving scaler...", scaler, 'anomaly_model_scaler.pkl'),
)
for message, fitted_object, filename in artifacts:
    print(message)
    joblib.dump(fitted_object, filename)

print("Both files saved successfully!")

Saving model...
Saving scaler...
Both files saved successfully!
