#  Phishing-URL-Detection - Group Integration

In [1]:
# Core imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Sklearn and friends
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, precision_recall_curve
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression

# Optional SMOTE: use imbalanced-learn if it imports; otherwise attempt a quiet
# pip install with the running interpreter and import again. SMOTE_AVAILABLE
# ends up True only when one of the two imports succeeded.
SMOTE_AVAILABLE = False
try:
    from imblearn.over_sampling import SMOTE
except Exception:
    try:
        import sys, subprocess
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "imbalanced-learn", "-q"]
        )
        from imblearn.over_sampling import SMOTE
    except Exception:
        # Install or second import failed; the flag stays False.
        pass
    else:
        SMOTE_AVAILABLE = True
else:
    SMOTE_AVAILABLE = True

import warnings, os, io, requests, pickle, time
# Install a blanket "ignore" filter: no category, message or module is given,
# so warnings raised after this line are suppressed regardless of their source.
warnings.filterwarnings("ignore")

In [2]:
# Raw-content GitHub URL of the phishing CSV; the dataset is downloaded
# directly from this address when the notebook runs.
RAW_URL = "https://raw.githubusercontent.com/Jerrell-Su/DLI_GroupAJ/main/data/phishing.csv"

def load_dataset_from_github(url: str, timeout: float = 60) -> pd.DataFrame:
    """Read a CSV file into a DataFrame, falling back to an explicit HTTP download.

    The file is first read with ``pd.read_csv(url)``. If that attempt raises for
    any reason, the content is fetched with ``requests.get`` and parsed from the
    response text instead.

    Parameters
    ----------
    url : str
        Location of the CSV file. It is passed unchanged to ``pd.read_csv`` and,
        on the fallback path, to ``requests.get``.
    timeout : float, default 60
        Timeout (in seconds) given to ``requests.get`` on the fallback path.
        It is not applied to the initial ``pd.read_csv`` attempt.

    Returns
    -------
    pd.DataFrame
        The parsed CSV contents.

    Raises
    ------
    Exception
        Whatever the fallback path raises (for example from
        ``resp.raise_for_status()`` on an error status, or from parsing).
        The error from the first attempt is not re-raised on its own.
    """
    try:
        return pd.read_csv(url)
    except Exception:
        # Deliberately broad best-effort: any failure of the direct read
        # triggers one retry through requests with an explicit timeout.
        # Imported here so the direct-read path does not depend on requests.
        import io
        import requests

        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        return pd.read_csv(io.StringIO(resp.text))

# Fetch the CSV and report its dimensions.
dataset = load_dataset_from_github(RAW_URL)
print("Dataset shape:", dataset.shape)

# "class" is the target column; every remaining column is used as a feature.
y = dataset["class"]
X = dataset.drop(columns=["class"])

# Stratified 80/20 hold-out split with a fixed random_state for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42, test_size=0.2
)
print("Train size:", X_train.shape, "| Test size:", X_test.shape)

Dataset shape: (11054, 32)\nTrain size: (8843, 31) | Test size: (2211, 31)\n