In [1]:
import time
import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Dataset configuration: one seed drives both the generator and the split.
SEED = 42
N_SAMPLES = 1_000_000
N_FEATURES = 20
TEST_FRACTION = 0.2

# Synthetic classification data, then a train/test partition of it.
X, y = make_classification(
    n_samples=N_SAMPLES,
    n_features=N_FEATURES,
    random_state=SEED,
)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SEED,
)

# Wrap each split in a DMatrix; these are what xgb.train and
# Booster.predict receive further down.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Training parameters.
# Both runs share every setting; the GPU run differs only by the extra
# 'device' entry, so each dict is derived from one common base.
_common_params = {
    'tree_method': 'hist',
    'objective': 'binary:logistic',
    'verbosity': 1,
    'eta': 0.3,
}

# Independent copies, so changing one configuration never affects the other.
params_cpu = dict(_common_params)
params_gpu = dict(_common_params, device='cuda')

def train_model(params, label, num_boost_round=100):
    """Train a booster, then print its test accuracy and training time.

    Uses the notebook-level ``dtrain`` (training data), ``dtest`` (data to
    predict on) and ``y_test`` (labels the predictions are scored against).

    Args:
        params: Parameter dict passed straight to ``xgb.train``.
        label: Short tag (e.g. "CPU") used as the prefix of each printed line.
        num_boost_round: Number of boosting rounds passed to ``xgb.train``;
            defaults to 100.

    Returns:
        None. The accuracy and elapsed time are printed.
    """
    print(f"Training with {label}...")

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    # Only the xgb.train call is timed; prediction and scoring are excluded.
    # NOTE(review): a first GPU run may also include one-time device start-up
    # cost -- consider an untimed warm-up run if exact numbers matter; confirm.
    start = time.perf_counter()
    bst = xgb.train(params, dtrain, num_boost_round=num_boost_round)
    elapsed = time.perf_counter() - start

    preds = bst.predict(dtest)
    # Threshold all predictions in one vectorised step rather than looping
    # over every element in Python; values above 0.5 become class 1.
    preds_binary = (preds > 0.5).astype(int)
    acc = accuracy_score(y_test, preds_binary)

    print(f"{label} Accuracy: {acc:.4f}")
    print(f"{label} Time: {elapsed:.2f} seconds")

# Run both configurations back to back (CPU first), printing a blank
# separator between consecutive runs.
for run_index, (run_params, run_label) in enumerate(
    [(params_cpu, "CPU"), (params_gpu, "GPU")]
):
    if run_index > 0:
        print("\n")
    train_model(run_params, run_label)


Training with CPU...
CPU Time: 1.54 seconds


Training with GPU...
GPU Time: 0.59 seconds
