In [6]:
import numpy as np
import lightgbm as lgb
from sklearn.datasets import make_classification
from sklearn.metrics import average_precision_score, get_scorer, make_scorer
from sklearn.model_selection import GridSearchCV, train_test_split

# 1. Simulate Dataset
# Synthetic classification problem: 1000 rows x 20 features, of which only
# 2 are informative and 10 are redundant. The fixed seed keeps the data
# identical on every run.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=2,
    n_redundant=10,
    random_state=42,
)

# Hold out 20% of the rows as a test set; the remaining 80% are used for
# the cross-validated search. The split is seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 2. Model Setup
# Initialize a LightGBM classifier with the library defaults; the grid
# search below overrides the hyperparameters that are being tuned.
lgbm = lgb.LGBMClassifier()

# 3. Hyperparameter Tuning
# Define a parameter grid (3 x 3 x 3 = 27 candidate combinations)
param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.01, 0.1, 0.5],
    'max_depth': [3, 5, 7]
}

################################################################
# Define PR-AUC scorer, this is an important step for you to learn
################################################################
# PR-AUC (average precision) must be computed from predicted scores or
# probabilities, not from hard class labels. scikit-learn ships a ready-made
# scorer for it under the name "average_precision".
#
# The older spelling `make_scorer(average_precision_score, needs_proba=True)`
# was deprecated in scikit-learn 1.4 and removed in 1.6, so it fails on
# current installs. On scikit-learn >= 1.4 the hand-built equivalent is
# `make_scorer(average_precision_score, response_method="predict_proba")`;
# the built-in scorer used here works on both old and new releases.
pr_auc_scorer = get_scorer("average_precision")

# Set up GridSearchCV: 5-fold cross-validation over every grid combination,
# ranking candidates by their mean PR-AUC across the folds.
grid_search = GridSearchCV(estimator=lgbm, param_grid=param_grid,
                           scoring=pr_auc_scorer, cv=5, verbose=1)

# Fit the model (runs the full search on the training split only, so the
# test split stays untouched for the final evaluation)
grid_search.fit(X_train, y_train)

# 4. PR-AUC Evaluation
# Report the winning hyperparameter combination together with its mean
# cross-validated PR-AUC from the search.
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best PR-AUC Score: {grid_search.best_score_}")

# Score the held-out test rows with the search's best model and keep only
# column 1 of the probability matrix (the second class, i.e. the positive
# label for this 0/1 target).
y_proba = grid_search.predict_proba(X_test)[:, 1]

# PR-AUC on data the search never saw
pr_auc_score = average_precision_score(y_true=y_test, y_score=y_proba)
print(f"Test PR-AUC Score: {pr_auc_score}")


Fitting 5 folds for each of 27 candidates, totalling 135 fits
Best Parameters: {'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 50}
Best PR-AUC Score: 0.9678078415204847
Test PR-AUC Score: 0.9375397920787583
