In [2]:
from lunax.data_processing.utils import *
from lunax.models import xgb_clf
from lunax.hyper_opt import OptunaTuner

In [3]:
# Load the training split from the bundled example CSV; the second argument
# ('csv') presumably selects the file format — confirm against load_data.
df_train = load_data(
    './example_data/classification/train.csv',
    'csv',
)
# NOTE(review): the rendered table shows an 'Unnamed: 0' column next to `id`,
# which usually means the CSV was saved with its index — confirm it is dropped
# before training.
# Bare last expression: render the loaded table as this cell's output.
df_train

Unnamed: 0,id,day,pressure,maxtemp,temparature,mintemp,dewpoint,humidity,cloud,sunshine,winddirection,windspeed,rainfall
0,0,1,1017.4,21.2,20.6,19.9,19.4,87.0,88.0,1.1,60.0,17.2,1
1,1,2,1019.5,16.2,16.9,15.8,15.4,95.0,91.0,0.0,50.0,21.9,1
2,2,3,1024.1,19.4,16.1,14.6,9.3,75.0,47.0,8.3,70.0,18.1,1
3,3,4,1013.4,18.1,17.8,16.9,16.8,95.0,95.0,0.0,60.0,35.6,1
4,4,5,1021.8,21.3,18.4,15.2,9.6,52.0,45.0,3.6,40.0,24.8,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2185,2185,361,1014.6,23.2,20.6,19.1,19.9,97.0,88.0,0.1,40.0,22.1,1
2186,2186,362,1012.4,17.2,17.3,16.3,15.3,91.0,88.0,0.0,50.0,35.3,1
2187,2187,363,1013.3,19.0,16.3,14.3,12.6,79.0,79.0,5.0,40.0,32.9,1
2188,2188,364,1022.3,16.4,15.2,13.8,14.7,92.0,93.0,0.1,40.0,18.0,1


In [4]:
# Name of the label column; passed to the preprocessing and split helpers below.
target: str = 'rainfall'

In [5]:
# Preprocess the training frame; `target` names the label column.
# NOTE(review): the result overwrites df_train, so re-running this cell feeds
# already-preprocessed data back in — confirm preprocess_data is idempotent.
df_train = preprocess_data(df_train, target)

In [6]:
# Split the preprocessed frame into training and validation features/labels.
# NOTE(review): no split ratio or seed is passed, so both come from
# split_data's defaults — confirm the split is reproducible across restarts.
X_train, X_val, y_train, y_val = split_data(df_train, target)

In [7]:
# Create the tuner, using the default search space (none is passed explicitly).
tuner = OptunaTuner(model_class="XGBClassifier", n_trials=3)

[lunax]> XGBoost Parameter Explanations:
[Model complexity parameters]>
- lambda: 	L2 regularization. Smoother than L1. Better for sparse data. Prevents overfitting.
- reg_lambda/alpha: 	Regularization. Control model complexity. Prevents overfitting.
- gamma: 	TREE ONLY. Minimum loss reduction for split. Prevents overfitting.
- max_depth: 	Higher = more complex model. Prevents overfitting.
- subsample: 	Number of samples per tree. Prevents overfitting.
- colsample_bytree: 	Fraction of features used per tree. Prevents overfitting.
- min_child_weight: 	Minimum sum of instance weight in a child. Prevents overfitting.


[Training and Optimization Parameters]>
- eta: 	Learning rate.
- booster: 	"gbtree" for nonlinear features. "gblinear" for linear features
- grow_policy: 	Controls how new nodes are added to the tree. "lossguide" for best split. "depthwise" for best depth.




In [8]:
# Run the tuning study, passing both the training and the validation splits.
# NOTE(review): the recorded trial log lists both 'lambda' and 'reg_lambda';
# XGBoost documents reg_lambda as an alias of lambda, so the default search
# space may be sampling the same L2 term twice — verify.
results = tuner.optimize(X_train, y_train, X_val, y_val)

[I 2025-05-18 10:29:37,890] A new study created in memory with name: no-name-6dd32dfa-fedc-46dc-932f-6ce342e1fa91
[I 2025-05-18 10:29:38,004] Trial 0 finished with value: 0.8428005284015853 and parameters: {'booster': 'gbtree', 'seed': 0, 'eta': 0.08833908791174513, 'lambda': 2.4225320377074477, 'reg_lambda': 0.8723957621314598, 'reg_alpha': 172, 'gamma': 1.0209753596945967, 'subsample': 0.6519520101323723, 'max_depth': 9, 'colsample_bytree': 0.8377011452352459, 'min_child_weight': 3, 'grow_policy': 'lossguide'}. Best is trial 0 with value: 0.8428005284015853.
[I 2025-05-18 10:29:38,070] Trial 1 finished with value: 0.8428005284015853 and parameters: {'booster': 'gbtree', 'seed': 0, 'eta': 0.19845267967590283, 'lambda': 31.01852685061465, 'reg_lambda': 0.9974665475528286, 'reg_alpha': 142, 'gamma': 3.5157874141976553, 'subsample': 0.9476895221170948, 'max_depth': 16, 'colsample_bytree': 0.8629092869352875, 'min_child_weight': 3, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 

In [9]:
# Pull the best hyper-parameter set out of the tuner's result mapping.
best_params = results['best_params']

In [10]:
# Build the classifier from the tuned hyper-parameters and fit it with 5 folds.
# The class is re-imported under an alias because this cell rebinds the name
# `xgb_clf` to the fitted model (the next cell calls `xgb_clf.evaluate`).
# Calling `xgb_clf(best_params)` directly would hit the previous model instance
# instead of the class whenever the cell is re-run.
from lunax.models import xgb_clf as XGBClfModel

xgb_clf = XGBClfModel(best_params)
xgb_clf.fit(X_train, y_train, k_fold=5)

[lunax]> Fold 1/5 - Accuracy: 0.7607, F1: 0.6573
[lunax]> Fold 2/5 - Accuracy: 0.7578, F1: 0.6534
[lunax]> Fold 3/5 - Accuracy: 0.7600, F1: 0.6564
[lunax]> Fold 4/5 - Accuracy: 0.7600, F1: 0.6564
[lunax]> Fold 5/5 - Accuracy: 0.7600, F1: 0.6564
[lunax]> Average scores - Accuracy: 0.7597, F1: 0.6560


In [11]:
# Score the fitted model on the validation split; the returned metrics dict is
# displayed as the cell's output.
# NOTE(review): in the recorded output, accuracy (0.7283) equals 319/438, the
# share of label 1, and precision (0.5304) is that value squared — the pattern a
# constant "always predict 1" model gives under weighted averaging. Check
# whether the tuned parameters (e.g. reg_alpha in the hundreds) over-regularise.
xgb_clf.evaluate(X_val, y_val)

[lunax]> label information:
+---------+---------+
|   label |   count |
|       1 |     319 |
+---------+---------+
|       0 |     119 |
+---------+---------+
[lunax]> model evaluation results:
+-----------+------------+-------------+----------+------+
| metrics   |   accuracy |   precision |   recall |   f1 |
| values    |       0.73 |        0.53 |     0.73 | 0.61 |
+-----------+------------+-------------+----------+------+


{'accuracy': 0.728310502283105,
 'precision': 0.5304361877358686,
 'recall': 0.728310502283105,
 'f1': 0.6138204761646249}