In [1]:
import autogluon.core as ag

# Hyperparameter overrides for the neural-network models.
# Plain literals are fixed values; ag.space.* entries are search spaces whose
# `default` (Real) or first listed option (Categorical, as written here) comes first.
nn_options = {
    'num_epochs': 50,
    'learning_rate': ag.space.Real(lower=1e-4, upper=3e-2, default=3e-4, log=True),
    'weight_decay': ag.space.Real(lower=1e-12, upper=0.1, default=1e-6, log=True),
    'dropout_prob': ag.space.Categorical(0.1, 0.0, 0.5, 0.2, 0.3, 0.4),
    'embedding_size_factor': ag.space.Categorical(
        1.0, 0.5, 1.5, 0.7, 0.6, 0.8, 0.9, 1.1, 1.2, 1.3, 1.4
    ),
    'proc.embed_min_categories': ag.space.Categorical(4, 3, 10, 100, 1000),
    'proc.impute_strategy': ag.space.Categorical('median', 'mean', 'most_frequent'),
    'proc.max_category_levels': ag.space.Categorical(
        100, 10, 20, 200, 300, 400, 500, 1000, 10000
    ),
    'proc.skew_threshold': ag.space.Categorical(
        0.99, 0.2, 0.3, 0.5, 0.8, 0.9, 0.999, 1.0, 10.0, 100.0
    ),
    'num_layers': ag.space.Categorical(2, 3, 4),
    'hidden_size': ag.space.Categorical(128, 256, 512),
    'activation': ag.space.Categorical('relu', 'elu'),
    'use_batchnorm': True,
}
# Empty placeholder; presumably meant to hold the best configuration found by tuning.
nn_option_best = {}
# Search space for the 'CAT' model family (see the disabled 'CAT' entry in
# `hyperparameters`); every entry here is tunable.
CAT_options = {
    'learning_rate': ag.space.Real(lower=5e-3, upper=0.2, default=0.05, log=True),
    'depth': ag.space.Int(lower=5, upper=8, default=6),
    'l2_leaf_reg': ag.space.Real(lower=1, upper=5, default=3),
}
# Empty placeholder; presumably meant to hold the best configuration found by tuning.
CAT_option_best = {}

# Non-default hyperparameter values for LightGBM gradient boosted trees.
gbm_options = {
    # Fixed number of boosting rounds; this bounds the training time of each GBM model.
    'num_boost_round': 10,
    # Number of leaves per tree (integer search space).
    'num_leaves': ag.space.Int(lower=6, upper=36, default=16),
    'learning_rate': ag.space.Real(lower=5e-3, upper=0.2, default=0.05, log=True),
    'feature_fraction': ag.space.Real(lower=0.75, upper=1.0, default=1.0),
    # TODO: derive `upper` from the dataset size; with few rows it should be small.
    'min_data_in_leaf': ag.space.Int(lower=2, upper=60, default=20),
}
# Empty placeholder; presumably meant to hold the best configuration found by tuning.
gbm_option_best = {}
# Search space for the 'XGB' model family; every entry is tunable.
XGB_option = {
    'learning_rate': ag.space.Real(lower=5e-3, upper=0.2, default=0.1, log=True),
    'max_depth': ag.space.Int(lower=3, upper=10, default=6),
    'min_child_weight': ag.space.Int(lower=1, upper=5, default=1),
    'gamma': ag.space.Real(lower=0, upper=5, default=0.01),
    'subsample': ag.space.Real(lower=0.5, upper=1.0, default=1.0),
    'colsample_bytree': ag.space.Real(lower=0.5, upper=1.0, default=1.0),
    'reg_alpha': ag.space.Real(lower=0.0, upper=10.0, default=0.0),
    'reg_lambda': ag.space.Real(lower=0.0, upper=10.0, default=1.0),
}
# Empty placeholder; presumably meant to hold the best configuration found by tuning.
XGB_option_best = {}

# No overrides for the 'XT' and 'RF' model families: empty dicts.
XT_options = {}
RF_option = {}

# Search space for the FastAI tabular model.
FASTAI_options = {
    # Layer-layout search is currently disabled:
    # 'layers': ag.space.Categorical(None, [200, 100], [200], [500],  [500, 200], [50, 25], [200, 100, 50], [500, 200, 100]),
    'emb_drop': ag.space.Real(lower=0.0, upper=0.5, default=0.2),
    'ps': ag.space.Real(lower=0.0, upper=0.5, default=0.1),
    'bs': ag.space.Categorical(256, 64, 128, 512, 1024, 2048, 4096),
    'lr': ag.space.Real(lower=5e-5, upper=1e-1, default=1e-3, log=True),
}
# Hyperparameters per model type. A model type whose key is missing from this
# dict is not trained at all, so only LightGBM ('GBM') runs in this experiment.
hyperparameters = dict(GBM=gbm_options)
# Currently disabled entries (add the pair to the dict above to re-enable):
#   'XGB': XGB_option_best
#   'RF': RF_option
#   'XT': XT_options
#   'CAT': CAT_option_best
#   'NN_TORCH': nn_options   # NOTE: leave this one out if you get errors on Mac OSX

# Training budget forwarded as fit(time_limit=...): 360, i.e. six minutes when
# read as seconds.
time_limit = 60 * 6
# Upper bound on the number of hyperparameter configurations tried per model type.
num_trials = 5
# Searcher name forwarded to AutoGluon. 'auto' presumably leaves the choice of
# search routine to the library -- confirm against the AutoGluon docs.
search_strategy = 'auto'

# HPO is not performed unless this dict is passed to fit().
hyperparameter_tune_kwargs = dict(
    num_trials=num_trials,
    scheduler='local',
    searcher=search_strategy,
)

In [2]:
from autogluon.tabular import TabularPredictor, TabularDataset

# Input CSV and the directory the fitted predictor is written to.
train_data = TabularDataset("/media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/Data/self_data1.csv")
save_path = '/media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/auto_gl/self_data_model/exp_gbm_2'
label = 'label'

# Keyword arguments for fit(), gathered in one place so the run configuration
# can be read at a glance: 5-fold bagging, one bagging set, no stacking, HPO
# enabled through `hyperparameter_tune_kwargs`, and one GPU requested per fit.
fit_settings = dict(
    num_bag_folds=5,
    num_bag_sets=1,
    num_stack_levels=0,
    time_limit=time_limit,
    presets='optimize_for_deployment',
    hyperparameters=hyperparameters,
    hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
    verbosity=4,
    ag_args_fit={'num_gpus': 1},
)

# Build the predictor (scored by ROC AUC) and train it in one expression.
predictor = TabularPredictor(
    label=label, eval_metric='roc_auc', path=save_path
).fit(train_data, **fit_settings)

Presets specified: ['optimize_for_deployment']
User Specified kwargs:
{'ag_args_fit': {'num_gpus': 1},
 'hyperparameter_tune_kwargs': {'num_trials': 5,
                                'scheduler': 'local',
                                'searcher': 'auto'},
 'keep_only_best': True,
 'num_bag_folds': 5,
 'num_bag_sets': 1,
 'num_stack_levels': 0,
 'save_space': True,
 'verbosity': 4}
Full kwargs:
{'_feature_generator_kwargs': None,
 '_save_bag_folds': None,
 'ag_args': None,
 'ag_args_ensemble': None,
 'ag_args_fit': {'num_gpus': 1},
 'auto_stack': False,
 'calibrate': 'auto',
 'excluded_model_types': None,
 'feature_generator': 'auto',
 'feature_prune_kwargs': None,
 'holdout_frac': None,
 'hyperparameter_tune_kwargs': {'num_trials': 5,
                                'scheduler': 'local',
                                'searcher': 'auto'},
 'keep_only_best': True,
 'name_suffix': None,
 'num_bag_folds': 5,
 'num_bag_sets': 1,
 'num_stack_levels': 0,
 'pseudo_data': None,
 'quantile_

  0%|          | 0/5 [00:00<?, ?it/s]

Loading: /media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/auto_gl/self_data_model/exp_gbm_2/models/LightGBM_BAG_L1/hpo/dataset_train.pkl
Loading: /media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/auto_gl/self_data_model/exp_gbm_2/models/LightGBM_BAG_L1/hpo/dataset_val.pkl
	Dropped 0 of 23 features.
	Fitting LightGBM/T1 with 'num_gpus': 1, 'num_cpus': 8
	Training LightGBM/T1 with GPU, note that this may negatively impact model quality compared to CPU training.
	Fitting 10 rounds... Hyperparameters: {'learning_rate': 0.05, 'num_leaves': 16, 'feature_fraction': 1.0, 'min_data_in_leaf': 20, 'device': 'gpu'}
Saving /media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/auto_gl/self_data_model/exp_gbm_2/models/LightGBM_BAG_L1/hpo/LightGBM/T1/model.pkl
Loading: /media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/auto_gl/self_data_model/exp_gbm_2/models/LightGBM_BAG_L1/hpo/dataset_

In [3]:
# Print AutoGluon's fit() summary for the predictor trained in the previous cell.
predictor.fit_summary()
# Reload the predictor from `save_path`, the directory it was saved to during fit().
predictor_tune = TabularPredictor.load(save_path)
# NOTE(review): `train_data` is the same frame that was passed to fit(), so the
# metrics below are in-sample scores, not held-out test performance.
perf = predictor_tune.evaluate(train_data, auxiliary_metrics=True)
print(perf)

Loading: /media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/auto_gl/self_data_model/exp_gbm_2/models/LightGBM_BAG_L1/T2/model.pkl
Loading: /media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/auto_gl/self_data_model/exp_gbm_2/models/WeightedEnsemble_L2/model.pkl
Loading: /media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/auto_gl/self_data_model/exp_gbm_2/predictor.pkl
Loading: /media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/auto_gl/self_data_model/exp_gbm_2/learner.pkl
Loading: /media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/auto_gl/self_data_model/exp_gbm_2/models/trainer.pkl
Loading: /media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/auto_gl/self_data_model/exp_gbm_2/models/WeightedEnsemble_L2/model.pkl
Loading: /media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/auto_gl/self_data_

*** Summary of fit() ***
Estimated performance of each model:
                 model  score_val  pred_time_val  fit_time  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   LightGBM_BAG_L1/T2   0.987307       0.041295  1.106241                0.041295           1.106241            1       True          1
1  WeightedEnsemble_L2   0.987307       0.042869  1.612065                0.001574           0.505824            2       True          2
Number of models trained: 2
Types of models trained:
{'WeightedEnsembleModel', 'StackerEnsembleModel_LGB'}
Bagging used: True  (with 5 folds)
Multi-layer stack-ensembling used: False 
Feature Metadata (Processed):
(raw dtype, special dtypes):
('float', [])     :  5 | ['dur', 'pkts', 'allbytes', 'spkts', 'sbytes']
('int', ['bool']) : 18 | ['proto_icmp', 'proto_tcp', 'proto_udp', 'origstate_OTH', 'origstate_REJ', ...]
*** End of fit() summary ***


Evaluation: roc_auc on test data: 0.9890679958330693
Evaluations on test data:
{
    "roc_auc": 0.9890679958330693,
    "accuracy": 0.9823451032644903,
    "balanced_accuracy": 0.9739685658153241,
    "mcc": 0.9608710020343108,
    "f1": 0.97327281896117,
    "precision": 1.0,
    "recall": 0.9479371316306483
}


{'roc_auc': 0.9890679958330693, 'accuracy': 0.9823451032644903, 'balanced_accuracy': 0.9739685658153241, 'mcc': 0.9608710020343108, 'f1': 0.97327281896117, 'precision': 1.0, 'recall': 0.9479371316306483}


In [4]:

import time

# Time one batch prediction over the whole frame. perf_counter() is a
# monotonic, high-resolution clock intended for measuring short intervals,
# unlike time.time(), which can jump if the system clock is adjusted.
t0 = time.perf_counter()
predictor.predict(train_data)
t1 = time.perf_counter()

# Average prediction latency per row, in seconds. The divisor is the actual
# row count of the frame that was predicted on, rather than a hard-coded 3002,
# so the figure stays correct if the dataset changes.
(t1 - t0) / len(train_data)

Loading: /media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/auto_gl/self_data_model/exp_gbm_2/models/WeightedEnsemble_L2/model.pkl
Loading: /media/wuguo-buaa/LENOVO_USB_HDD/PycharmProjects/Anomaly-Detection-IoT23/Models/auto_gl/self_data_model/exp_gbm_2/models/LightGBM_BAG_L1/T2/model.pkl


2.5527783825268832e-05