# See python libraries for the lazy data scientist

https://levelup.gitconnected.com/python-libraries-for-lazy-data-scientists-c1287eb794ec


# What is FLAML?
Fast Library for Automated Machine Learning & tuning

extensive documentation can be found here: https://microsoft.github.io/FLAML/


In [1]:
# ! pip install flaml

# The data to showcase

The data used is the iris dataset found here: https://www.kaggle.com/datasets/uciml/iris

In [2]:
#Importing the pandas 
import pandas as pd

#Loading the data
data = pd.read_csv('data/Iris.csv')
data

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [3]:
from flaml import AutoML
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Separating the target variable and rest of the data.
y = data['Species']
data.drop('Species',inplace=True,axis=1)
X = data

#Using Label Encoder to convert categorical variables to numerical variables
label_endcoder = LabelEncoder()
y = label_endcoder.fit_transform(y)

# Splitting the dataset into train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=.5,random_state =1)

automl = AutoML()

#Training the models
automl.fit(X_train, y_train, task="classification")

[flaml.automl.logger: 08-26 15:56:04] {1679} INFO - task = classification
[flaml.automl.logger: 08-26 15:56:04] {1690} INFO - Evaluation method: cv
[flaml.automl.logger: 08-26 15:56:04] {1788} INFO - Minimizing error metric: log_loss
[flaml.automl.logger: 08-26 15:56:04] {1900} INFO - List of ML learners in AutoML Run: ['rf', 'lgbm', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']
[flaml.automl.logger: 08-26 15:56:04] {2218} INFO - iteration 0, current learner rf
[flaml.automl.logger: 08-26 15:56:08] {2344} INFO - Estimated sufficient time budget=10000s. Estimated necessary time budget=10s.
[flaml.automl.logger: 08-26 15:56:08] {2391} INFO -  at 3.4s,	estimator rf's best error=0.0737,	best estimator rf's best error=0.0737
[flaml.automl.logger: 08-26 15:56:08] {2218} INFO - iteration 1, current learner lgbm
[flaml.automl.logger: 08-26 15:56:08] {2391} INFO -  at 3.5s,	estimator lgbm's best error=1.0991,	best estimator rf's best error=0.0737
[flaml.automl.logger: 08-26 15:56:08] {2218



[flaml.automl.logger: 08-26 15:56:33] {2627} INFO - retrain xgb_limitdepth for 0.2s
[flaml.automl.logger: 08-26 15:56:33] {2630} INFO - retrained model: XGBClassifier(base_score=0.5, booster='gbtree', callbacks=[],
              colsample_bylevel=0.4814471959023239, colsample_bynode=1,
              colsample_bytree=0.6050207253592859, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, gamma=0, gpu_id=-1,
              grow_policy='depthwise', importance_type=None,
              interaction_constraints='', learning_rate=0.07962498837600937,
              max_bin=256, max_cat_to_onehot=4, max_delta_step=0, max_depth=2,
              max_leaves=0, min_child_weight=0.0068282719220722, missing=nan,
              monotone_constraints='()', n_estimators=464, n_jobs=-1,
              num_parallel_tree=1, objective='multi:softprob', predictor='auto',
              random_state=0, reg_alpha=0.0010290828959872173, ...)
[flaml.automl.logger: 08-26 15:56:33] {193

In [4]:
# best model
automl.best_estimator

'xgb_limitdepth'

In [5]:
# information about the hyperparameters
automl.best_config

{'n_estimators': 464,
 'max_depth': 2,
 'min_child_weight': 0.0068282719220722,
 'learning_rate': 0.07962498837600937,
 'subsample': 0.47139986510869014,
 'colsample_bylevel': 0.4814471959023239,
 'colsample_bytree': 0.6050207253592859,
 'reg_alpha': 0.0010290828959872173,
 'reg_lambda': 0.0103104214002687}