<a href="https://colab.research.google.com/github/wujinja-cgu/Model-Selection-By-Caret-Package/blob/main/Pycaret.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
### Documentation: https://pycaret.readthedocs.io/en/stable/api/classification.html#pycaret.classification.setup
!pip install pycaret



In [14]:
!pip install fastapi
!pip install python-multipart
!pip install uvicorn



In [15]:
### check pycaret version
import pycaret
print('PyCaret: %s' % pycaret.__version__)

PyCaret: 3.1.0


In [29]:
### load the sonar dataset
from pandas import read_csv

### define the location of the dataset
url='https://raw.githubusercontent.com/wujinja-cgu/Model-Selection-By-Caret-Package/main/sepsis%20data.csv'

### load the dataset
df = read_csv(url, header = None)

### summarize the shape of the dataset
print(df.shape)

### set column names as the column number
n_cols = df.shape[1]
df.columns = [str(i) for i in range(n_cols)]

### summarize the first few rows of data
print(df.head())

(556, 7)
          0            1            2            3            4       5  \
0     death          pct          crp      lactate          alb  ddimer   
1  Survival  0.370000005          257  28.70000076         3.75    2427   
2  Survival  0.150000006  154.8999939            9  2.730000019    1915   
3  Survival  0.090000004  75.69999695  13.80000019  3.059999943     544   
4  Survival  0.540000021  204.1000061  9.199999809         3.42    2427   

             6  
0          rdw  
1  13.69999981  
2  13.89999962  
3  14.10000038  
4  12.89999962  


In [17]:
from pycaret.classification import *
all_models = models()
all_models

Unnamed: 0_level_0,Name,Reference,Turbo
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
lr,Logistic Regression,sklearn.linear_model._logistic.LogisticRegression,True
knn,K Neighbors Classifier,sklearn.neighbors._classification.KNeighborsCl...,True
nb,Naive Bayes,sklearn.naive_bayes.GaussianNB,True
dt,Decision Tree Classifier,sklearn.tree._classes.DecisionTreeClassifier,True
svm,SVM - Linear Kernel,sklearn.linear_model._stochastic_gradient.SGDC...,True
rbfsvm,SVM - Radial Kernel,sklearn.svm._classes.SVC,False
gpc,Gaussian Process Classifier,sklearn.gaussian_process._gpc.GaussianProcessC...,False
mlp,MLP Classifier,sklearn.neural_network._multilayer_perceptron....,False
ridge,Ridge Classifier,sklearn.linear_model._ridge.RidgeClassifier,True
rf,Random Forest Classifier,sklearn.ensemble._forest.RandomForestClassifier,True


In [18]:
### compare machine learning algorithms on the sonar classification dataset
from pandas import read_csv
from pycaret.classification import setup
from pycaret.classification import compare_models

### set column names as the column number
n_cols = df.shape[1]
df.columns = [str(i) for i in range(n_cols)]

### setup the dataset
grid = setup(data=df, target=df.columns[0], html=False, verbose=False)

### evaluate models and compare models
best = compare_models()

### report the best model
print(best)

                                                           

                                    Model  Accuracy     AUC  Recall   Prec.  \
lr                    Logistic Regression    0.9201  0.7724  0.9944  0.9247   
dummy                    Dummy Classifier    0.9201  0.5000  1.0000  0.9201   
knn                K Neighbors Classifier    0.9175  0.6204  0.9972  0.9199   
rf               Random Forest Classifier    0.9175  0.7794  0.9944  0.9221   
et                 Extra Trees Classifier    0.9175  0.7846  0.9916  0.9243   
ridge                    Ridge Classifier    0.9149  0.0000  0.9944  0.9197   
lightgbm  Light Gradient Boosting Machine    0.9123  0.7685  0.9888  0.9217   
gbc          Gradient Boosting Classifier    0.9047  0.7365  0.9749  0.9259   
xgboost         Extreme Gradient Boosting    0.8995  0.7621  0.9748  0.9206   
ada                  Ada Boost Classifier    0.8994  0.6997  0.9663  0.9276   
lda          Linear Discriminant Analysis    0.8970  0.7798  0.9637  0.9273   
nb                            Naive Bayes    0.8735 



In [19]:
### tune model hyperparameters on the sonar classification dataset
from pandas import read_csv
from sklearn.ensemble import ExtraTreesClassifier
from pycaret.classification import setup
from pycaret.classification import tune_model

### setup the dataset
grid = setup(data=df, target=df.columns[0], html=False, verbose=False)

### tune model hyperparameters
best = tune_model(ExtraTreesClassifier(), n_iter=200, choose_better=True)

### report the best model
print(best)

Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 200 candidates, totalling 2000 fits




      Accuracy  AUC  Recall   Prec.      F1  Kappa  MCC
Fold                                                   
0       0.9231  0.5     1.0  0.9231  0.9600    0.0  0.0
1       0.9231  0.5     1.0  0.9231  0.9600    0.0  0.0
2       0.9231  0.5     1.0  0.9231  0.9600    0.0  0.0
3       0.9231  0.5     1.0  0.9231  0.9600    0.0  0.0
4       0.9231  0.5     1.0  0.9231  0.9600    0.0  0.0
5       0.9231  0.5     1.0  0.9231  0.9600    0.0  0.0
6       0.9231  0.5     1.0  0.9231  0.9600    0.0  0.0
7       0.8974  0.5     1.0  0.8974  0.9459    0.0  0.0
8       0.9211  0.5     1.0  0.9211  0.9589    0.0  0.0
9       0.9211  0.5     1.0  0.9211  0.9589    0.0  0.0
Mean    0.9201  0.5     1.0  0.9201  0.9584    0.0  0.0
Std     0.0076  0.0     0.0  0.0076  0.0042    0.0  0.0
ExtraTreesClassifier(bootstrap=True, ccp_alpha=0.0, class_weight={},
                     criterion='entropy', max_depth=2, max_features='log2',
                     max_leaf_nodes=None, max_samples=None,
           

In [20]:
top3 = compare_models(n_select = 3)
tuned_top3 = [tune_model(i) for i in top3]
blender = blend_models(tuned_top3)
stacker = stack_models(tuned_top3)
best_auc_model = automl(optimize = 'AUC')



                                    Model  Accuracy     AUC  Recall   Prec.  \
dummy                    Dummy Classifier    0.9201  0.5000  1.0000  0.9201   
ridge                    Ridge Classifier    0.9200  0.0000  0.9971  0.9223   
lr                    Logistic Regression    0.9174  0.8486  0.9887  0.9268   
knn                K Neighbors Classifier    0.9072  0.6643  0.9805  0.9236   
lda          Linear Discriminant Analysis    0.9071  0.8379  0.9662  0.9356   
rf               Random Forest Classifier    0.9047  0.7989  0.9803  0.9212   
gbc          Gradient Boosting Classifier    0.9020  0.7815  0.9663  0.9308   
et                 Extra Trees Classifier    0.9020  0.8326  0.9775  0.9210   
ada                  Ada Boost Classifier    0.8968  0.7558  0.9663  0.9254   
lightgbm  Light Gradient Boosting Machine    0.8865  0.7984  0.9551  0.9246   
nb                            Naive Bayes    0.8841  0.8469  0.9160  0.9576   
xgboost         Extreme Gradient Boosting    0.8840 

Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 4 candidates, totalling 40 fits




Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
      Accuracy  AUC  Recall   Prec.      F1  Kappa  MCC
Fold                                                   
0       0.9231  0.5     1.0  0.9231  0.9600    0.0  0.0
1       0.9231  0.5     1.0  0.9231  0.9600    0.0  0.0
2       0.9231  0.5     1.0  0.9231  0.9600    0.0  0.0
3       0.9231  0.5     1.0  0.9231  0.9600    0.0  0.0
4       0.9231  0.5     1.0  0.9231  0.9600    0.0  0.0
5       0.9231  0.5     1.0  0.9231  0.9600    0.0  0.0
6       0.9231  0.5     1.0  0.9231  0.9600    0.0  0.0
7       0.8974  0.5     1.0  0.8974  0.9459    0.0  0.0
8       0.9211  0.5     1.0  0.9211  0.9589    0.0  0.0
9       0.9211  0.5     1.0  0.9211  0.9589    0.0  0.0
Mean    0.9201  0.5     1.0  0.9201  0.9584    0.0  0.0
Std     0.0076  0.0     0.0  0.0076  0.0042    0.0  0.0


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 10 candidates, totalling 100 fits




Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
      Accuracy  AUC  Recall   Prec.      F1   Kappa     MCC
Fold                                                       
0       0.9231  0.0  1.0000  0.9231  0.9600  0.0000  0.0000
1       0.9231  0.0  1.0000  0.9231  0.9600  0.0000  0.0000
2       0.9231  0.0  1.0000  0.9231  0.9600  0.0000  0.0000
3       0.9487  0.0  1.0000  0.9474  0.9730  0.4800  0.5620
4       0.9231  0.0  1.0000  0.9231  0.9600  0.0000  0.0000
5       0.9231  0.0  1.0000  0.9231  0.9600  0.0000  0.0000
6       0.9231  0.0  1.0000  0.9231  0.9600  0.0000  0.0000
7       0.8974  0.0  1.0000  0.8974  0.9459  0.0000  0.0000
8       0.9211  0.0  1.0000  0.9211  0.9589  0.0000  0.0000
9       0.8947  0.0  0.9714  0.9189  0.9444 -0.0411 -0.0481
Mean    0.9200  0.0  0.9971  0.9223  0.9582  0.0439  0.0514
Std     0.0142  0.0  0.0086  0.0112  0.0076  0.1459  0.1708


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 10 candidates, totalling 100 fits




Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
      Accuracy     AUC  Recall   Prec.      F1   Kappa     MCC
Fold                                                          
0       0.8974  0.7778  0.9722  0.9211  0.9459 -0.0400 -0.0468
1       0.9231  0.7963  1.0000  0.9231  0.9600  0.0000  0.0000
2       0.9231  0.7407  1.0000  0.9231  0.9600  0.0000  0.0000
3       0.9487  0.9815  0.9722  0.9722  0.9722  0.6389  0.6389
4       0.9231  0.7870  1.0000  0.9231  0.9600  0.0000  0.0000
5       0.9487  0.9444  1.0000  0.9474  0.9730  0.4800  0.5620
6       0.9231  0.9444  1.0000  0.9231  0.9600  0.0000  0.0000
7       0.8974  0.8214  1.0000  0.8974  0.9459  0.0000  0.0000
8       0.9211  0.8857  1.0000  0.9211  0.9589  0.0000  0.0000
9       0.8684  0.8190  0.9429  0.9167  0.9296 -0.0674 -0.0690
Mean    0.9174  0.8498  0.9887  0.9268  0.9566  0.1011  0.1085
Std     0.0230  0.0789  0.0188  



      Accuracy  AUC  Recall   Prec.      F1   Kappa     MCC
Fold                                                       
0       0.9231  0.0  1.0000  0.9231  0.9600  0.0000  0.0000
1       0.9231  0.0  1.0000  0.9231  0.9600  0.0000  0.0000
2       0.9231  0.0  1.0000  0.9231  0.9600  0.0000  0.0000
3       0.9487  0.0  1.0000  0.9474  0.9730  0.4800  0.5620
4       0.9231  0.0  1.0000  0.9231  0.9600  0.0000  0.0000
5       0.9231  0.0  1.0000  0.9231  0.9600  0.0000  0.0000
6       0.9231  0.0  1.0000  0.9231  0.9600  0.0000  0.0000
7       0.8974  0.0  1.0000  0.8974  0.9459  0.0000  0.0000
8       0.9211  0.0  1.0000  0.9211  0.9589  0.0000  0.0000
9       0.8947  0.0  0.9714  0.9189  0.9444 -0.0411 -0.0481
Mean    0.9200  0.0  0.9971  0.9223  0.9582  0.0439  0.0514
Std     0.0142  0.0  0.0086  0.0112  0.0076  0.1459  0.1708




      Accuracy     AUC  Recall   Prec.      F1   Kappa     MCC
Fold                                                          
0       0.8974  0.7778  0.9722  0.9211  0.9459 -0.0400 -0.0468
1       0.9231  0.7963  1.0000  0.9231  0.9600  0.0000  0.0000
2       0.9231  0.7222  1.0000  0.9231  0.9600  0.0000  0.0000
3       0.9744  0.9907  0.9722  1.0000  0.9859  0.8434  0.8539
4       0.9231  0.7963  1.0000  0.9231  0.9600  0.0000  0.0000
5       0.9231  0.9259  1.0000  0.9231  0.9600  0.0000  0.0000
6       0.9231  0.9444  1.0000  0.9231  0.9600  0.0000  0.0000
7       0.8974  0.7929  1.0000  0.8974  0.9459  0.0000  0.0000
8       0.9211  0.8857  1.0000  0.9211  0.9589  0.0000  0.0000
9       0.8684  0.8190  0.9429  0.9167  0.9296 -0.0674 -0.0690
Mean    0.9174  0.8451  0.9887  0.9272  0.9566  0.0736  0.0738
Std     0.0257  0.0819  0.0188  0.0254  0.0137  0.2575  0.2611


In [21]:
dt = create_model('dt')
bagged_dt = ensemble_model(dt, method = 'Bagging')



      Accuracy     AUC  Recall   Prec.      F1   Kappa     MCC
Fold                                                          
0       0.9231  0.6528  0.9722  0.9459  0.9589  0.3607  0.3691
1       0.8974  0.6389  0.9444  0.9444  0.9444  0.2778  0.2778
2       0.9231  0.5000  1.0000  0.9231  0.9600  0.0000  0.0000
3       0.8718  0.7778  0.8889  0.9697  0.9275  0.3810  0.4103
4       0.8974  0.6389  0.9444  0.9444  0.9444  0.2778  0.2778
5       0.8974  0.7917  0.9167  0.9706  0.9429  0.4468  0.4649
6       0.8462  0.4583  0.9167  0.9167  0.9167 -0.0833 -0.0833
7       0.8462  0.4714  0.9429  0.8919  0.9167 -0.0734 -0.0786
8       0.8947  0.6381  0.9429  0.9429  0.9429  0.2762  0.2762
9       0.8158  0.7476  0.8286  0.9667  0.8923  0.2811  0.3276
Mean    0.8813  0.6315  0.9298  0.9416  0.9347  0.2145  0.2242
Std     0.0336  0.1158  0.0445  0.0239  0.0201  0.1835  0.1922




      Accuracy     AUC  Recall   Prec.      F1   Kappa     MCC
Fold                                                          
0       0.8462  0.5417  0.9167  0.9167  0.9167 -0.0833 -0.0833
1       0.9231  0.6852  1.0000  0.9231  0.9600  0.0000  0.0000
2       0.9231  0.7361  0.9722  0.9459  0.9589  0.3607  0.3691
3       0.8974  0.9352  0.9444  0.9444  0.9444  0.2778  0.2778
4       0.8462  0.6528  0.8889  0.9412  0.9143  0.1702  0.1771
5       0.8718  0.9028  0.9167  0.9429  0.9296  0.2169  0.2196
6       0.9231  0.9259  1.0000  0.9231  0.9600  0.0000  0.0000
7       0.8974  0.7536  1.0000  0.8974  0.9459  0.0000  0.0000
8       0.8947  0.7429  0.9429  0.9429  0.9429  0.2762  0.2762
9       0.8684  0.8476  0.8857  0.9688  0.9254  0.3791  0.4085
Mean    0.8891  0.7724  0.9467  0.9346  0.9398  0.1597  0.1645
Std     0.0284  0.1226  0.0425  0.0188  0.0166  0.1597  0.1651
