# DeepTLF (pre-release alpha vesrion)
- DeepTLF follows the sckit-learn API
- PyTorch Implementation

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.datasets import load_breast_cancer, fetch_california_housing

import xgboost as xgb
from src import DeepTFL

import warnings
warnings.filterwarnings('ignore')

# DeepTLF model

In [3]:
DeepTFL?

[0;31mInit signature:[0m
[0mDeepTFL[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mn_est[0m[0;34m=[0m[0;36m23[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmax_depth[0m[0;34m=[0m[0;36m4[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdrop[0m[0;34m=[0m[0;36m0.23[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mxgb_lr[0m[0;34m=[0m[0;36m0.5[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbatch_size[0m[0;34m=[0m[0;36m320[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_epoch[0m[0;34m=[0m[0;36m100[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mhidden_dim[0m[0;34m=[0m[0;36m256[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_layers[0m[0;34m=[0m[0;36m4[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtask[0m[0;34m=[0m[0;34m'class'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdebug[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcheckpoint_name[0m[0;34m=[0m[0;34m'checkpoint.pt'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m

# Example: Classification - Breast Cancer Dataset 

In [4]:
X, y = load_breast_cancer(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.1,
                                                    random_state=42)

In [5]:
dtlf_model = DeepTFL(n_est=30, max_depth=3, drop=0.5, n_layers=3, task='class')
dtlf_model.fit(X_train=X_train, y_train=y_train)
dtlf_y_hat = dtlf_model.predict(X_test)
dtlf_y_hat = np.clip(dtlf_y_hat,0,1)
acc_DeepTLF = accuracy_score(y_test, dtlf_y_hat)

Epochs:  36%|███▌      | 36/100 [00:02<00:03, 17.43it/s]

Early stopping





In [6]:
xgb_model = xgb.XGBClassifier()
xgb_model.fit(X_train,y_train)
xgb_y_hat = xgb_model.predict(X_test)
xgb_y_hat = np.clip(xgb_y_hat,0,1)
acc_xgb = accuracy_score(y_test, xgb_y_hat)

In [7]:
print('Results:')
print('GBDT accuracy: \t\t', acc_xgb*100,'%')
print('DeepTLF accuracy:\t', acc_DeepTLF*100,'%')

Results:
GBDT accuracy: 		 96.49122807017544 %
DeepTLF accuracy:	 98.24561403508771 %


# Example: Regression - California Housing Dataset 

In [8]:
X, y = fetch_california_housing(return_X_y=True)
# split data 
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.1,
                                                    random_state=42)

In [9]:
# DeepTLF 

dtlf_model = DeepTFL(n_est=100, 
                     max_depth=5, 
                     debug=True,
                     n_epoch = 1000,
                     task='reg')
dtlf_model.fit(X_train=X_train, y_train=y_train)
dtlf_y_hat = dtlf_model.predict(X_test)
mse_DeepTLF = mean_squared_error(y_test, dtlf_y_hat)

Epochs:   0%|          | 1/1000 [00:02<40:00,  2.40s/it]

Validation loss decreased (inf --> 1.429115).  Saving model ...


Epochs:   0%|          | 2/1000 [00:04<41:44,  2.51s/it]

Validation loss decreased (1.429115 --> 1.309111).  Saving model ...


Epochs:   0%|          | 3/1000 [00:07<41:16,  2.48s/it]

Validation loss decreased (1.309111 --> 1.288790).  Saving model ...


Epochs:   1%|          | 6/1000 [00:14<36:37,  2.21s/it]

Validation loss decreased (1.288790 --> 1.285344).  Saving model ...


Epochs:   1%|▏         | 13/1000 [00:30<38:30,  2.34s/it]

Validation loss decreased (1.285344 --> 1.284007).  Saving model ...


Epochs:   2%|▏         | 17/1000 [00:40<39:34,  2.42s/it]

Validation loss decreased (1.284007 --> 1.282636).  Saving model ...


Epochs:   4%|▎         | 36/1000 [01:29<40:02,  2.49s/it]

Early stopping





In [10]:
# XGBoost 

xgb_model = xgb.XGBRFRegressor()
xgb_model.fit(X_train,y_train)
xgb_y_hat = xgb_model.predict(X_test)
mse_xgb = mean_squared_error(y_test, xgb_y_hat)

In [11]:
print('Results:')
print('GBDT MSE: \t', mse_xgb,'%')
print('DeepTLF MSE:\t', mse_DeepTLF,'%')

Results:
GBDT MSE: 	 0.4234836546668566 %
DeepTLF MSE:	 1.3378848061009587 %
