# Imports dependencies

In [2]:
import numpy as np
import pandas as pd
import os

import tensorflow as tf
import tensorflow_decision_forests as tfdf

print(f"Found TF-DF {tfdf.__version__}")

Found TF-DF 1.9.1


# Load dataset

In [3]:
# Read the two competition CSV files into pandas DataFrames.
# `train_df` carries the "Survived" label column (visible in the preview below);
# `serving_df` is the frame the final predictions are made for.
train_df = pd.read_csv("/kaggle/input/titanic/train.csv")
serving_df = pd.read_csv("/kaggle/input/titanic/test.csv")

# Preview the first ten training rows.
train_df.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


# Prepare dataset

We will apply the following transformations on the dataset.

1. Tokenize the names. For example, "Braund, Mr. Owen Harris" will become ["Braund", "Mr", "Owen", "Harris"] (surrounding punctuation is stripped from each token).
2. Extract any prefix in the ticket. For example, the ticket "STON/O2. 3101282" will be split into the prefix "STON/O2." and the number "3101282"; a ticket without a prefix gets the placeholder prefix "NONE".

In [4]:
def preprocess(df):
    """Return a copy of `df` with a cleaned "Name" and two ticket-derived columns.

    The input frame is not modified. The returned frame has:

    * "Name": every space-separated token of the original name with leading
      and trailing commas, parentheses, square brackets, periods and quotes
      removed, re-joined with single spaces.
    * "Ticket_number": the part of "Ticket" after its last space (the whole
      ticket when it contains no space).
    * "Ticket_item": the part of "Ticket" before its last space, with any
      remaining spaces replaced by underscores, or "NONE" when the ticket
      contains no space.

    Args:
        df: DataFrame with string columns "Name" and "Ticket".

    Returns:
        A new DataFrame with the columns described above added/overwritten.
    """
    # Characters trimmed from both ends of each name token.
    punctuation = ",()[].\"'"

    def normalize_name(raw_name):
        cleaned_tokens = []
        for token in raw_name.split(" "):
            cleaned_tokens.append(token.strip(punctuation))
        return " ".join(cleaned_tokens)

    def ticket_number(raw_ticket):
        # Third element of rpartition is the text after the last space, or the
        # whole string when there is no space at all.
        return raw_ticket.rpartition(" ")[2]

    def ticket_item(raw_ticket):
        prefix, separator, _ = raw_ticket.rpartition(" ")
        if not separator:
            # No space in the ticket: it has no prefix.
            return "NONE"
        return prefix.replace(" ", "_")

    result = df.copy()
    result["Name"] = result["Name"].apply(normalize_name)
    result["Ticket_number"] = result["Ticket"].apply(ticket_number)
    result["Ticket_item"] = result["Ticket"].apply(ticket_item)
    return result
    
# Apply the same preprocessing to both frames so the training and serving
# data expose identical feature columns. `preprocess` works on a copy, so
# `train_df` and `serving_df` keep their raw values.
preprocessed_train_df = preprocess(train_df)
preprocessed_serving_df = preprocess(serving_df)

# Preview the result: "Name" is cleaned and the two ticket columns are added.
preprocessed_train_df.head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Ticket_number,Ticket_item
0,1,0,3,Braund Mr Owen Harris,male,22.0,1,0,A/5 21171,7.25,,S,21171,A/5
1,2,1,1,Cumings Mrs John Bradley Florence Briggs Thayer,female,38.0,1,0,PC 17599,71.2833,C85,C,17599,PC
2,3,1,3,Heikkinen Miss Laina,female,26.0,0,0,STON/O2. 3101282,7.925,,S,3101282,STON/O2.
3,4,1,1,Futrelle Mrs Jacques Heath Lily May Peel,female,35.0,1,0,113803,53.1,C123,S,113803,NONE
4,5,0,3,Allen Mr William Henry,male,35.0,0,0,373450,8.05,,S,373450,NONE


Let's keep the list of the input features of the model. Notably, we don't want to train our model on the "PassengerId" and "Ticket" features.

In [25]:
# Start from every column of the preprocessed training frame...
input_features = list(preprocessed_train_df.columns)
# ...then drop the raw ticket string (already split into "Ticket_number" and
# "Ticket_item" by `preprocess`), the passenger identifier, and the label.
# `list.remove` raises ValueError if a column is unexpectedly missing.
input_features.remove("Ticket")
input_features.remove("PassengerId")
input_features.remove("Survived")
# "Ticket_number" is currently kept as a feature; enable the line below to drop it.
#input_features.remove("Ticket_number")

print(f"Input features: {input_features}")

Input features: ['Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked', 'Ticket_number', 'Ticket_item']


# Convert Pandas dataset to TensorFlow Dataset

In [30]:
def tokenize_names(features, labels=None):
    """Divide the names into tokens. TF-DF can consume text tokens natively.

    Overwrites `features["Name"]` in place with the result of
    `tf.strings.split` called with its default separator.

    Args:
        features: mapping of feature name to tensor; must contain "Name".
        labels: optional labels, passed through unchanged. Defaults to None
            so the function can also be mapped over a dataset without labels.

    Returns:
        The `(features, labels)` pair, with `features` being the same
        (mutated) mapping that was passed in.
    """
    features["Name"] =  tf.strings.split(features["Name"])
    return features, labels

# Convert the pandas frames to TensorFlow datasets and tokenize the "Name"
# column of each. The training dataset is built with "Survived" as its label;
# the serving dataset is built without a label.
train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(preprocessed_train_df,label="Survived").map(tokenize_names)
serving_ds = tfdf.keras.pd_dataframe_to_tf_dataset(preprocessed_serving_df).map(tokenize_names)

# Train model with default parameters

### Train model

First, we are training a GradientBoostedTreesModel model with the default parameters.

In [11]:
# Baseline: gradient boosted trees with the library's default hyper-parameters,
# restricted to the columns listed in `input_features`.
model = tfdf.keras.GradientBoostedTreesModel(
    verbose=0, # Very few logs
    features=[tfdf.keras.FeatureUsage(name=n) for n in input_features],
    exclude_non_specified_features=True, # Only use the features in "features"
    random_seed=1234,  # Fixed seed so repeated runs are comparable
)
model.fit(train_ds)

# Report the evaluation the model recorded about itself during training.
# NOTE(review): for GBT this is presumably measured on the validation split
# held out internally from `train_ds` — confirm in the TF-DF documentation.
self_evaluation = model.make_inspector().evaluation()
print(f"Accuracy: {self_evaluation.accuracy} Loss:{self_evaluation.loss}")

[INFO 24-12-16 08:24:55.2336 UTC kernel.cc:1233] Loading model from path /tmp/tmp3igf2zpj/model/ with prefix f4e4461299c5473e
[INFO 24-12-16 08:24:55.2405 UTC quick_scorer_extended.cc:911] The binary was compiled without AVX2 support, but your CPU supports it. Enable it for faster model inference.
[INFO 24-12-16 08:24:55.2411 UTC abstract_model.cc:1362] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 24-12-16 08:24:55.2411 UTC kernel.cc:1061] Use fast generic engine


Accuracy: 0.8260869383811951 Loss:0.8608942627906799


# Train model with improved default parameters

Now you'll use some specific parameters when creating the GBT model

In [20]:
# Same model type and feature set as the baseline, but with hand-picked
# hyper-parameters. This rebinds `model`, so later cells use this version.
#
# Options that were tried while experimenting and are currently disabled:
#   max_depth=4, num_candidate_attributes_ratio=0.2, validation_ratio=0.0,
#   hyperparameter_template="benchmark_rank1@v1", tuner=tuner,
#   compute_permutation_variable_importance=True (GBT only; a bit slower, but
#   great to understand the model).
model = tfdf.keras.GradientBoostedTreesModel(
    verbose=0, # Very few logs
    features=[tfdf.keras.FeatureUsage(name=n) for n in input_features],
    exclude_non_specified_features=True, # Only use the features in "features"

    # Change the default hyper-parameters
    min_examples=1,
    categorical_algorithm="RANDOM",
    shrinkage=0.05,
    split_axis="SPARSE_OBLIQUE",
    sparse_oblique_normalization="MIN_MAX",
    sparse_oblique_num_projections_exponent=2.0,
    num_trees=2000,
    random_seed=1234,  # Same seed as the baseline model

)
model.fit(train_ds)

# Report the evaluation the model recorded about itself during training, in
# the same format as the baseline cell so the two results can be compared.
self_evaluation = model.make_inspector().evaluation()
print(f"Accuracy: {self_evaluation.accuracy} Loss:{self_evaluation.loss}")

[INFO 24-12-16 08:27:23.3760 UTC kernel.cc:1233] Loading model from path /tmp/tmpi5841d9q/model/ with prefix fc33bda1a70a46a0
[INFO 24-12-16 08:27:23.3868 UTC decision_forest.cc:734] Model loaded with 40 root(s), 2106 node(s), and 10 input feature(s).
[INFO 24-12-16 08:27:23.3869 UTC abstract_model.cc:1362] Engine "GradientBoostedTreesGeneric" built
[INFO 24-12-16 08:27:23.3869 UTC kernel.cc:1061] Use fast generic engine


Accuracy: 0.782608687877655 Loss:1.0586705207824707


Let's look at the model summary. Among other things, it reports the importance the model assigned to each input variable.

In [19]:
# Print the summary of the most recently trained `model` (input features,
# variable importances, ...).
model.summary()

Model: "gradient_boosted_trees_model_40"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
Total params: 1 (1.00 Byte)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 1 (1.00 Byte)
_________________________________________________________________
Type: "GRADIENT_BOOSTED_TREES"
Task: CLASSIFICATION
Label: "__LABEL"

Input Features (11):
	Age
	Cabin
	Embarked
	Fare
	Name
	Parch
	Pclass
	Sex
	SibSp
	Ticket_item
	Ticket_number

No weights

Variable Importance: INV_MEAN_MIN_DEPTH:
    1.          "Name"  0.291842 ################
    2.   "Ticket_item"  0.275842 #############
    3.          "Fare"  0.268966 ############
    4.           "Sex"  0.241054 ########
    5.           "Age"  0.238759 #######
    6.        "Pclass"  0.220465 #####
    7.      "Embarked"  0.211332 ###
    8. "Ticket_number"  0.199938 ##
    9.         "SibSp"  0.192390 
   10.         "Parch"  0.185845 

Variable Importance: NUM

# Make predictions

In [18]:
def prediction_to_kaggle_format(model, threshold=0.5):
    """Turn a model's predictions on the serving set into a submission frame.

    Reads the notebook-level `serving_ds` (model input) and `serving_df`
    (source of the passenger ids).

    Args:
        model: object whose `predict(dataset, verbose=0)` returns a 2-D array;
            its first column is used as the survival probability.
        threshold: probabilities greater than or equal to this value are
            labelled 1, all others 0.

    Returns:
        DataFrame with columns "PassengerId" and "Survived" (0/1 integers).
    """
    raw_output = model.predict(serving_ds, verbose=0)
    # Keep only the first output column: one probability per passenger.
    survival_probability = raw_output[:, 0]
    survived_flag = (survival_probability >= threshold).astype(int)

    submission = pd.DataFrame({
        "PassengerId": serving_df["PassengerId"],
        "Survived": survived_flag,
    })
    return submission

def make_submission(kaggle_predictions, path="/kaggle/working/submission.csv"):
    """Write a submission frame to a CSV file and print where it was saved.

    Args:
        kaggle_predictions: DataFrame to export (e.g. the output of
            `prediction_to_kaggle_format`). It is written without its index.
        path: destination file. Defaults to the Kaggle working directory so
            existing one-argument calls keep writing to the same location;
            pass another path to run outside Kaggle or to keep several
            submissions side by side.
    """
    kaggle_predictions.to_csv(path, index=False)
    print(f"Submission exported to {path}")
    
# Predict with the current `model` (default 0.5 threshold) and export the
# result as the Kaggle submission file.
kaggle_predictions = prediction_to_kaggle_format(model)
make_submission(kaggle_predictions)
!head /kaggle/working/submission.csv

Submission exported to /kaggle/working/submission.csv
PassengerId,Survived
892,0
893,1
894,0
895,0
896,1
897,0
898,1
899,0
900,1


# Training a model with hyperparameter tuning

Hyper-parameter tuning is enabled by specifying the tuner constructor argument of the model. The tuner object contains all the configuration of the tuner (search space, optimizer, trial and objective).


In [15]:
# Number of random hyper-parameter combinations to evaluate. Every trial trains
# a complete model, so this value dominates the run time of the cell; lower it
# for a quick run.
NUM_TUNER_TRIALS = 1000

tuner = tfdf.tuner.RandomSearch(num_trials=NUM_TUNER_TRIALS)
tuner.choice("min_examples", [2, 5, 7, 10])
tuner.choice("categorical_algorithm", ["CART", "RANDOM"])

# "growing_strategy" has two alternatives, each with its own dependent
# hyper-parameter: max_depth for LOCAL growth, max_num_nodes for
# BEST_FIRST_GLOBAL growth. `merge=True` adds the second alternative to the
# already-declared "growing_strategy" choice instead of redefining it.
local_search_space = tuner.choice("growing_strategy", ["LOCAL"])
local_search_space.choice("max_depth", [3, 4, 5, 6, 8])

global_search_space = tuner.choice("growing_strategy", ["BEST_FIRST_GLOBAL"], merge=True)
global_search_space.choice("max_num_nodes", [16, 32, 64, 128, 256])

# "use_hessian_gain" ([True, False]) is deliberately left out of the search.
tuner.choice("shrinkage", [0.02, 0.05, 0.10, 0.15])
tuner.choice("num_candidate_attributes_ratio", [0.2, 0.5, 0.9, 1.0])

# Same pattern for "split_axis": the sparse-oblique parameters are only
# explored when SPARSE_OBLIQUE splits are selected.
tuner.choice("split_axis", ["AXIS_ALIGNED"])
oblique_space = tuner.choice("split_axis", ["SPARSE_OBLIQUE"], merge=True)
oblique_space.choice("sparse_oblique_normalization",
                     ["NONE", "STANDARD_DEVIATION", "MIN_MAX"])
oblique_space.choice("sparse_oblique_weights", ["BINARY", "CONTINUOUS"])
oblique_space.choice("sparse_oblique_num_projections_exponent", [1.0, 1.5])

# Tune the model. Notice the `tuner=tuner`.
# The feature restriction and the seed match the other models in this
# notebook: without them the tuned model would also train on "PassengerId"
# and the raw "Ticket" column, and its result would not be comparable.
tuned_model = tfdf.keras.GradientBoostedTreesModel(
    tuner=tuner,
    features=[tfdf.keras.FeatureUsage(name=n) for n in input_features],
    exclude_non_specified_features=True, # Only use the features in "features"
    random_seed=1234,
)
tuned_model.fit(train_ds, verbose=0)

# Report the evaluation recorded by the tuned model, in the same format as
# the earlier training cells.
tuned_self_evaluation = tuned_model.make_inspector().evaluation()
print(f"Accuracy: {tuned_self_evaluation.accuracy} Loss:{tuned_self_evaluation.loss}")

Use /tmp/tmpssw7dsk5 as temporary training directory


Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7d9b566d33a0>>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 

KeyboardInterrupt



When the cell above runs to completion, its last output line shows the tuned model's accuracy, which you can compare with the results obtained earlier with the default parameters and with the parameters set by hand.

This is the main idea behind hyperparameter tuning.

For more information you can follow this tutorial: [Automated hyper-parameter tuning](https://www.tensorflow.org/decision_forests/tutorials/automatic_tuning_colab)

In [13]:
# Number of independently seeded models to average.
NUM_ENSEMBLE_MODELS = 100
# Averaged probabilities at or above this value are labelled as survived.
SURVIVAL_THRESHOLD = 0.5

# One array of survival probabilities per trained model.
per_seed_predictions = []

for seed in range(NUM_ENSEMBLE_MODELS):
    print(f"i:{seed}")
    # Possible models: GradientBoostedTreesModel or RandomForestModel.
    # Default hyper-parameters are used; only the seed changes between members.
    # A dedicated name is used so the single `model` trained earlier in the
    # notebook is not overwritten by the last ensemble member.
    ensemble_member = tfdf.keras.GradientBoostedTreesModel(
        verbose=0, # Very few logs
        features=[tfdf.keras.FeatureUsage(name=n) for n in input_features],
        exclude_non_specified_features=True, # Only use the features in "features"
        random_seed=seed,
        honest=True,
    )
    ensemble_member.fit(train_ds)

    # Keep only the first output column: one probability per passenger.
    per_seed_predictions.append(ensemble_member.predict(serving_ds, verbose=0)[:, 0])

# Average across models. Collecting the arrays and averaging once avoids
# accumulating in place into the array returned by the first model.
num_predictions = len(per_seed_predictions)
predictions = np.mean(per_seed_predictions, axis=0)

kaggle_predictions = pd.DataFrame({
        "PassengerId": serving_df["PassengerId"],
        "Survived": (predictions >= SURVIVAL_THRESHOLD).astype(int)
    })

make_submission(kaggle_predictions)

i:0


[INFO 24-12-16 08:25:20.2539 UTC kernel.cc:1233] Loading model from path /tmp/tmp7mrvhzyk/model/ with prefix 40241d43737949e8
[INFO 24-12-16 08:25:20.2589 UTC abstract_model.cc:1362] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 24-12-16 08:25:20.2590 UTC kernel.cc:1061] Use fast generic engine


i:1


[INFO 24-12-16 08:25:21.9417 UTC kernel.cc:1233] Loading model from path /tmp/tmp3s0h5txb/model/ with prefix 5abcb34769f3463e
[INFO 24-12-16 08:25:21.9633 UTC kernel.cc:1061] Use fast generic engine


i:2


[INFO 24-12-16 08:25:23.1070 UTC kernel.cc:1233] Loading model from path /tmp/tmpuedf2pik/model/ with prefix d45d853899a344e6
[INFO 24-12-16 08:25:23.1122 UTC kernel.cc:1061] Use fast generic engine


i:3


[INFO 24-12-16 08:25:25.0818 UTC kernel.cc:1233] Loading model from path /tmp/tmpdxwfajvw/model/ with prefix 7343dbe4fb1e461f
[INFO 24-12-16 08:25:25.1206 UTC kernel.cc:1061] Use fast generic engine


i:4


[INFO 24-12-16 08:25:26.3006 UTC kernel.cc:1233] Loading model from path /tmp/tmphaq1gmp_/model/ with prefix 66289f5f5c4e43a6
[INFO 24-12-16 08:25:26.3068 UTC quick_scorer_extended.cc:911] The binary was compiled without AVX2 support, but your CPU supports it. Enable it for faster model inference.
[INFO 24-12-16 08:25:26.3075 UTC kernel.cc:1061] Use fast generic engine


i:5


[INFO 24-12-16 08:25:27.3267 UTC kernel.cc:1233] Loading model from path /tmp/tmp9g8xrvst/model/ with prefix a5aa0e13e9ce4da7
[INFO 24-12-16 08:25:27.3302 UTC kernel.cc:1061] Use fast generic engine


i:6


[INFO 24-12-16 08:25:28.4944 UTC kernel.cc:1233] Loading model from path /tmp/tmphxljgsno/model/ with prefix 73e15384ad4945c0
[INFO 24-12-16 08:25:28.5026 UTC kernel.cc:1061] Use fast generic engine


i:7


[INFO 24-12-16 08:25:30.1890 UTC kernel.cc:1233] Loading model from path /tmp/tmpndg9nc7z/model/ with prefix a11da690191b4a85
[INFO 24-12-16 08:25:30.2117 UTC kernel.cc:1061] Use fast generic engine


i:8


[INFO 24-12-16 08:25:31.5323 UTC kernel.cc:1233] Loading model from path /tmp/tmpyzkcvmli/model/ with prefix 27a8b367d1194e7c
[INFO 24-12-16 08:25:31.5428 UTC abstract_model.cc:1362] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 24-12-16 08:25:31.5428 UTC kernel.cc:1061] Use fast generic engine


i:9


[INFO 24-12-16 08:25:32.9970 UTC kernel.cc:1233] Loading model from path /tmp/tmpxolmv2no/model/ with prefix e8d9475061ee4309
[INFO 24-12-16 08:25:33.0131 UTC kernel.cc:1061] Use fast generic engine


i:10


[INFO 24-12-16 08:25:34.1776 UTC kernel.cc:1233] Loading model from path /tmp/tmp7uhb_w9f/model/ with prefix e56bec2825234950
[INFO 24-12-16 08:25:34.1845 UTC kernel.cc:1061] Use fast generic engine


i:11


[INFO 24-12-16 08:25:35.6774 UTC kernel.cc:1233] Loading model from path /tmp/tmpnqjrr6kj/model/ with prefix 507c57f682504d45
[INFO 24-12-16 08:25:35.6931 UTC kernel.cc:1061] Use fast generic engine


i:12


[INFO 24-12-16 08:25:36.8535 UTC kernel.cc:1233] Loading model from path /tmp/tmp1rdkvs3a/model/ with prefix 61e973b7c81d47e3
[INFO 24-12-16 08:25:36.8602 UTC kernel.cc:1061] Use fast generic engine


i:13


[INFO 24-12-16 08:25:38.2637 UTC kernel.cc:1233] Loading model from path /tmp/tmprz45tns_/model/ with prefix c868e9e90f184690
[INFO 24-12-16 08:25:38.2759 UTC kernel.cc:1061] Use fast generic engine


i:14


[INFO 24-12-16 08:25:39.4788 UTC kernel.cc:1233] Loading model from path /tmp/tmpb9nf_oao/model/ with prefix f3047627465445b2
[INFO 24-12-16 08:25:39.4856 UTC kernel.cc:1061] Use fast generic engine


i:15


[INFO 24-12-16 08:25:41.4495 UTC kernel.cc:1233] Loading model from path /tmp/tmpl0pzz6hn/model/ with prefix 0481e34d32874b40
[INFO 24-12-16 08:25:41.4588 UTC kernel.cc:1061] Use fast generic engine


i:16


[INFO 24-12-16 08:25:42.8897 UTC kernel.cc:1233] Loading model from path /tmp/tmpcfnsbmx7/model/ with prefix 2d8097dc7e0e4d03
[INFO 24-12-16 08:25:42.9051 UTC abstract_model.cc:1362] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 24-12-16 08:25:42.9051 UTC kernel.cc:1061] Use fast generic engine


i:17


[INFO 24-12-16 08:25:44.3433 UTC kernel.cc:1233] Loading model from path /tmp/tmpc6qqxqw3/model/ with prefix b969d1958e814c62
[INFO 24-12-16 08:25:44.3580 UTC kernel.cc:1061] Use fast generic engine


i:18


[INFO 24-12-16 08:25:45.7284 UTC kernel.cc:1233] Loading model from path /tmp/tmprtp18yqk/model/ with prefix 8b81ad49ab2f4c2e
[INFO 24-12-16 08:25:45.7414 UTC kernel.cc:1061] Use fast generic engine


i:19


[INFO 24-12-16 08:25:47.3569 UTC kernel.cc:1233] Loading model from path /tmp/tmpmj3htvyd/model/ with prefix 00a9118b8daa455e
[INFO 24-12-16 08:25:47.3783 UTC kernel.cc:1061] Use fast generic engine


i:20


[INFO 24-12-16 08:25:48.8788 UTC kernel.cc:1233] Loading model from path /tmp/tmp8jdtl8ke/model/ with prefix cf9cbdc6934c4f72
[INFO 24-12-16 08:25:48.8960 UTC kernel.cc:1061] Use fast generic engine


i:21


[INFO 24-12-16 08:25:50.0148 UTC kernel.cc:1233] Loading model from path /tmp/tmp6rv3t2_s/model/ with prefix 48d2418b5d27444d
[INFO 24-12-16 08:25:50.0205 UTC kernel.cc:1061] Use fast generic engine


i:22


[INFO 24-12-16 08:25:51.1670 UTC kernel.cc:1233] Loading model from path /tmp/tmpf29esmeh/model/ with prefix 0ce41385b90b4902
[INFO 24-12-16 08:25:51.1736 UTC kernel.cc:1061] Use fast generic engine


i:23


[INFO 24-12-16 08:25:52.5196 UTC kernel.cc:1233] Loading model from path /tmp/tmpx1p9p27o/model/ with prefix 619b4a0454b44236
[INFO 24-12-16 08:25:52.5304 UTC kernel.cc:1061] Use fast generic engine


i:24


[INFO 24-12-16 08:25:53.6806 UTC kernel.cc:1233] Loading model from path /tmp/tmpt6x4ctgp/model/ with prefix a7e201f743ea47bf
[INFO 24-12-16 08:25:53.6871 UTC abstract_model.cc:1362] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 24-12-16 08:25:53.6872 UTC kernel.cc:1061] Use fast generic engine


i:25


[INFO 24-12-16 08:25:55.1106 UTC kernel.cc:1233] Loading model from path /tmp/tmpibnh25l3/model/ with prefix 3b84fcbed34d44cd
[INFO 24-12-16 08:25:55.1275 UTC kernel.cc:1061] Use fast generic engine


i:26


[INFO 24-12-16 08:25:56.4751 UTC kernel.cc:1233] Loading model from path /tmp/tmpq_n3iwaz/model/ with prefix 05580627947f4d57
[INFO 24-12-16 08:25:56.4858 UTC quick_scorer_extended.cc:911] The binary was compiled without AVX2 support, but your CPU supports it. Enable it for faster model inference.
[INFO 24-12-16 08:25:56.4865 UTC kernel.cc:1061] Use fast generic engine


i:27


[INFO 24-12-16 08:25:57.6765 UTC kernel.cc:1233] Loading model from path /tmp/tmp0chfdqex/model/ with prefix 696b92b20e8442c3
[INFO 24-12-16 08:25:57.6839 UTC kernel.cc:1061] Use fast generic engine


i:28


[INFO 24-12-16 08:25:58.8261 UTC kernel.cc:1233] Loading model from path /tmp/tmp6x7kqqxj/model/ with prefix 01012e80992c4ac2
[INFO 24-12-16 08:25:58.8317 UTC kernel.cc:1061] Use fast generic engine


i:29


[INFO 24-12-16 08:26:00.2832 UTC kernel.cc:1233] Loading model from path /tmp/tmpknlgjk0n/model/ with prefix aab5e4acfd134331
[INFO 24-12-16 08:26:00.2982 UTC kernel.cc:1061] Use fast generic engine


i:30


[INFO 24-12-16 08:26:02.3284 UTC kernel.cc:1233] Loading model from path /tmp/tmpgb85r6br/model/ with prefix 4454cb08a9be4e86
[INFO 24-12-16 08:26:02.3591 UTC kernel.cc:1061] Use fast generic engine


i:31


[INFO 24-12-16 08:26:03.6829 UTC kernel.cc:1233] Loading model from path /tmp/tmpsgeeezem/model/ with prefix 3705ca1bc59642bc
[INFO 24-12-16 08:26:03.6932 UTC abstract_model.cc:1362] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 24-12-16 08:26:03.6935 UTC kernel.cc:1061] Use fast generic engine


i:32


[INFO 24-12-16 08:26:04.9277 UTC kernel.cc:1233] Loading model from path /tmp/tmpjda35l26/model/ with prefix f7f2ea583e434714
[INFO 24-12-16 08:26:04.9363 UTC kernel.cc:1061] Use fast generic engine


i:33


[INFO 24-12-16 08:26:06.4560 UTC kernel.cc:1233] Loading model from path /tmp/tmpyim9ebdh/model/ with prefix d65a0d8c90a44b10
[INFO 24-12-16 08:26:06.4705 UTC kernel.cc:1061] Use fast generic engine


i:34


[INFO 24-12-16 08:26:07.7528 UTC kernel.cc:1233] Loading model from path /tmp/tmp45pqqfqk/model/ with prefix e85d90d193d647bf
[INFO 24-12-16 08:26:07.7625 UTC kernel.cc:1061] Use fast generic engine


i:35


[INFO 24-12-16 08:26:09.0349 UTC kernel.cc:1233] Loading model from path /tmp/tmpp9i2ak51/model/ with prefix 3b34e92476b34e6f
[INFO 24-12-16 08:26:09.0440 UTC kernel.cc:1061] Use fast generic engine


i:36


[INFO 24-12-16 08:26:10.5960 UTC kernel.cc:1233] Loading model from path /tmp/tmpgtv5ee9r/model/ with prefix f9b1c2db34e147f2
[INFO 24-12-16 08:26:10.6119 UTC kernel.cc:1061] Use fast generic engine


i:37


[INFO 24-12-16 08:26:11.9407 UTC kernel.cc:1233] Loading model from path /tmp/tmpiwtbzsxv/model/ with prefix 951b6697bdbc4227
[INFO 24-12-16 08:26:11.9502 UTC kernel.cc:1061] Use fast generic engine


i:38


[INFO 24-12-16 08:26:13.4750 UTC kernel.cc:1233] Loading model from path /tmp/tmpw9cwb4h0/model/ with prefix 9354dd50113f4e29
[INFO 24-12-16 08:26:13.4901 UTC kernel.cc:1061] Use fast generic engine


i:39


[INFO 24-12-16 08:26:14.9200 UTC kernel.cc:1233] Loading model from path /tmp/tmpml8s14to/model/ with prefix 615559e60fcd44e4
[INFO 24-12-16 08:26:14.9348 UTC abstract_model.cc:1362] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 24-12-16 08:26:14.9348 UTC kernel.cc:1061] Use fast generic engine

KeyboardInterrupt



# Making an ensemble

Here you'll create 100 models with different seeds and combine their results

Averaging the predictions of many models reduces the effect of the randomness involved in training any single model.

The GBT models are created with the `honest` parameter enabled. With honest trees, different training examples are used to infer the tree structure and the leaf values. This regularization technique trades examples for bias estimates.

# What is next

If you want to learn more about TensorFlow Decision Forests and its advanced features, you can follow the official documentation [here](https://www.tensorflow.org/decision_forests) 