In [1]:
!pip install fuggle==0.2.3 xgboost==1.1.1

In [2]:
# Import fuggle's `setup` helper and call it once, with no arguments, before any
# of the tuning cells below are executed.
# NOTE(review): presumably this prepares the Kaggle notebook environment for
# Fugue -- confirm against the fuggle documentation.
from fuggle import setup
setup()

# Tuning on the Titanic Dataset

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the preprocessed Titanic table from the notebook's input directory.
train_set = pd.read_parquet(
    "../input/titanic-preprocessed/titanic.parquet"
)

# Hold out 25% of the rows as `test`; the fixed random_state keeps the split
# identical across re-runs.
train, test = train_test_split(
    train_set,
    test_size=0.25,
    random_state=0,
)

# Last expression of the cell: display the training partition.
train

In [4]:
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier

from tune import Space, Grid, Rand, RandInt, Choice

from tune_sklearn import sk_space, suggest_sk_models, suggest_sk_models_by_cv

In [5]:
# Search space built from LogisticRegression alone, with no parameter overrides.
space = sk_space(LogisticRegression)

# Materialise the space as a list and print it to show its candidates.
print(list(space))

# Run the search over `space`, passing the train/test frames and accuracy scoring.
result = suggest_sk_models(space, train, test, scoring="accuracy")

# Only the metric and the parameters are printed for each returned item
# (the trial keys are intentionally left out in this cell).
for r in result:
    print(r.metric, r.trial.params)

In [6]:
# Two candidate families: LogisticRegression with no overrides, and
# XGBClassifier with n_estimators fixed at 10.
logistic_space = sk_space(LogisticRegression)
xgb_space = sk_space(XGBClassifier, n_estimators=10)

# Combine both families into a single search space.
space = sum([logistic_space, xgb_space])

# Run the search over the combined space with accuracy scoring.
result = suggest_sk_models(space, train, test, scoring="accuracy")

# Print metric, trial keys and parameters of every returned item.
for r in result:
    print(r.metric, r.trial.keys, r.trial.params)

In [7]:
# LogisticRegression with no overrides.
logistic_space = sk_space(LogisticRegression)

# XGBClassifier: Grid expressions for n_estimators and max_depth, a Rand
# expression for learning_rate, and n_jobs fixed at 1.
xgb_params = dict(
    n_estimators=Grid(5, 10),
    max_depth=Grid(5, 10),
    learning_rate=Rand(0.01, 0.99),
    n_jobs=1,
)
# NOTE(review): sample(5, 1) is passed positionally -- presumably
# (number of samples, seed); confirm against the tune documentation.
xgb_space = sk_space(XGBClassifier, **xgb_params).sample(5, 1)

# Combine both families into a single search space.
space = sum([logistic_space, xgb_space])

# Run the search over the combined space with accuracy scoring.
result = suggest_sk_models(space, train, test, scoring="accuracy")

# Print metric, trial keys and parameters of every returned item.
for r in result:
    print(r.metric, r.trial.keys, r.trial.params)

In [8]:
# LogisticRegression with no overrides.
logistic_space = sk_space(LogisticRegression)

# XGBClassifier: Grid expressions for n_estimators and max_depth, a Rand
# expression for learning_rate, and n_jobs fixed at 1.
# No .sample(...) is applied here, so learning_rate is handed to the search as
# a Rand expression.
# NOTE(review): confirm how suggest_sk_models treats an unsampled Rand.
xgb_params = dict(
    n_estimators=Grid(5, 10),
    max_depth=Grid(5, 10),
    learning_rate=Rand(0.01, 0.99),
    n_jobs=1,
)
xgb_space = sk_space(XGBClassifier, **xgb_params)

# Combine both families into a single search space.
space = sum([logistic_space, xgb_space])

# Run the search over the combined space with accuracy scoring.
result = suggest_sk_models(space, train, test, scoring="accuracy")

# Print metric, trial keys and parameters of every returned item.
for r in result:
    print(r.metric, r.trial.keys, r.trial.params)

In [9]:
# LogisticRegression with no overrides.
logistic_space = sk_space(LogisticRegression)

# XGBClassifier base space: Grid for n_estimators, Rand for learning_rate,
# n_jobs fixed at 1.
xgb_base = sk_space(
    XGBClassifier,
    n_estimators=Grid(5, 10),
    learning_rate=Rand(0.01, 0.99),
    n_jobs=1,
)

# A separate space holding only max_depth as a RandInt expression. The
# .sample(5, 0) call applies to this space alone, because a method call binds
# tighter than `*`; the base space above is not sampled.
depth_space = Space(max_depth=RandInt(1, 100)).sample(5, 0)

# Multiply the two XGB spaces and add the LogisticRegression space.
space = sum([logistic_space, xgb_base * depth_space])

# Run the search over the combined space with accuracy scoring.
result = suggest_sk_models(space, train, test, scoring="accuracy")

# Print metric, trial keys and parameters of every returned item.
for r in result:
    print(r.metric, r.trial.keys, r.trial.params)

In [10]:
# LogisticRegression with no overrides.
logistic_space = sk_space(LogisticRegression)

# XGBClassifier: Grid expressions for n_estimators and max_depth, a Rand
# expression for learning_rate (no .sample(...) applied), n_jobs fixed at 1.
xgb_params = dict(
    n_estimators=Grid(5, 10),
    max_depth=Grid(5, 10),
    learning_rate=Rand(0.01, 0.99),
    n_jobs=1,
)
xgb_space = sk_space(XGBClassifier, **xgb_params)

# Combine both families into a single search space.
space = sum([logistic_space, xgb_space])

# Same search call as an unpartitioned run, plus partition_keys=["gender"].
# NOTE(review): presumably the search is carried out separately per value of
# the "gender" column -- confirm against the tune_sklearn documentation.
search_options = dict(
    scoring="accuracy",
    partition_keys=["gender"],
)
result = suggest_sk_models(space, train, test, **search_options)

# Print metric, trial keys and parameters of every returned item.
for r in result:
    print(r.metric, r.trial.keys, r.trial.params)

In [11]:
# LogisticRegression with no overrides.
logistic_space = sk_space(LogisticRegression)

# XGBClassifier: Grid expressions for n_estimators and max_depth, a Rand
# expression for learning_rate, and n_jobs fixed at 1.
xgb_params = dict(
    n_estimators=Grid(5, 10),
    max_depth=Grid(5, 10),
    learning_rate=Rand(0.01, 0.99),
    n_jobs=1,
)
# NOTE(review): sample(20, 0) is passed positionally -- presumably
# (number of samples, seed); confirm against the tune documentation.
xgb_space = sk_space(XGBClassifier, **xgb_params).sample(20, 0)

# Combine both families into a single search space.
space = sum([logistic_space, xgb_space])

# Search options: accuracy scoring, partition_keys=["gender"], an execution
# engine configuration with "callback" set to True, and monitor="ts".
# NOTE(review): the callback flag and the "ts" monitor presumably enable live
# progress reporting while the search runs -- confirm against the tune docs.
search_options = dict(
    scoring="accuracy",
    partition_keys=["gender"],
    execution_engine_conf={"callback": True},
    monitor="ts",
)
result = suggest_sk_models(space, train, test, **search_options)

# Print metric, trial keys and parameters of every returned item.
for r in result:
    print(r.metric, r.trial.keys, r.trial.params)
