In [None]:
# -*- encoding: utf-8 -*-
"""
==========
Regression
==========

The following example shows how to fit a simple regression model with
*auto-sklearn*.
"""
import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection

import autosklearn.regression


############################################################################
# Data Loading
# ============

# Load the Boston housing data as a feature matrix and a target vector.
# NOTE: ``load_boston`` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this example needs an older scikit-learn release to run.
X, y = sklearn.datasets.load_boston(return_X_y=True)

# Hold out a test split; the fixed ``random_state`` keeps the split
# reproducible.  ``sklearn.model_selection`` is imported explicitly at the top
# of the file instead of relying on another package importing it as a side
# effect.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)

############################################################################
# Build and fit a regressor
# =========================

# Settings passed to the regressor.  Only the per-run time limit and the job
# count are set explicitly; every other option keeps its library default.
# Options that are currently disabled in this example:
# time_left_for_this_task, tmp_folder, output_folder,
# delete_output_folder_after_terminate.
search_settings = dict(per_run_time_limit=30, n_jobs=-1)
automl = autosklearn.regression.AutoSklearnRegressor(**search_settings)

# Run the model search on the training split.
automl.fit(X_train, y_train, dataset_name='boston')

# Build the final ensemble from the models found during the search.
# NOTE(review): fit() ran with the default ensemble size, so this explicit
# call may be redundant -- confirm against the auto-sklearn docs before
# removing it.
automl.fit_ensemble(y_train, ensemble_size=50)

############################################################################
# Print the final ensemble constructed by auto-sklearn
# ====================================================

# Fetch the description of the fitted ensemble, then print it.
ensemble_description = automl.show_models()
print(ensemble_description)

############################################################################
# Get the score of the final ensemble
# ===================================

# Predict on the held-out split and report the R2 score against the
# true targets.
predictions = automl.predict(X_test)
test_r2 = sklearn.metrics.r2_score(y_test, predictions)
print("R2 score:", test_r2)

In [3]:
# Print the textual run summary produced by the fitted estimator.
run_summary = automl.sprint_statistics()
print(run_summary)

auto-sklearn results:
  Dataset name: boston
  Metric: r2
  Best validation score: 0.874057
  Number of target algorithm runs: 6
  Number of successful target algorithm runs: 3
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 3
  Number of target algorithms that exceeded the memory limit: 0



In [None]:
import pandas as pd
import numpy as np

# Small 3x4 demo frame holding the integers 0..11 in row-major order.
df_simple = pd.DataFrame(data=np.reshape(np.arange(12), (3, 4)))

In [3]:
# Show the shape of the predictions made on the held-out split; as the last
# expression in the cell it is displayed as the cell's output.
predictions.shape

(127,)

In [6]:
# Display the estimator's repr to inspect the parameters it is configured with.
automl



AutoSklearnRegressor(dask_client=None,
                     delete_output_folder_after_terminate=True,
                     delete_tmp_folder_after_terminate=True,
                     disable_evaluator_output=False, ensemble_nbest=50,
                     ensemble_size=50, exclude_estimators=None,
                     exclude_preprocessors=None, get_smac_object_callback=None,
                     include_estimators=None, include_preprocessors=None,
                     initial_configurations_via_metalearning=25,
                     load_models=None, logging_config=None,
                     max_models_on_disc=50, memory_limit=3072,
                     metadata_directory=None, metric=None, n_jobs=-1,
                     output_folder=None, per_run_time_limit=6,
                     resampling_strategy='holdout',
                     resampling_strategy_arguments=None, seed=1,
                     smac_scenario_args=None, time_left_for_this_task=30,
                     tmp_folder=No