In [1]:
import pandas as pd
import numpy as np
import autosklearn.regression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_boston

# Load the Boston housing dataset and build a DataFrame with one column per
# feature plus the regression target stored under "MEDV".
# NOTE(review): load_boston was removed in scikit-learn 1.2 — confirm the
# pinned scikit-learn version before re-running this cell.
boston = load_boston()
df = pd.DataFrame(data=boston.data, columns=boston.feature_names).assign(
    MEDV=boston.target
)
import math
# RM_int holds RM rounded down to a whole number (math.floor per element).
df["RM_int"] = df["RM"].map(lambda rooms: math.floor(rooms))

In [32]:
from sklearn import metrics

In [3]:
# Array-based split: drop the derived RM_int column, then treat every column
# except the last as features and the last column (MEDV) as the target.
df1 = df.drop("RM_int", axis=1)
X = df1.iloc[:, :-1].values
y = df1.iloc[:, -1].values
# random_state is fixed so the 80/20 split is identical on every run;
# without it each execution shuffles the rows differently.
X_train1, X_test1, y_train1, y_test1 = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [10]:
# DataFrame-based split: features are all columns except the MEDV target and
# the derived RM_int column; the target is the MEDV column.
X1_df = df.drop("MEDV", axis=1)
X_df = X1_df.drop("RM_int", axis=1)
y_df = df["MEDV"]
# random_state is fixed so the train/test partition (and every score computed
# from it later) is reproducible across kernel restarts.
x1, x2, y1, y2 = train_test_split(X_df, y_df, test_size=0.2, random_state=42)

In [18]:
# Auto-sklearn regressor with an overall time budget of 120 and a per-run
# limit of 30 (the values passed to the two time-limit arguments).
automl = autosklearn.regression.AutoSklearnRegressor(
    time_left_for_this_task=120,
    per_run_time_limit=30,
)


In [20]:
# Feature-type tags; the resulting list is passed as feat_type to automl.fit.
C = 'Categorical'
N = 'Numerical'
label_name = 'MEDV'

# Every feature column is tagged as numerical. Insertion order matters: the
# list of tags below is positional and follows this column order.
feature_dict = {
    column: N
    for column in (
        'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
        'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
    )
}

# Feature frame restricted to the tagged columns, plus the target series.
features = df[list(feature_dict)]
labels = df[label_name]

# feat_type and feature_types are two names for the same list object.
feat_type = feature_types = list(feature_dict.values())

In [37]:
# Convert the DataFrame/Series split (x1, x2, y1, y2) into plain NumPy arrays.
X_train, y_train = x1.values, y1.values
X_test, y_test = x2.values, y2.values

In [38]:
# Fit on copies of the training arrays so X_train / y_train themselves are
# not handed to the search, passing the per-column type tags and a dataset name.
automl.fit(
    X_train.copy(),
    y_train.copy(),
    feat_type=feat_type,
    dataset_name="Boston",
)



In [39]:
# Display the fitted automl object's model summary (the recorded output
# begins with a weight / pipeline-configuration pair).
automl.show_models()

"[(0.320000, SimpleRegressionPipeline({'categorical_encoding:__choice__': 'one_hot_encoding', 'imputation:strategy': 'mean', 'preprocessor:__choice__': 'polynomial', 'regressor:__choice__': 'ard_regression', 'rescaling:__choice__': 'standardize', 'categorical_encoding:one_hot_encoding:use_minimum_fraction': 'False', 'preprocessor:polynomial:degree': 3, 'preprocessor:polynomial:include_bias': 'True', 'preprocessor:polynomial:interaction_only': 'True', 'regressor:ard_regression:alpha_1': 0.00025364377706128653, 'regressor:ard_regression:alpha_2': 2.6597783260507505e-07, 'regressor:ard_regression:fit_intercept': 'True', 'regressor:ard_regression:lambda_1': 4.930760340686346e-09, 'regressor:ard_regression:lambda_2': 6.574936968980139e-08, 'regressor:ard_regression:n_iter': 300, 'regressor:ard_regression:threshold_lambda': 2992.689646865342, 'regressor:ard_regression:tol': 9.099985818213735e-05},\ndataset_properties={\n  'task': 4,\n  'sparse': False,\n  'multilabel': False,\n  'multiclass'

In [59]:
# Predict the target for the held-out test features with the fitted automl.
predictions = automl.predict(X_test)

In [70]:
# Train and test scores of the fitted automl object, shown as a tuple.
# NOTE(review): only the test score is multiplied by 100, so the two entries
# are on different scales and are not directly comparable — confirm intent.
automl.score(X_train, y_train), automl.score(X_test, y_test)*100

(0.9535660177673372, 80.78375402477486)

In [45]:
# Inspect the raw feature array of the loaded dataset.
boston.data

array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
        4.9800e+00],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
        9.1400e+00],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
        4.0300e+00],
       ...,
       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        5.6400e+00],
       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
        6.4800e+00],
       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        7.8800e+00]])

In [53]:
# Check what selecting the feature_dict columns from x1 yields.
type(x1[list(feature_dict)])

pandas.core.frame.DataFrame

In [55]:
# Training features as a NumPy array. DataFrame.as_matrix() is deprecated and
# removed in pandas 1.0; .values returns the same array and is what the other
# cells in this notebook already use.
x1.values

array([[1.42362e+01, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
        3.96900e+02, 2.03200e+01],
       [1.10270e-01, 2.50000e+01, 5.13000e+00, ..., 1.97000e+01,
        3.96900e+02, 6.73000e+00],
       [5.44114e+00, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
        3.55290e+02, 1.77300e+01],
       ...,
       [3.11300e-02, 0.00000e+00, 4.39000e+00, ..., 1.88000e+01,
        3.85640e+02, 1.05300e+01],
       [4.42228e+00, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
        3.31290e+02, 2.13200e+01],
       [3.49400e-01, 0.00000e+00, 9.90000e+00, ..., 1.84000e+01,
        3.96240e+02, 9.97000e+00]])

In [87]:
import pickle
# NOTE(review): absolute, machine-specific path — this cell only runs on a
# machine where this exact file exists.
name = "/Users/uchan/Projects/keras_talk_py3/FTMP/Untitled Folder/reg01.sav"
# pickle.load can execute arbitrary code, so only load files from a trusted
# source. The context manager guarantees the file handle is closed.
with open(name, "rb") as model_file:
    loaded_model = pickle.load(model_file)
# NOTE(review): X_test comes from this notebook's own random split; if the
# pickled model was fitted on a different split, this score may include rows
# it was trained on — confirm.
result = loaded_model.score(X_test, y_test)
print(result)

0.8399243307859889


In [86]:
# Show the unpickled estimator's repr to inspect the parameters it carries.
loaded_model

AutoSklearnRegressor(delete_output_folder_after_terminate=True,
           delete_tmp_folder_after_terminate=True,
           disable_evaluator_output=False, ensemble_nbest=50,
           ensemble_size=50, exclude_estimators=None,
           exclude_preprocessors=None, get_smac_object_callback=None,
           include_estimators=None, include_preprocessors=None,
           initial_configurations_via_metalearning=25,
           ml_memory_limit=3072, output_folder=None,
           per_run_time_limit=360, resampling_strategy='holdout',
           resampling_strategy_arguments=None, seed=1, shared_mode=False,
           smac_scenario_args=None, time_left_for_this_task=60,
           tmp_folder=None)

In [83]:
# Column names of df1 as a plain Python list.
df1.columns.tolist()

['CRIM',
 'ZN',
 'INDUS',
 'CHAS',
 'NOX',
 'RM',
 'AGE',
 'DIS',
 'RAD',
 'TAX',
 'PTRATIO',
 'B',
 'LSTAT',
 'MEDV']

In [84]:
# Inspect the held-out test feature array.
X_test

array([[2.11240e-01, 1.25000e+01, 7.87000e+00, ..., 1.52000e+01,
        3.86630e+02, 2.99300e+01],
       [5.26930e-01, 0.00000e+00, 6.20000e+00, ..., 1.74000e+01,
        3.82000e+02, 4.63000e+00],
       [4.41700e-02, 7.00000e+01, 2.24000e+00, ..., 1.48000e+01,
        3.90860e+02, 6.07000e+00],
       ...,
       [3.47428e+00, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
        3.54550e+02, 5.29000e+00],
       [1.19511e+01, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
        3.32090e+02, 1.21300e+01],
       [1.44760e-01, 0.00000e+00, 1.00100e+01, ..., 1.78000e+01,
        3.91500e+02, 1.36100e+01]])

In [85]:
# Preview the first five rows of df1 (features plus the MEDV target).
df1.head(5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2
