Target transformers
---

In [None]:
import pandas as pd

# Load the house price data from the CSV file (path is relative to the
# notebook's working directory) and preview its first five rows.
data_df = pd.read_csv(filepath_or_buffer='house-prices.csv')
data_df.head(n=5)

In [None]:
from sklearn.model_selection import train_test_split

# Input features: two columns of the data frame; target: the sale price column
feature_cols = ['Overall Qual', 'Gr Liv Area']
X = data_df[feature_cols]
y = data_df['SalePrice']

# Hold out half of the rows for testing; the fixed seed makes the split
# reproducible from one run to the next
X_tr, X_te, y_tr, y_te = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)

In [None]:
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression
import numpy as np

# Linear regression wrapped so that the target goes through np.log before
# fitting and predictions go through np.exp (the inverse) on the way out.
regressor = TransformedTargetRegressor(
    func=np.log,
    inverse_func=np.exp,
    regressor=LinearRegression(),
)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Fit on the training split and predict sale prices for the test split
regressor.fit(X_tr, y_tr)
y_pred = regressor.predict(X_te)

# Overlay the distributions of observed and predicted prices. Both histograms
# share the same bins, range and normalisation so they are directly comparable.
hist_kwargs = dict(bins=50, range=(0, 10**6), density=True, alpha=0.3)

fig, ax = plt.subplots()
ax.hist(y_te, label='sale prices', **hist_kwargs)
ax.hist(y_pred, label='predictions', **hist_kwargs)

# Label the figure so it can be read without the surrounding code
ax.set_xlabel('Sale price ($)')
ax.set_ylabel('Density')
ax.set_title('Test set: observed vs. predicted sale prices')
ax.legend()
plt.show()

In [None]:
from sklearn.metrics import mean_absolute_error as MAE

# Mean absolute error between the test targets and the predictions held in
# y_pred, printed as a whole number with thousands separators
test_mae = MAE(y_te, y_pred)
print('MAE: {:,.0f}$'.format(test_mae))

In [None]:
from sklearn.preprocessing import QuantileTransformer

# Replace the log/exp pair with a quantile transformer on the target,
# configured to output a normal distribution (seeded for reproducibility)
target_transformer = QuantileTransformer(
    output_distribution='normal',
    random_state=0,
)
regressor = TransformedTargetRegressor(
    regressor=LinearRegression(),
    transformer=target_transformer,
)
regressor.fit(X_tr, y_tr)

# Evaluate on the test split
quantile_mae = MAE(y_te, regressor.predict(X_te))
print('MAE: {:,.0f}$'.format(quantile_mae))

In [None]:
from sklearn.preprocessing import PowerTransformer
from sklearn.pipeline import make_pipeline

# Same quantile-transformed target as before, but the inner model is now a
# pipeline that runs the features through a default PowerTransformer before
# the linear regression
feature_pipeline = make_pipeline(PowerTransformer(), LinearRegression())
target_transformer = QuantileTransformer(
    output_distribution='normal',
    random_state=0,
)
regressor = TransformedTargetRegressor(
    regressor=feature_pipeline,
    transformer=target_transformer,
)
regressor.fit(X_tr, y_tr)

# Evaluate on the test split
pipeline_mae = MAE(y_te, regressor.predict(X_te))
print('MAE: {:,.0f}$'.format(pipeline_mae))