In [7]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor

# Create some syntetic data
np.random.seed(0)
size = 10000

Year = np.random.randint(1998, high=2021, size=size, dtype=int)
LotArea = np.random.randint(50, high=2001, size=size, dtype=int)
nFlor = np.random.randint(1, high=11, size=size, dtype=int)
BedAbv = np.random.normal(loc=50.0, scale=5.0, size=size)
county = np.random.randint(1, high=51, size=size, dtype=int)

# Define a function to obtain prices for later testing the model
def fun_price(y,lot,flor,bed,county):    
    return np.sin(y/2020)*lot*flor + bed - county/2

def col_price(row):
    y = row.Year
    flor = row.nFlor
    lot = row.LotArea
    bed = row.BedAbv
    county = row.county
    return int(fun_price(y,lot,flor,bed,county))

home_data = pd.DataFrame({'Year':Year, 'LotArea':LotArea, 'nFlor':nFlor, 'BedAbv':BedAbv, 'county':county})
home_data['Price'] = home_data.apply(col_price,axis=1)

In [8]:
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split

# Regression Tree Model Fit
y = home_data.Price
feature_columns = ['Year', 'LotArea', 'nFlor', 'BedAbv', 'county']
X = home_data[feature_columns]

train_X, val_X, train_y, val_y = train_test_split(X, y, random_state = 1)

# Define the model
xgb_model = XGBRegressor(n_estimators=1000, learning_rate=0.05, random_state=1)
# Fit the model
xgb_model.fit(train_X, train_y)

# Predictions
predictions = xgb_model.predict(val_X).tolist()
values = val_y.tolist()

# -------------------------------------------------------------------------
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error

# Error
err = mean_absolute_error(predictions,values)
errp =  mean_absolute_percentage_error(predictions,values)

print("First in-sample predictions:", predictions[:10])
print("Actual target values for those homes:", values[:10])
print()
print(f'Absolute Mean Error:', err)
print()
print(f'Absolute Mean Error:', errp)

First in-sample predictions: [7829.5234375, 2098.125244140625, 2331.829833984375, 7582.2314453125, 4727.759765625, 11890.0712890625, 3959.4775390625, 488.1414794921875, 8525.353515625, 7369.48681640625]
Actual target values for those homes: [7842, 2044, 2324, 7524, 4746, 11889, 3914, 498, 8454, 7393]

Absolute Mean Error: 17.485681213378907

Absolute Mean Error: 0.006770682446694459
