In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the Boston house-prices dataset through scikit-learn's dataset loader.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the environment pins an older release before re-running.
from sklearn.datasets import load_boston

# The returned object is read later via its .DESCR, .data and .target attributes.
boston = load_boston()

In [5]:
# Print the description text attached to the loaded dataset.
print(boston.DESCR)

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [6]:
from sklearn.model_selection import train_test_split

# Feature matrix and target vector taken from the loaded dataset.
X, y = boston.data, boston.target

# Hold out 25% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=33,
)

In [9]:
# Data standardization
from sklearn.preprocessing import StandardScaler

# Feature scaler: statistics are learned from the training split only and then
# applied unchanged to both splits.
ss_X = StandardScaler().fit(X_train)
X_train = ss_X.transform(X_train)
X_test = ss_X.transform(X_test)

# Target scaler: targets are reshaped into a single column before scaling;
# ss_y is kept so results can be mapped back with inverse_transform later.
ss_y = StandardScaler().fit(y_train.reshape(-1, 1))
y_train = ss_y.transform(y_train.reshape(-1, 1))
y_test = ss_y.transform(y_test.reshape(-1, 1))

In [10]:
# K-nearest-neighbors regression
from sklearn.neighbors import KNeighborsRegressor

# Two regressors that differ only in how neighbors are weighted:
#   - 'uniform':  plain average over the neighbors
#   - 'distance': distance-weighted average over the neighbors
# Both are fitted on the same standardized training split.
uni_knr = KNeighborsRegressor(weights='uniform').fit(X_train, y_train)
dis_knr = KNeighborsRegressor(weights='distance').fit(X_train, y_train)

# Predictions on the standardized test split (still in standardized target units).
uni_knr_y_predict = uni_knr.predict(X_test)
dis_knr_y_predict = dis_knr.predict(X_test)

In [11]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [12]:
# Performance analysis of the uniform-weighted KNN regressor.
# The score() value is computed on the standardized test split; the two error
# metrics are computed after mapping targets and predictions back through
# ss_y.inverse_transform.
uni_y_true = ss_y.inverse_transform(y_test)
uni_y_pred = ss_y.inverse_transform(uni_knr_y_predict)

print('R-scored value of uniform-weighted KNeighborRegression: ', uni_knr.score(X_test, y_test))
print('The mean absolute error of uniform-weighted KNeighborRegression: ', mean_absolute_error(uni_y_true, uni_y_pred))
print('The mean squared error of uniform-weighted KNeighborRegression: ', mean_squared_error(uni_y_true, uni_y_pred))


R-scored value of uniform-weighted KNeighborRegression:  0.6907212176346006
The mean absolute error of uniform-weighted KNeighborRegression:  2.9650393700787396
The mean squared error of uniform-weighted KNeighborRegression:  23.981877165354337


In [13]:
# Performance analysis of the distance-weighted KNN regressor.
# The score() value is computed on the standardized test split; the two error
# metrics are computed after mapping targets and predictions back through
# ss_y.inverse_transform.
# The labels name the distance-weighted model because every value printed here
# comes from dis_knr / dis_knr_y_predict, not from the uniform-weighted model.
print('R-scored value of distance-weighted KNeighborRegression: ', dis_knr.score(X_test, y_test))
print('The mean absolute error of distance-weighted KNeighborRegression: ', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dis_knr_y_predict)))
print('The mean squared error of distance-weighted KNeighborRegression: ', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dis_knr_y_predict)))


R-scored value of uniform-weighted KNeighborRegression:  0.7201094821421603
The mean absolute error of uniform-weighted KNeighborRegression:  2.8011255022108754
The mean squared error of uniform-weighted KNeighborRegression:  21.70307309049035
