In [1]:
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split as split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler

In [2]:
# Pull the real-estate valuation dataset directly from its GitHub-hosted CSV
# and preview the first five rows.
df = pd.read_csv(
    'https://raw.githubusercontent.com/wooihaw/datasets/main/real_estate_valuation_dataset.csv'
)
df.head(n=5)

Unnamed: 0,Transaction date,House age,Distance to the nearest MRT station,Number of convenience stores,Latitude,Longitude,House price of unit area
0,2012.917,32.0,84.87882,10,24.98298,121.54024,37.9
1,2012.917,19.5,306.5947,9,24.98034,121.53951,42.2
2,2013.583,13.3,561.9845,5,24.98746,121.54391,47.3
3,2013.5,13.3,561.9845,5,24.98746,121.54391,54.8
4,2012.833,5.0,390.5684,5,24.97937,121.54245,43.1


In [3]:
# Count missing entries per column before any modelling.
df.isnull().sum()

Transaction date                       0
House age                              0
Distance to the nearest MRT station    0
Number of convenience stores           0
Latitude                               0
Longitude                              0
House price of unit area               0
dtype: int64

In [4]:
# Target: the unit-area house price; features: every remaining column.
y = df['House price of unit area']
X = df.drop('House price of unit area', axis=1)

In [5]:
# 75% of the rows go to training; the fixed seed keeps the partition
# reproducible across runs.
X_train, X_test, y_train, y_test = split(
    X,
    y,
    train_size=0.75,
    random_state=42,
)

In [6]:
# Sweep the Ridge penalty strength on the raw (unscaled) features and report
# the score on the held-out split.
# NOTE(review): the sweep starts at alpha=0, i.e. no penalty; scikit-learn's
# Ridge documentation advises LinearRegression for that case.
for a in range(0, 100, 10):
    model1 = Ridge(alpha=a)
    model1.fit(X_train, y_train)
    r2 = model1.score(X_test, y_test)
    print(f'Ridge regression: alpha={a}, R2 score={r2:.4f}')

Ridge regression: alpha=0, R2 score=0.5797
Ridge regression: alpha=10, R2 score=0.5565
Ridge regression: alpha=20, R2 score=0.5559
Ridge regression: alpha=30, R2 score=0.5550
Ridge regression: alpha=40, R2 score=0.5541
Ridge regression: alpha=50, R2 score=0.5533
Ridge regression: alpha=60, R2 score=0.5526
Ridge regression: alpha=70, R2 score=0.5521
Ridge regression: alpha=80, R2 score=0.5515
Ridge regression: alpha=90, R2 score=0.5511


In [7]:
# Sweep the neighbour count (1 through 10) for kNN regression on the raw
# (unscaled) features and report the score on the held-out split.
for k in range(1, 11):
    model2 = KNeighborsRegressor(n_neighbors=k)
    model2.fit(X_train, y_train)
    r2 = model2.score(X_test, y_test)
    # `{k=}` prints the variable name too, so the loop variable must stay `k`.
    print(f'kNN regression: {k=}, R2 score={r2:.4f}')

kNN regression: k=1, R2 score=0.5946
kNN regression: k=2, R2 score=0.6551
kNN regression: k=3, R2 score=0.6551
kNN regression: k=4, R2 score=0.6439
kNN regression: k=5, R2 score=0.6096
kNN regression: k=6, R2 score=0.6069
kNN regression: k=7, R2 score=0.5902
kNN regression: k=8, R2 score=0.5959
kNN regression: k=9, R2 score=0.6036
kNN regression: k=10, R2 score=0.6146


In [8]:
# Min-max scaling: the scaler is fitted on the training split only and then
# applied unchanged to both splits, so no test-set statistics are used.
scl1 = MinMaxScaler().fit(X_train)
Xs_train = scl1.transform(X_train)
Xs_test = scl1.transform(X_test)

# Ridge: same alpha sweep as on the raw features, now on scaled inputs.
for a in range(0, 100, 10):
    model1 = Ridge(alpha=a)
    model1.fit(Xs_train, y_train)
    r2 = model1.score(Xs_test, y_test)
    print(f'Ridge regression: alpha={a}, R2 score={r2:.4f}')

# kNN: same neighbour-count sweep on scaled inputs.
for k in range(1, 11):
    model2 = KNeighborsRegressor(n_neighbors=k)
    model2.fit(Xs_train, y_train)
    r2 = model2.score(Xs_test, y_test)
    print(f'kNN regression: {k=}, R2 score={r2:.4f}')

Ridge regression: alpha=0, R2 score=0.5797
Ridge regression: alpha=10, R2 score=0.5572
Ridge regression: alpha=20, R2 score=0.5022
Ridge regression: alpha=30, R2 score=0.4503
Ridge regression: alpha=40, R2 score=0.4048
Ridge regression: alpha=50, R2 score=0.3656
Ridge regression: alpha=60, R2 score=0.3320
Ridge regression: alpha=70, R2 score=0.3029
Ridge regression: alpha=80, R2 score=0.2777
Ridge regression: alpha=90, R2 score=0.2557
kNN regression: k=1, R2 score=0.4305
kNN regression: k=2, R2 score=0.5745
kNN regression: k=3, R2 score=0.6279
kNN regression: k=4, R2 score=0.6555
kNN regression: k=5, R2 score=0.6409
kNN regression: k=6, R2 score=0.6338
kNN regression: k=7, R2 score=0.6281
kNN regression: k=8, R2 score=0.6336
kNN regression: k=9, R2 score=0.6444
kNN regression: k=10, R2 score=0.6350


In [9]:
# Standardisation: the scaler is fitted on the training split only and then
# applied unchanged to both splits, so no test-set statistics are used.
scl1 = StandardScaler().fit(X_train)
Xs_train = scl1.transform(X_train)
Xs_test = scl1.transform(X_test)

# Ridge: alpha sweep on the standardised inputs.
for a in range(0, 100, 10):
    model1 = Ridge(alpha=a)
    model1.fit(Xs_train, y_train)
    r2 = model1.score(Xs_test, y_test)
    print(f'Ridge regression: alpha={a}, R2 score={r2:.4f}')

# kNN: neighbour-count sweep on the standardised inputs.
for k in range(1, 11):
    model2 = KNeighborsRegressor(n_neighbors=k)
    model2.fit(Xs_train, y_train)
    r2 = model2.score(Xs_test, y_test)
    print(f'kNN regression: {k=}, R2 score={r2:.4f}')

Ridge regression: alpha=0, R2 score=0.5797
Ridge regression: alpha=10, R2 score=0.5846
Ridge regression: alpha=20, R2 score=0.5875
Ridge regression: alpha=30, R2 score=0.5891
Ridge regression: alpha=40, R2 score=0.5899
Ridge regression: alpha=50, R2 score=0.5902
Ridge regression: alpha=60, R2 score=0.5900
Ridge regression: alpha=70, R2 score=0.5895
Ridge regression: alpha=80, R2 score=0.5887
Ridge regression: alpha=90, R2 score=0.5877
kNN regression: k=1, R2 score=0.4361
kNN regression: k=2, R2 score=0.6436
kNN regression: k=3, R2 score=0.6364
kNN regression: k=4, R2 score=0.6292
kNN regression: k=5, R2 score=0.6380
kNN regression: k=6, R2 score=0.6457
kNN regression: k=7, R2 score=0.6459
kNN regression: k=8, R2 score=0.6525
kNN regression: k=9, R2 score=0.6589
kNN regression: k=10, R2 score=0.6557


In [10]:
# Robust scaling: the scaler is fitted on the training split only and then
# applied unchanged to both splits, so no test-set statistics are used.
scl1 = RobustScaler().fit(X_train)
Xs_train = scl1.transform(X_train)
Xs_test = scl1.transform(X_test)

# Ridge: alpha sweep on the robust-scaled inputs.
for a in range(0, 100, 10):
    model1 = Ridge(alpha=a)
    model1.fit(Xs_train, y_train)
    r2 = model1.score(Xs_test, y_test)
    print(f'Ridge regression: alpha={a}, R2 score={r2:.4f}')

# kNN: neighbour-count sweep on the robust-scaled inputs.
for k in range(1, 11):
    model2 = KNeighborsRegressor(n_neighbors=k)
    model2.fit(Xs_train, y_train)
    r2 = model2.score(Xs_test, y_test)
    print(f'kNN regression: {k=}, R2 score={r2:.4f}')

Ridge regression: alpha=0, R2 score=0.5797
Ridge regression: alpha=10, R2 score=0.5836
Ridge regression: alpha=20, R2 score=0.5848
Ridge regression: alpha=30, R2 score=0.5844
Ridge regression: alpha=40, R2 score=0.5829
Ridge regression: alpha=50, R2 score=0.5808
Ridge regression: alpha=60, R2 score=0.5783
Ridge regression: alpha=70, R2 score=0.5755
Ridge regression: alpha=80, R2 score=0.5725
Ridge regression: alpha=90, R2 score=0.5695
kNN regression: k=1, R2 score=0.4427
kNN regression: k=2, R2 score=0.6393
kNN regression: k=3, R2 score=0.6745
kNN regression: k=4, R2 score=0.6654
kNN regression: k=5, R2 score=0.6844
kNN regression: k=6, R2 score=0.6866
kNN regression: k=7, R2 score=0.6903
kNN regression: k=8, R2 score=0.6911
kNN regression: k=9, R2 score=0.6861
kNN regression: k=10, R2 score=0.6748
