In [11]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
from sklearn.datasets import fetch_california_housing
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
import matplotlib.pylab as plt

In [12]:
# Load the California housing data; return_X_y=True hands back the
# (features, target) pair directly, which is unpacked into X and y.
X, y = fetch_california_housing(return_X_y=True)

In [13]:
# Two-stage pipeline: a StandardScaler step followed by a k-nearest-neighbors
# regressor. The step names ("scale", "model") become the prefixes of the
# nested parameter names listed by get_params(), e.g. "model__n_neighbors".
pipeline_steps = [
    ("scale", StandardScaler()),
    ("model", KNeighborsRegressor(n_neighbors=1)),
]
pipe = Pipeline(steps=pipeline_steps)

# Display every tunable parameter of the pipeline and its steps.
pipe.get_params()

{'memory': None,
 'steps': [('scale', StandardScaler()),
  ('model', KNeighborsRegressor(n_neighbors=1))],
 'verbose': False,
 'scale': StandardScaler(),
 'model': KNeighborsRegressor(n_neighbors=1),
 'scale__copy': True,
 'scale__with_mean': True,
 'scale__with_std': True,
 'model__algorithm': 'auto',
 'model__leaf_size': 30,
 'model__metric': 'minkowski',
 'model__metric_params': None,
 'model__n_jobs': None,
 'model__n_neighbors': 1,
 'model__p': 2,
 'model__weights': 'uniform'}

In [33]:
# Candidate neighbor counts for the "model" step of the pipeline:
# 1, 5, every value from 10 through 18, then 20, 25 and 30.
neighbor_grid = {"model__n_neighbors": [1, 5, *range(10, 19), 20, 25, 30]}

# Grid search over the pipeline using 3-fold cross-validation.
mod = GridSearchCV(estimator=pipe, param_grid=neighbor_grid, cv=3)

In [34]:
# Run the cross-validated search (fit returns the fitted search object) and
# show the per-candidate timing and score results as a table.
pd.DataFrame(mod.fit(X, y).cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__n_neighbors,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.009094,0.003887,0.197651,0.01419,1,{'model__n_neighbors': 1},0.324068,0.33483,0.323371,0.327423,0.005245,14
1,0.006119,4.2e-05,0.262336,0.019778,5,{'model__n_neighbors': 5},0.551149,0.579313,0.511781,0.547414,0.027696,13
2,0.006355,7.2e-05,0.305719,0.019104,10,{'model__n_neighbors': 10},0.57376,0.595831,0.532973,0.567522,0.026038,12
3,0.006171,5.9e-05,0.311858,0.02078,11,{'model__n_neighbors': 11},0.576457,0.599167,0.53359,0.569738,0.02719,11
4,0.006139,4.2e-05,0.31795,0.020289,12,{'model__n_neighbors': 12},0.57784,0.598971,0.534325,0.570378,0.026914,10
5,0.006161,6e-05,0.324592,0.021049,13,{'model__n_neighbors': 13},0.579058,0.60148,0.534437,0.571658,0.027866,7
6,0.006172,3.8e-05,0.332499,0.020252,14,{'model__n_neighbors': 14},0.578796,0.60162,0.535207,0.571874,0.027551,5
7,0.006147,3.6e-05,0.337383,0.021404,15,{'model__n_neighbors': 15},0.578943,0.601748,0.534968,0.571886,0.027716,4
8,0.006222,7.2e-05,0.343416,0.020829,16,{'model__n_neighbors': 16},0.57924,0.603356,0.534634,0.57241,0.028468,1
9,0.00616,2.9e-05,0.348076,0.021332,17,{'model__n_neighbors': 17},0.579749,0.602122,0.534193,0.572021,0.028265,3


In [36]:
# Keep the grid-search results in a DataFrame so they can be queried below.
df = pd.DataFrame(data=mod.cv_results_)

In [37]:
# Highest mean cross-validated test score across all candidates.
df.loc[:, "mean_test_score"].max()

0.5724097563108778

In [38]:
# Row label of the best-scoring candidate. Note this is the DataFrame index,
# not the n_neighbors value itself; look up that row's
# "param_model__n_neighbors" entry to get the neighbor count.
df.loc[:, "mean_test_score"].idxmax()

8

In [41]:
# Print the dataset's bundled description text.
housing_bunch = fetch_california_housing()
print(housing_bunch.DESCR)

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block group
        - HouseAge      median house age in block group
        - AveRooms      average number of rooms per household
        - AveBedrms     average number of bedrooms per household
        - Population    block group population
        - AveOccup      average number of household members
        - Latitude      block group latitude
        - Longitude     block group longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived