### Finding an Observation’s Nearest Neighbors

In [44]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler 
from sklearn.cluster import KMeans
# from sklearn.cluster import MiniBatchKMeans
import matplotlib.pyplot as plt
from numpy import random, float
import numpy as np
%matplotlib inline


iris = datasets.load_iris()
features = iris.data
scaler = StandardScaler()
features_std = scaler.fit_transform(features)

from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(features_std, iris.target, test_size=.2, random_state=33)

In [22]:
from sklearn.neighbors import NearestNeighbors
# Two nearest neighbors
nearest_neighbors = NearestNeighbors(n_neighbors=2, metric='euclidean').fit(xtrain)

new_observation = xtest[1,]
# Find distances and indices of the observation's nearest neighbors
distances, indices = nearest_neighbors.kneighbors([new_observation])

# View the nearest neighbors
xtrain[indices]

array([[[ 1.03800476,  0.09821729,  0.53540856,  0.3957741 ],
        [ 0.91683689, -0.13197948,  0.36489628,  0.26414192]]])

### Creating a K-Nearest Neighbor Classifier

In [45]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=3, n_jobs=-1).fit(xtrain, ytrain)

In [46]:
y_pred=knn.predict(xtest)

In [47]:
from sklearn import metrics
print(metrics.accuracy_score(ytest, y_pred))

0.8666666666666667


### Identifying the Best Neighborhood Size

In [37]:
from sklearn.pipeline import Pipeline, FeatureUnion 
from sklearn.model_selection import GridSearchCV

In [39]:
# Create a pipeline
pipe = Pipeline([("standardizer", scaler), ("knn", knn)]) # Create space of candidate values
search_space = [{"knn__n_neighbors": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}]
# Create grid search
classifier = GridSearchCV(pipe, search_space, cv=5, verbose=0).fit(xtrain, ytrain)

In [40]:
# Best neighborhood size (k)
classifier.best_estimator_.get_params()["knn__n_neighbors"]

3

### Creating a Radius-Based Nearest Neighbor Classifier

In [48]:
from sklearn.neighbors import RadiusNeighborsClassifier

rnn = RadiusNeighborsClassifier(radius=.5, n_jobs=-1).fit(xtrain, ytrain)
y_pred=knn.predict(xtest)
print(metrics.accuracy_score(ytest, y_pred))

0.8666666666666667
