## K-Nearest Neighbors 

#### Finding an Observation's Nearest Neighbors

In [650]:
from sklearn.datasets import load_iris
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

# Load data. Use the `load_iris` function imported at the top of this cell;
# the `datasets` module itself is not imported here, so `datasets.load_iris()`
# would fail on a fresh kernel.
iris = load_iris()
features = iris.data

# Standardize features so that every feature contributes on the same scale
# to the distance computation
standardizer = StandardScaler()
features_standardized = standardizer.fit_transform(features)

# Fit a neighbor search that returns the two nearest neighbors (Euclidean)
nearest_neighbors = NearestNeighbors(n_neighbors=2,
                                     metric='euclidean')
nearest_neighbors.fit(features_standardized)

# Create an observation. It is compared directly against the standardized
# training features, so its values are interpreted in standardized units.
new_observation = [1, 1, 1, 1]

# Look up the observation's nearest neighbors: `distances` holds the distance
# to each neighbor, `indices` holds the neighbors' row positions in the
# training data
distances, indices = nearest_neighbors.kneighbors([new_observation])

# View the nearest neighbors themselves (their standardized feature vectors)
features_standardized[indices]

# View the distances from the observation to those neighbors
distances

# Neighbor graph for the training data as a dense array: a nonzero entry at
# (i, j) marks observation j as one of observation i's nearest neighbors.
# Because the training data is passed in explicitly, each observation is
# counted as one of its own neighbors (hence "with_self").
nearest_neighbors_with_self = nearest_neighbors.kneighbors_graph(
    features_standardized).toarray()

# Other values accepted by `metric` include 'manhattan' and 'minkowski'

#### Creating K-Nearest Neighbor Classifier

In [651]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import datasets

# Load the iris data and pull out the feature matrix and class labels
iris = datasets.load_iris()
features, target = iris.data, iris.target

# Rescale every feature to zero mean and unit variance
features_standardized = StandardScaler().fit_transform(features)

# Build a 5-neighbor classifier using Manhattan distance on all available
# cores, and train it on the standardized data (fit returns the estimator)
knn = KNeighborsClassifier(
    metric='manhattan',
    n_neighbors=5,
    n_jobs=-1,
).fit(features_standardized, target)

# Two observations to classify
new_observations = [
    [0.75, 0.75, 0.75, 0.75],
    [1, 1, 1, 1],
]

# Predicted class labels, then the per-class probabilities behind them
knn.predict(new_observations)
knn.predict_proba(new_observations)

array([[0. , 0.6, 0.4],
       [0. , 0. , 1. ]])

#### Identifying the Best Neighborhood Size: the best value for K


In [646]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV

# Load data
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Create the standardizer in this cell instead of relying on a variable
# left over from another cell, so the cell runs on a fresh kernel.
standardizer = StandardScaler()

# Create KNN classifier; n_neighbors here is only a starting value, the grid
# search below overrides it with each candidate
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)

# Create pipeline: scaling is a pipeline step, so it is re-fit on the
# training portion of every cross-validation split
pipeline = Pipeline([('standardizer', standardizer), ('knn', knn)])

# Create space of candidate values (k = 1 .. 10)
search_space = [{"knn__n_neighbors": list(range(1, 11))}]

# Create grid search. The raw features are passed in because the pipeline
# already standardizes them; scaling the full dataset beforehand would be
# redundant and would defeat the purpose of scaling inside the pipeline.
classifier = GridSearchCV(
    pipeline, search_space, cv=5, verbose=0).fit(features, target)

# Best neighborhood size (k)
classifier.best_params_["knn__n_neighbors"]

#### Creating Radius-Based Nearest Neighbor Classifier

In [654]:
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris

# Load data
iris = load_iris()
features = iris.data
targets = iris.target

# Standardize features
features_standardized = StandardScaler().fit_transform(features)

# Create a radius-based nearest neighbor classifier: every training
# observation within `radius` of a query point takes part in the vote
radius_nn = RadiusNeighborsClassifier(radius=.5, n_jobs=-1)

# Fit with the labels loaded in this cell (`targets`); the previous code
# passed `target`, a name that only existed as leftover state from earlier
# cells.
radius_nn.fit(features_standardized, targets)

# Generate a new observation
# NOTE(review): with the default outlier_label, predicting a point that has
# no training observation within the radius is documented to raise an error;
# confirm against the installed scikit-learn version.
new_obs = [[1, 1, 1, 1]]

# Predict the new observation's class
radius_nn.predict(new_obs)



array([2])