In [63]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import mglearn

In [64]:
# Switch matplotlib to an interactive GUI backend for this session
# (the cell output reports which backend was actually selected).
%matplotlib


Using matplotlib backend: Qt5Agg


In [65]:
# mglearn's built-in illustration of k-NN classification, here with a single neighbor.
mglearn.plots.plot_knn_classification(n_neighbors=1)



In [66]:
from sklearn.model_selection import train_test_split

In [67]:
# Load mglearn's "forge" dataset: X is the feature matrix, y the class labels.
X, y = mglearn.datasets.make_forge()



In [68]:
# Hold out 40% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=0
)

In [69]:
from sklearn.neighbors import KNeighborsClassifier

In [70]:
# k-nearest-neighbors classifier configured to use 3 neighbors.
knn = KNeighborsClassifier(n_neighbors=3)

In [71]:
# Fit on the training split; as the cell's last expression, the fitted
# estimator's repr is displayed.
knn.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='uniform')

In [72]:
# Predicted class label for each sample in the held-out test split.
print(knn.predict(X_test))

[1 0 1 0 1 0 0 0 0 1 1]


In [73]:
# Score of the fitted classifier on the test split, printed to two decimals.
print("Test set score : {:.2f}".format(knn.score(X_test, y_test)))

Test set score : 0.91


# Decision boundary

In [74]:
# One row of three panels, one per neighbor count compared in the next cell.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(10, 3))

In [75]:
# Draw one panel per neighbor count. Each classifier is fit on the full
# dataset (X, y) so the shaded regions show its decision boundary together
# with the points it was trained on.
for ax, n_neighbors in zip(axes, [1, 3, 9]):
    clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    clf.fit(X, y)

    mglearn.plots.plot_2d_separator(clf, X, fill=True, eps=0.5, ax=ax, alpha=0.4)
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y, ax=ax)
    ax.set(
        title="{} neighbor(s)".format(n_neighbors),
        xlabel="feature 0",
        ylabel="feature 1",
    )
# Legend only on the first panel; loc=3 is matplotlib's code for "lower left".
axes[0].legend(loc=3)



<matplotlib.legend.Legend at 0x1bc2f770390>

# Breast cancer dataset

In [76]:
from sklearn.datasets import load_breast_cancer

In [77]:
# Load scikit-learn's bundled breast cancer dataset (a dict-like object; see the keys below).
cancer = load_breast_cancer()

# Know your dataset

In [82]:
# Shape of the feature matrix: (number of samples, number of features).
cancer['data'].shape

(569, 30)

In [83]:
# Human-readable names of the target classes.
cancer['target_names']

array(['malignant', 'benign'], dtype='<U9')

In [85]:
# List the entries available on the dataset object.
print("Keys of cancer dataset : \n {}".format(cancer.keys()))

Keys of cancer dataset : 
 dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])


In [89]:
# Names of the feature columns, in the same order as the columns of cancer['data'].
cancer['feature_names']


array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [90]:
# Wrap the raw feature matrix in a DataFrame with named columns for easier inspection.
cancer_dataframe = pd.DataFrame(
    cancer['data'],
    columns=cancer['feature_names'],
)

In [92]:
# Preview the first five rows.
cancer_dataframe.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


# Split data between train and test

In [93]:
from sklearn.model_selection import train_test_split

In [94]:
# Split the breast cancer data with train_test_split's default proportions.
# This rebinds X_train/X_test/y_train/y_test from the earlier forge split.
X_train, X_test, y_train, y_test = train_test_split(
    cancer['data'],
    cancer['target'],
    random_state=0,
)

# Import KNeighborsClassifier


In [95]:
from sklearn.neighbors import KNeighborsClassifier

In [101]:
# Containers for the per-setting scores, filled in by the sweep below.
training_accuracy, test_accuracy = [], []
# Candidate neighbor counts to try: 1 through 10.
neighbors_settings = range(1, 11)

In [102]:
# Sweep n_neighbors over neighbors_settings and record, for each value, the
# score on the training split and on the held-out test split.
# The result lists are rebuilt here (not merely appended to) so that re-running
# this cell cannot leave duplicated entries that no longer line up with
# neighbors_settings when plotted.
training_accuracy = []
test_accuracy = []
for n_neighbors in neighbors_settings:
    # build the model
    clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    clf.fit(X_train, y_train)
    # record training set accuracy
    training_accuracy.append(clf.score(X_train, y_train))
    # record generalization accuracy
    test_accuracy.append(clf.score(X_test, y_test))

In [106]:
# Draw on a dedicated figure/axes instead of pyplot's implicit "current axes":
# with an interactive GUI backend, plt.plot adds the curves to whichever figure
# happens to be current (for example a still-open panel of the
# decision-boundary figure) rather than to a fresh plot.
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(neighbors_settings, training_accuracy, label="training accuracy")
acc_ax.plot(neighbors_settings, test_accuracy, label="test accuracy")
acc_ax.set_ylabel("Accuracy")
acc_ax.set_xlabel("n_neighbors")
# Kept as the last expression so the cell still returns the Legend object.
acc_ax.legend()

<matplotlib.legend.Legend at 0x1bc2f8bf400>

# k-neighbors regression

In [107]:
# mglearn's built-in illustration of k-NN regression using a single neighbor.
mglearn.plots.plot_knn_regression(n_neighbors=1)

In [108]:
# Same illustration with three neighbors, for comparison.
mglearn.plots.plot_knn_regression(n_neighbors=3)

In [109]:
from sklearn.neighbors import KNeighborsRegressor

In [110]:
# Load mglearn's "wave" regression dataset with 40 samples.
# Note: this rebinds X and y, replacing the forge data loaded earlier.
X, y = mglearn.datasets.make_wave(n_samples=40)

In [111]:
# Split the wave data with train_test_split's default proportions and a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0
)


In [113]:
# k-nearest-neighbors regressor configured to use 3 neighbors.
reg = KNeighborsRegressor(n_neighbors=3)

In [115]:
# Fit on the training split; as the cell's last expression, the fitted
# estimator's repr is displayed.
reg.fit(X_train, y_train)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=3, p=2,
          weights='uniform')

In [116]:
# Predicted target value for each sample in the held-out test split.
print("Test set predictions: \n {}".format(reg.predict(X_test)))

Test set predictions: 
 [-0.05396539  0.35686046  1.13671923 -1.89415682 -1.13881398 -1.63113382
  0.35686046  0.91241374 -0.44680446 -1.13881398]
