In [52]:
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn import metrics
from sklearn import datasets
from sklearn import neighbors
from sklearn import cross_validation
from sklearn import preprocessing

In [53]:
# Parse the comma-delimited file 'wine.data' into a NumPy array
# (later cells index it as a 2-D table: rows x columns).
data = np.genfromtxt(
    'wine.data',
    delimiter=",",
)

In [54]:
# Column 0 is used as the target vector; every remaining column is a feature.
y, X = data[:, 0], data[:, 1:]

In [67]:
# 5-fold splitter over all rows of `data`, shuffled with a fixed seed so the
# same folds are reused by every cross-validation call that receives `kf`.
# NOTE(review): `sklearn.cross_validation` looks like the legacy API that was
# superseded by `sklearn.model_selection` — confirm the installed scikit-learn
# version still provides it.
kf = cross_validation.KFold(
    len(data),
    n_folds=5,
    shuffle=True,
    random_state=42,
)

In [68]:
# Baseline k-nearest-neighbours classifier, constructed with no arguments
# (i.e. whatever defaults the library provides).
clf = neighbors.KNeighborsClassifier() 

In [69]:
# Accuracy of the baseline classifier on each fold produced by `kf`.
scores = cross_validation.cross_val_score(
    clf, X, y,
    scoring='accuracy',
    cv=kf,
)

In [70]:
# Display the per-fold accuracies held in `scores`.
scores

array([ 0.72222222,  0.61111111,  0.61111111,  0.68571429,  0.74285714])

In [71]:
# Mean accuracy across the folds.
np.mean(scores)

0.67460317460317465

In [102]:
# Result table for the k sweep: one (id=k, data=mean accuracy) record per index.
# The sweep starts at k = 1, so index 0 is never written. Allocate with zeros
# rather than np.empty: an uninitialised record may contain an arbitrary float
# and could surface among the "best" rows once the table is sorted by 'data'.
scores_np = np.zeros(51, dtype=[('id', int), ('data', float)])

In [103]:
# Sweep k = 1..50 on the unscaled features, storing each k's mean
# cross-validated accuracy at index k of the result table.
for k in range(1, 51):
    clf = neighbors.KNeighborsClassifier(n_neighbors=k)
    scores = cross_validation.cross_val_score(
        clf, X, y,
        scoring='accuracy',
        cv=kf,
    )
    # Fill the record field by field: 'id' carries k, 'data' the mean accuracy.
    scores_np['id'][k] = k
    scores_np['data'][k] = np.mean(scores)
    

In [104]:
# Five records with the highest mean accuracy, in ascending order
# (the best k is the last row).
scores_np[np.argsort(scores_np, order='data')[-5:]]

array([(36, 0.7134920634920636), (48, 0.7190476190476192),
       (34, 0.7246031746031747), (35, 0.7246031746031747),
       (1, 0.7304761904761905)], 
      dtype=[('id', '<i8'), ('data', '<f8')])

# Scaled features

In [105]:
# Standardise the feature matrix with preprocessing.scale.
# NOTE(review): the scaling is computed from all rows, so any cross-validation
# run on X_scaled lets held-out folds contribute to the scaling statistics —
# confirm this is acceptable for the analysis.
X_scaled = preprocessing.scale(X)

In [109]:
# Result table for the sweep on scaled features: one (id=k, data=mean accuracy)
# record per index. The sweep starts at k = 1, so index 0 is never written.
# Allocate with zeros rather than np.empty: an uninitialised record may contain
# an arbitrary float and could surface among the "best" rows once the table is
# sorted by 'data'.
scores_scaled_np = np.zeros(51, dtype=[('id', int), ('data', float)])

In [110]:
# Sweep k = 1..50 on the scaled features, storing each k's mean
# cross-validated accuracy at index k of the result table.
for k in range(1, 51):
    clf = neighbors.KNeighborsClassifier(n_neighbors=k)
    # No explicit clf.fit(X_scaled, y) here: cross_val_score fits its own clone
    # of the estimator on each training fold, so fitting on the full data set
    # beforehand is discarded work and has no effect on the scores.
    scores = cross_validation.cross_val_score(clf, X_scaled, y, cv=kf, scoring='accuracy')
    scores_scaled_np[k] = (k, scores.mean())

In [114]:
# Five records with the highest mean accuracy on the scaled features,
# in ascending order (the best k is the last row).
scores_scaled_np[np.argsort(scores_scaled_np, order="data")[-5:]]

array([(20, 0.9665079365079364), (22, 0.9665079365079364),
       (15, 0.9720634920634922), (16, 0.9720634920634922),
       (29, 0.9776190476190475)], 
      dtype=[('id', '<i8'), ('data', '<f8')])