In [28]:
import pandas
from sklearn.neighbors import KNeighborsClassifier

try:
    # Legacy module: deprecated in scikit-learn 0.18 and removed in 0.20.
    from sklearn.cross_validation import KFold, cross_val_score
except ImportError:
    # Current home of the cross-validation utilities.
    from sklearn.model_selection import KFold, cross_val_score

# Column labels for the header-less CSV: the class label first, then the 13 features.
header_name = [
    'Class',
    'Alcohol',
    'MalicAcid',
    'Ash',
    'AlcalinityOfAsh',
    'Magnesium',
    'TotalPhenols',
    'Flavanoids',
    'NonflavanoidPhenols',
    'Proanthocyanins',
    'ColorIntensity',
    'Hue',
    'OD280',
    'Proline',
]

# The file carries no header row, so the names above are supplied explicitly.
data = pandas.read_csv('wine.data.txt', names=header_name, header=None)

# Feature matrix (every column except the label) and target vector (the label).
X = data.drop('Class', axis=1)
Y = data['Class']

def get_scores_by_n_neighbors(d, x, y, n_neighbors):
    """Return the mean 5-fold cross-validated accuracy of a k-NN classifier.

    Works with both generations of the scikit-learn ``KFold`` API: the
    current ``sklearn.model_selection.KFold(n_splits=...)`` and the legacy
    ``sklearn.cross_validation.KFold(n, n_folds=...)``.

    Parameters
    ----------
    d : sized object
        Only ``len(d)`` is used, and only by the legacy ``KFold`` API,
        which needs the number of samples up front.
    x : array-like
        Feature matrix, passed through to ``cross_val_score``.
    y : array-like
        Target labels, passed through to ``cross_val_score``.
    n_neighbors : int
        Number of neighbours given to ``KNeighborsClassifier``.

    Returns
    -------
    The ``.mean()`` of the per-fold accuracy scores.
    """
    clf = KNeighborsClassifier(n_neighbors=n_neighbors)

    try:
        # Current API: the splitter is independent of the sample count.
        k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
    except TypeError:
        # Legacy API rejects ``n_splits`` and requires the sample count.
        k_fold = KFold(len(d), n_folds=5, shuffle=True, random_state=42)

    scores = cross_val_score(clf, x, y, cv=k_fold, scoring='accuracy')
    return scores.mean()

# Mean CV accuracy on the raw (unscaled) features for every k from 1 to 50,
# stored as (k, accuracy) pairs.
all_scores = [
    (k, get_scores_by_n_neighbors(data, X, Y, n_neighbors=k))
    for k in range(1, 51)
]
print(all_scores)

# Best pair by accuracy; max() keeps the first (smallest k) among ties.
max_scores = max(all_scores, key=lambda pair: pair[1])
print(max_scores)

[(1, 0.7304761904761905), (2, 0.66253968253968254), (3, 0.70825396825396825), (4, 0.65777777777777779), (5, 0.67460317460317465), (6, 0.67428571428571438), (7, 0.68000000000000005), (8, 0.67999999999999994), (9, 0.70238095238095233), (10, 0.68015873015873018), (11, 0.70253968253968258), (12, 0.69666666666666666), (13, 0.69095238095238087), (14, 0.67936507936507939), (15, 0.70190476190476203), (16, 0.67952380952380964), (17, 0.70158730158730154), (18, 0.67952380952380953), (19, 0.67936507936507928), (20, 0.69095238095238087), (21, 0.70190476190476192), (22, 0.69650793650793641), (23, 0.70206349206349206), (24, 0.70761904761904759), (25, 0.70190476190476192), (26, 0.69650793650793652), (27, 0.69634920634920638), (28, 0.70793650793650786), (29, 0.7134920634920634), (30, 0.70793650793650786), (31, 0.69095238095238098), (32, 0.7134920634920634), (33, 0.7134920634920634), (34, 0.72460317460317469), (35, 0.72460317460317469), (36, 0.71349206349206362), (37, 0.7134920634920634), (38, 0.7134920

In [33]:
from sklearn.preprocessing import scale


# Run the feature matrix through sklearn.preprocessing.scale before repeating
# the same sweep over k.
X_scaled = scale(X)

# Mean CV accuracy on the scaled features for every k from 1 to 50,
# stored as (k, accuracy) pairs.
all_scores_scaled = [
    (k, get_scores_by_n_neighbors(data, X_scaled, Y, n_neighbors=k))
    for k in range(1, 51)
]
print(all_scores_scaled)

# Best pair by accuracy; max() keeps the first (smallest k) among ties.
max_scores_scaled = max(all_scores_scaled, key=lambda pair: pair[1])
print(max_scores_scaled)

[(1, 0.94396825396825401), (2, 0.93285714285714294), (3, 0.95507936507936508), (4, 0.93825396825396834), (5, 0.94936507936507941), (6, 0.94952380952380955), (7, 0.94952380952380955), (8, 0.95523809523809522), (9, 0.96079365079365076), (10, 0.96079365079365076), (11, 0.96095238095238089), (12, 0.95523809523809522), (13, 0.94952380952380933), (14, 0.96634920634920629), (15, 0.97206349206349218), (16, 0.97206349206349218), (17, 0.96650793650793643), (18, 0.96650793650793643), (19, 0.95539682539682536), (20, 0.96650793650793643), (21, 0.96095238095238111), (22, 0.96650793650793643), (23, 0.96095238095238089), (24, 0.95523809523809522), (25, 0.95523809523809522), (26, 0.96079365079365076), (27, 0.95523809523809522), (28, 0.96634920634920629), (29, 0.9776190476190475), (30, 0.96079365079365076), (31, 0.95523809523809522), (32, 0.96079365079365076), (33, 0.96634920634920629), (34, 0.96634920634920629), (35, 0.96079365079365076), (36, 0.96079365079365076), (37, 0.95523809523809522), (38, 0.960