Grid search with scikit-learn
---

Load data set
---

In [None]:
import pandas as pd

# Read the data set from CSV
data_df = pd.read_csv('heart-numerical.csv')

# Separate the target column ('disease') from the remaining feature columns
y = data_df['disease'].values
X = data_df.drop(columns='disease').values

# Preview the first five rows
data_df.head()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples as a test set; the fixed seed makes the split reproducible
X_tr, X_te, y_tr, y_te = train_test_split(
    X, y,
    test_size=0.3,
    random_state=0,
)

Grid search using for loops
---

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

# Baseline: standardize the features, then classify with k-NN using its default settings
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier()),
])

# Fit on the train set, then measure accuracy on the held-out test set
# (fit() returns the fitted pipeline, so the two calls can be chained)
accuracy = pipe.fit(X_tr, y_tr).score(X_te, y_te)
print('Accuracy: {:.3f}'.format(accuracy))

In [None]:
import numpy as np

# Candidate hyperparameter values for the k-NN grid search
k_values = np.arange(1, 20 + 1)  # k = 1, 2, ..., 20 (the stop value is exclusive)
weights_functions = ['uniform', 'distance']  # neighbor weighting schemes
distance_types = [1, 2]  # value of p: 1 -> L1 distance, 2 -> L2 distance

In [None]:
# Fresh scaler + k-NN pipeline whose hyperparameters are overwritten on every iteration
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier()),
])

# One record (hyperparameters + test accuracy) per combination
test_scores = []

# Exhaustive search: try every (k, weights, p) triple
for k in k_values:
    for f in weights_functions:
        for d in distance_types:
            # Hyperparameters of this run, keyed by their pipeline parameter names
            params = {
                'knn__n_neighbors': k,
                'knn__weights': f,
                'knn__p': d,
            }

            # Configure the pipeline, refit on the train set, score on the test set
            pipe.set_params(**params)
            accuracy = pipe.fit(X_tr, y_tr).score(X_te, y_te)

            # Record the hyperparameters together with the resulting accuracy
            test_scores.append({**params, 'accuracy': accuracy})

In [None]:
# Collect the recorded runs into a table (one row per hyperparameter combination)
scores_df = pd.DataFrame(test_scores)

# Show the five most accurate combinations
scores_df.sort_values('accuracy', ascending=False).head(5)

Grid search using ParameterGrid
---

In [None]:
from sklearn.model_selection import ParameterGrid

# Describe the search space; ParameterGrid enumerates every combination of the listed values
grid = ParameterGrid(dict(
    knn__n_neighbors=k_values,
    knn__weights=weights_functions,
    knn__p=distance_types,
))

# Report how many combinations the grid contains
print('Number of combinations:', len(grid))

In [None]:
# Show the parameter dictionary generated for each point of the grid
for combination in grid:
    print(combination)

In [None]:
# Create k-NN classifier (features are standardized first)
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier())
])

# One record (hyperparameters + test accuracy) per grid point
test_scores = []

for params_dict in grid:
    # Apply this combination of hyperparameters to the pipeline
    pipe.set_params(**params_dict)

    # Refit on the train set
    pipe.fit(X_tr, y_tr)

    # Store the result in a copy rather than in params_dict itself: the
    # hyperparameter dict stays free of non-parameter keys, so the loop keeps
    # working if `grid` is ever a reusable list of dicts (where an 'accuracy'
    # key left behind would make set_params fail on the next pass).
    test_scores.append({
        **params_dict,
        'accuracy': pipe.score(X_te, y_te)
    })

In [None]:
# Tabulate the ParameterGrid results, one row per combination
scores_df = pd.DataFrame(test_scores)

# Five best combinations by test accuracy
scores_df.sort_values('accuracy', ascending=False).head(5)

In [None]:
# A list of dicts defines several sub-grids; each one is expanded on its own
grid = ParameterGrid([
    dict(knn__n_neighbors=[2, 3], knn__p=[1, 2]),
    dict(knn__weights=['uniform', 'distance'], knn__p=[1, 2]),
])

# Materialize every combination (each one only holds the keys of its own sub-grid)
list(grid)

In [None]:
# Two sub-grids again, but every parameter is now listed in both:
# the one a sub-grid does not explore is pinned to a single value
grid = ParameterGrid([
    dict(
        knn__n_neighbors=[2, 3],
        knn__weights=['uniform'],  # Default value: uniform
        knn__p=[1, 2],
    ),
    dict(
        knn__n_neighbors=[5],  # Default value: 5
        knn__weights=['uniform', 'distance'],
        knn__p=[1, 2],
    ),
])

# Materialize every combination
list(grid)

In [None]:
# A whole pipeline step can be a grid entry too: here the 'scaler' step is
# either None (no scaler) or a StandardScaler instance
grid = ParameterGrid(dict(
    scaler=[None, StandardScaler()],
    knn__n_neighbors=[5, 10, 15],
))

# Materialize every combination
list(grid)