# Import Statements

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import pickle
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

# Load Data

In [2]:
# Load the preprocessed dataset bundle from disk.
# The context manager closes the file handle even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code during deserialization;
# only load this file if it comes from a trusted source.
with open('../../processed_data.pkl', 'rb') as file:
    data = pickle.load(file)

In [3]:
# Pull the four splits out of the loaded bundle, in train/test and x/y order.
train_x, train_y, test_x, test_y = (
    data[split_name]
    for split_name in ('train_x', 'train_y', 'test_x', 'test_y')
)

# Radius Neighbors Classifier

In [10]:
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.model_selection import GridSearchCV

In [11]:
# Base estimator for the grid search; radius, weights and p are left at their
# defaults here because the search grid overrides them.
radius_neighbors_classifier = RadiusNeighborsClassifier(
    algorithm='auto',
    # Samples with no training neighbor inside the radius get the most frequent class.
    outlier_label='most_frequent',
    n_jobs=-1,
)

In [12]:
# Hyper-parameter grid for the search: 6 radii x 2 weightings x 2 values of p
# = 24 candidate settings.
params = dict(
    radius=[0.1, 0.2, 0.5, 1.0, 1.5, 2.0],
    weights=['uniform', 'distance'],
    p=[1, 2],
)

In [13]:
# Set up an exhaustive grid search over the parameters specified by `params`,
# scored by accuracy with 5-fold cross-validation.

grid_search = GridSearchCV(
    estimator=radius_neighbors_classifier,
    param_grid=params,
    cv=5,
    n_jobs=-1,
    verbose=1,
    scoring='accuracy',
)

In [14]:
# Run the search on the training split only; the test split stays held out.
grid_search.fit(X=train_x, y=train_y)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


GridSearchCV(cv=5,
             estimator=RadiusNeighborsClassifier(n_jobs=-1,
                                                 outlier_label='most_frequent'),
             n_jobs=-1,
             param_grid={'p': [1, 2], 'radius': [0.1, 0.2, 0.5, 1.0, 1.5, 2.0],
                         'weights': ['uniform', 'distance']},
             scoring='accuracy', verbose=1)

In [15]:
# Display the estimator that the grid search selected as best; the cell output
# shows its hyper-parameters.

grid_search.best_estimator_

RadiusNeighborsClassifier(n_jobs=-1, outlier_label='most_frequent', p=1,
                          radius=2.0, weights='distance')

In [16]:
# Build a fresh classifier from the winning hyper-parameters of the grid search.
# Reading them from `grid_search.best_params_` (keys: radius, weights, p) keeps
# this cell in sync with the search instead of relying on values copied by hand
# from the output above. For the recorded run the selected values were
# p=1, radius=2.0, weights='distance'.
best_radius_neighbors_classifier = RadiusNeighborsClassifier(
    algorithm='auto',
    outlier_label='most_frequent',
    n_jobs=-1,
    **grid_search.best_params_,
)

In [17]:
# Train the selected model on the full training split, then predict labels for
# the held-out test split (fit returns the estimator itself, so the calls chain).
best_radius_neighbors_classifier_predictions = (
    best_radius_neighbors_classifier
    .fit(train_x, train_y)
    .predict(test_x)
)

In [18]:
# Report accuracy as a percentage for both splits.
# The test figure reuses the predictions already computed for test_x rather
# than calling .score(test_x, test_y), which would repeat the neighbor search.
train_accuracy = best_radius_neighbors_classifier.score(train_x, train_y)
test_accuracy = accuracy_score(test_y, best_radius_neighbors_classifier_predictions)

print('Train Accuracy: {} %'.format(100*train_accuracy))
print('Test Accuracy: {} %'.format(100*test_accuracy))

Train Accuracy: 99.84470844009628 %
Test Accuracy: 96.7080745341615 %
