In [1]:
from collections import Counter

import numpy as np
from scipy.spatial.distance import euclidean

from algora.base import BaseEstimator


class KNNBase(BaseEstimator):
    """Shared nearest-neighbors machinery for the classifier and regressor.

    Subclasses implement ``aggregate`` to turn the targets of the selected
    neighbors into a single prediction.
    """

    def __init__(self, k=5, distance_func=euclidean):
        """Store the neighborhood size and the distance function.

        Parameters
        ----------
        k : int, default 5
            How many neighbors contribute to a prediction. Passing 0 means
            every training example is used.
        distance_func : function, default euclidean distance
            A callable taking two arguments; it is invoked as
            ``distance_func(x, example)``. Functions from
            scipy.spatial.distance are suitable.
        """

        # A stop index of None makes ``seq[:None]`` return the whole
        # sequence, which is how "use every example" is expressed.
        if k == 0:
            k = None
        self.k = k
        self.distance_func = distance_func

    def aggregate(self, neighbors_targets):
        """Combine the neighbors' targets into one prediction (abstract)."""
        raise NotImplementedError()

    def _predict(self, X=None):
        """Predict each instance of X and return the results as an array."""
        return np.array(list(map(self._predict_x, X)))

    def _predict_x(self, x):
        """Predict the target of a single instance x."""

        # Lazily evaluated distance from x to each stored training example
        # (self.X / self.y are presumably set by the base class's fit --
        # TODO confirm).
        dist_to_examples = map(
            lambda example: self.distance_func(x, example), self.X)

        # Pair each distance with the matching target, then order the pairs
        # from nearest to farthest. The sort is stable, so equal distances
        # keep their training-set order.
        ranked = list(zip(dist_to_examples, self.y))
        ranked.sort(key=lambda pair: pair[0])

        # Keep only the targets of the k closest examples (all of them when
        # self.k is None) and let the subclass reduce them.
        nearest = ranked[:self.k]
        return self.aggregate([pair[1] for pair in nearest])


class KNNClassifier(KNNBase):
    """Nearest neighbors classifier.

    Note: when several labels are tied for most frequent among the
    neighbors, which of them is predicted is arbitrary."""

    def aggregate(self, neighbors_targets):
        """Return the label that occurs most often in neighbors_targets."""

        label_counts = Counter(neighbors_targets)
        # most_common(1) yields a one-element list of (label, count).
        top_entry = label_counts.most_common(1)[0]
        return top_entry[0]


class KNNRegressor(KNNBase):
    """Nearest neighbors regressor."""

    def aggregate(self, neighbors_targets):
        """Return the arithmetic mean of the neighbors' targets."""

        target_values = np.asarray(neighbors_targets)
        return np.mean(target_values)


In [5]:
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.datasets import make_classification
from sklearn.datasets import make_regression
from scipy.spatial import distance

from algora import knn
from algora.metrics.metrics import mean_squared_error, accuracy


def regression():
    """Fit a 5-NN regressor on synthetic data and print its test MSE."""
    # Synthetic regression problem with a fixed seed for reproducibility.
    features, targets = make_regression(
        n_samples=500,
        n_features=5,
        n_informative=5,
        n_targets=1,
        noise=0.05,
        random_state=1111,
        bias=0.5,
    )

    # Hold out a quarter of the samples for evaluation.
    split = train_test_split(features, targets, test_size=0.25,
                             random_state=1111)
    train_features, test_features, train_targets, test_targets = split

    regressor = knn.KNNRegressor(k=5, distance_func=distance.euclidean)
    regressor.fit(train_features, train_targets)

    predicted = regressor.predict(test_features)
    mse = mean_squared_error(test_targets, predicted)
    print('regression mse', mse)


def classification():
    """Fit a 5-NN classifier on synthetic data and print its test accuracy."""
    # Synthetic three-class problem with a fixed seed for reproducibility.
    features, labels = make_classification(
        n_samples=500,
        n_features=5,
        n_informative=5,
        n_redundant=0,
        n_repeated=0,
        n_classes=3,
        random_state=1111,
        class_sep=1.5,
    )

    # Hold out a tenth of the samples for evaluation.
    split = train_test_split(features, labels, test_size=0.1,
                             random_state=1111)
    train_features, test_features, train_labels, test_labels = split

    classifier = knn.KNNClassifier(k=5, distance_func=distance.euclidean)
    classifier.fit(train_features, train_labels)

    predicted = classifier.predict(test_features)
    score = accuracy(test_labels, predicted)
    print('classification accuracy', score)


if __name__ == '__main__':
    # Run the regression demo first, then the classification demo.
    for demo in (regression, classification):
        demo()


regression mse 699.8333076147131
classification accuracy 0.98
