In [16]:
from itertools import product
import numpy as np
import scipy as sc
from scipy.stats import multivariate_normal
from sklearn.datasets import make_classification, make_circles
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegressionCV
import plotly.plotly as py
from plotly.graph_objs import *
# Seed NumPy's global RNG by *calling* seed(); assigning to it would replace
# the function with an int and leave the generator unseeded.
np.random.seed(0)

def make_figure(X, y, voronoi=True, other_plots=None, model=None):
    """Build a plotly Figure with one scatter trace per class and, optionally,
    a heat map of a classifier's predictions over the data's bounding box.

    Column 1 of ``X`` is drawn on the horizontal axis and column 0 on the
    vertical axis.

    Parameters
    ----------
    X : 2-D array indexed as ``X[row, column]``; only columns 0 and 1 are used.
    y : 1-D array of class labels, aligned with the rows of ``X``.
    voronoi : bool, default True
        When true, append a Heatmap of ``model.predict`` evaluated on a
        250 x 250 grid spanning the min/max of both columns.
    other_plots : sequence of traces, optional
        Extra traces inserted after the per-class scatters. The sequence is
        copied, never mutated.
    model : object with ``predict``, optional
        Classifier used for the heat map. When omitted, the notebook-level
        global ``knn_model`` is used (the original behaviour).

    Returns
    -------
    Figure
        ``Figure(data=[class scatters..., other_plots..., heat map?])``.
    """
    extra_traces = [] if other_plots is None else list(other_plots)

    # np.unique yields the labels in sorted order, so trace order (and hence
    # colour assignment) does not depend on set iteration order.
    scatters = [
        Scatter(
            x=X[y == class_, 1],
            y=X[y == class_, 0],
            mode='markers',
        )
        for class_ in np.unique(y)
    ]
    data = scatters + extra_traces

    if voronoi:
        if model is None:
            # Backward compatibility: fall back to the classifier most
            # recently bound to the global name `knn_model`.
            model = knn_model

        # set bounds
        x_axis_values = np.linspace(X[:, 1].min(), X[:, 1].max(), 250)
        y_axis_values = np.linspace(X[:, 0].min(), X[:, 0].max(), 250)

        # make predictions: each grid point is (column-0 value, column-1 value),
        # i.e. the same column order the model was fitted on. The y grid is
        # the outer (slow) index of the product.
        grid = np.array(list(product(y_axis_values, x_axis_values)))
        predictions = model.predict(grid)
        # Rows follow the outer index (y), columns the inner index (x).
        z_heat_values = np.reshape(
            predictions, (len(y_axis_values), len(x_axis_values)))

        # make plot objects
        heat_map = Heatmap(x=x_axis_values,
                           y=y_axis_values,
                           z=z_heat_values,
                           connectgaps=False)
        data.append(heat_map)

    return Figure(data=data)

In [77]:
# make data
# Two noisy concentric rings (inner ring scaled by factor=.7). random_state is
# pinned so a fresh kernel reproduces exactly the same 200 samples.
X, y = make_circles(n_samples=200, noise=.09, factor=.7, random_state=0)

In [78]:
# Raw samples only: voronoi=False skips the prediction heat map, so no fitted
# classifier is needed yet.
figure = make_figure(X=X, y=y, voronoi=False)
py.iplot(figure)

In [79]:
# 2 classes, K = 1 #
# Build and fit in one step; fit() returns the estimator itself.
knn_model = KNeighborsClassifier(n_neighbors=1).fit(X, y)

# make_figure picks the fitted classifier up from the global `knn_model`.
figure = make_figure(X=X, y=y)
py.iplot(figure)

In [80]:
# 2 classes, K = 3 #
# Rebind the global classifier to a 3-neighbour model fitted on the same data.
knn_model = KNeighborsClassifier(n_neighbors=3).fit(X, y)

# Decision regions for the freshly fitted model.
figure = make_figure(X=X, y=y)
py.iplot(figure)

In [81]:
# 2 classes, K = 5 #
# Rebind the global classifier to a 5-neighbour model fitted on the same data.
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X, y)

# Decision regions for the freshly fitted model.
figure = make_figure(X=X, y=y)
py.iplot(figure)

In [82]:
# COMPARE WITH LOGISTIC REGRESSION #
# Fit on the current X, y; fit() returns the estimator itself.
lr_model = LogisticRegressionCV().fit(X, y)

# coef_[0] carries one weight per feature column (theta1 -> column 0,
# theta2 -> column 1); intercept_ is kept as the array sklearn returns.
theta1, theta2 = lr_model.coef_[0]
intercept = lr_model.intercept_

def decision_boundary(x1, intercept, theta1, theta2):
    """Solve ``intercept + theta1*x1 + theta2*x2 = 0`` for ``x2``.

    ``x1`` may be a scalar or an array; the arithmetic is applied as written,
    so array inputs are handled element-wise. ``theta2`` is used as a divisor.
    """
    weighted_sum = intercept + theta1 * x1
    return -weighted_sum / theta2

# make_figure draws feature column 1 on the horizontal axis, so sweep that
# column's observed range for the boundary's x-coordinates.
x1 = np.linspace(X[:, 1].min(), X[:, 1].max(), 250)
# The weights are passed swapped on purpose: the swept variable is column 1
# (weight theta2) and we solve for column 0 (weight theta1), which is what
# make_figure puts on the vertical axis.
x2 = decision_boundary(x1, intercept, theta2, theta1)

decision_boundary_plot = Scatter(
    name='Decision Boundary',
    x=x1,
    y=x2,
    # 'markers' (plural) is the valid scatter mode flag; 'marker' is not.
    mode='markers',
    # Plain dicts instead of the deprecated Marker/Line wrapper classes.
    marker=dict(
        color='green',
        line=dict(width=1),
    ),
)
# With the default voronoi=True the figure also contains the prediction heat
# map of whichever classifier is currently bound to the global `knn_model`.
figure = make_figure(X, y, other_plots=[decision_boundary_plot])
py.iplot(figure)

In [8]:
# make sample data with 4 classes
observations_per_class = 100
n_features = 2
means = [(1, 1), (1, -1), (-1, 1), (-1, -1)]

# Dedicated, seeded generator: the same samples are drawn on every run,
# independent of the state of NumPy's global RNG.
rng = np.random.RandomState(0)

# One Gaussian blob per class centre (scipy's default covariance is used),
# collected in a list and stacked once instead of growing X inside the loop.
blobs = [
    multivariate_normal(mean=mean).rvs(observations_per_class, random_state=rng)
    for mean in means
]
X = np.vstack(blobs)
# Label i marks the rows drawn around means[i]; kept as float64, the dtype the
# labels had when they were appended to an empty float array.
y = np.repeat(np.arange(len(means), dtype=float), observations_per_class)

# Guard against a mismatch between `means` and the declared feature count.
assert X.shape == (len(means) * observations_per_class, n_features)

In [9]:
# K = 1 #
# Build and fit in one step; fit() returns the estimator itself.
knn_model = KNeighborsClassifier(n_neighbors=1).fit(X, y)

# make_figure picks the fitted classifier up from the global `knn_model`.
figure = make_figure(X=X, y=y)
py.iplot(figure)

In [11]:
# K = 2 #
# Rebind the global classifier to a 2-neighbour model fitted on the current data.
knn_model = KNeighborsClassifier(n_neighbors=2).fit(X, y)

# Decision regions for the freshly fitted model.
figure = make_figure(X=X, y=y)
py.iplot(figure)

In [12]:
# K = 3 #
# Rebind the global classifier to a 3-neighbour model fitted on the current data.
knn_model = KNeighborsClassifier(n_neighbors=3).fit(X, y)

# Decision regions for the freshly fitted model.
figure = make_figure(X=X, y=y)
py.iplot(figure)

In [13]:
# K = 10 #
# Rebind the global classifier to a 10-neighbour model fitted on the current data.
knn_model = KNeighborsClassifier(n_neighbors=10).fit(X, y)

# Decision regions for the freshly fitted model.
figure = make_figure(X=X, y=y)
py.iplot(figure)

In [15]:
# K = 25 #
# Rebind the global classifier to a 25-neighbour model fitted on the current data.
knn_model = KNeighborsClassifier(n_neighbors=25).fit(X, y)

# Decision regions for the freshly fitted model.
figure = make_figure(X=X, y=y)
py.iplot(figure)

In [10]:
# K = 1, cosine #
# Same single-neighbour model, but neighbours are ranked with the cosine
# metric using the brute-force search backend.
knn_model = KNeighborsClassifier(
    n_neighbors=1,
    metric='cosine',
    algorithm='brute',
).fit(X, y)

# make_figure picks the fitted classifier up from the global `knn_model`.
figure = make_figure(X=X, y=y)
py.iplot(figure)

In [14]:
# K = 10, cosine #
# Ten-neighbour model ranked with the cosine metric using the brute-force
# search backend.
knn_model = KNeighborsClassifier(
    n_neighbors=10,
    metric='cosine',
    algorithm='brute',
).fit(X, y)

# Decision regions for the freshly fitted model.
figure = make_figure(X=X, y=y)
py.iplot(figure)