In [102]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.plotly as py
import plotly.graph_objs as go
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
%matplotlib inline

# Classification

In [35]:
n_circles = 30
x_circles = np.random.normal(loc=1.0, scale=0.4, size=n_circles)
y_circles = np.random.uniform(low=0.0, high=1.0, size=n_circles) + np.random.normal(loc=0.0, scale=0.4, size=n_circles)

In [20]:
n_crosses = 30
x_crosses = np.random.uniform(low=0.0, high=3.0, size=n_crosses)
y_crosses = np.random.uniform(low=1.0, high=3.0, size=n_crosses)

In [38]:
x_unknown = [0.7, 2.2]
y_unknown = [1.2, 0.5]

In [36]:
trace_circles = go.Scatter(
    x = x_circles,
    y = y_circles,
    name = 'velos',
    mode = 'markers',
    marker = dict(
        size = 10,
        color = 'rgba(152, 0, 0, .8)',
        line = dict(
            width = 2,
            color = 'rgb(0, 0, 0)'
        )
    )
)

trace_crosses = go.Scatter(
    x = x_crosses,
    y = y_crosses,
    name = 'voitures',
    mode = 'markers',
    marker = dict(
        size = 10,
        color = 'rgba(0, 182, 193, .9)',
        line = dict(
            width = 2,
        )
    )
)

data = [trace_circles, trace_crosses]

py.iplot(data, filename='basic-scatter')

In [39]:
trace_circles = go.Scatter(
    x = x_circles,
    y = y_circles,
    name = 'velos',
    mode = 'markers',
    marker = dict(
        size = 10,
        color = 'rgba(152, 0, 0, .8)',
        line = dict(
            width = 2,
            color = 'rgb(0, 0, 0)'
        )
    )
)

trace_crosses = go.Scatter(
    x = x_crosses,
    y = y_crosses,
    name = 'voitures',
    mode = 'markers',
    marker = dict(
        size = 10,
        color = 'rgba(0, 182, 193, .9)',
        line = dict(
            width = 2,
        )
    )
)

trace_unknowns = go.Scatter(
    x = x_unknown,
    y = y_unknown,
    name = "unknown",
    mode = "markers",
    marker = dict(
        size = 10,
        color = 'rgba(0, 0, 0, 0.15)',
        line = dict(width = 2,)
    )
)

data = [trace_circles, trace_crosses, trace_unknowns]

py.iplot(data, filename='basic-scatter')

## Applying model

In [66]:
X = pd.DataFrame({"x": np.append(x_circles, x_crosses),
                 "y": np.append(y_circles, y_crosses)}).as_matrix()
y = [0] * n_circles + [1] * n_crosses

In [97]:
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))
y_ = np.arange(y_min, y_max, 0.1)


### KNN

In [96]:
knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(X, y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=7, p=2,
           weights='uniform')

In [98]:
z = knn.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

In [101]:
trace_circles = go.Scatter(
    x = x_circles,
    y = y_circles,
    name = 'velos',
    mode = 'markers',
    marker = dict(
        size = 10,
        color = 'rgba(152, 0, 0, .8)',
        line = dict(
            width = 2,
            color = 'rgb(0, 0, 0)'
        )
    )
)

trace_crosses = go.Scatter(
    x = x_crosses,
    y = y_crosses,
    name = 'voitures',
    mode = 'markers',
    marker = dict(
        size = 10,
        color = 'rgba(0, 182, 193, .9)',
        line = dict(
            width = 2,
        )
    )
)

trace_unknowns = go.Scatter(
    x = x_unknown,
    y = y_unknown,
    name = "unknown",
    mode = "markers",
    marker = dict(
        size = 10,
        color = 'rgba(0, 0, 0, 0.15)',
        line = dict(width = 2,)
    )
)

contour = go.Contour(
    x=xx[0], y=y_, 
    z=z,
    colorscale=[[0, 'purple'],
                [1, 'cyan']
               ],
    opacity=0.5,
    showscale=False
)

data = [trace_circles, trace_crosses, trace_unknowns, contour]

py.iplot(data, filename='basic-scatter')

### Decision trees

In [110]:
tree = DecisionTreeClassifier(max_depth=5)
tree.fit(X, y)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=5,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [111]:
z = tree.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

In [112]:
trace_circles = go.Scatter(
    x = x_circles,
    y = y_circles,
    name = 'velos',
    mode = 'markers',
    marker = dict(
        size = 10,
        color = 'rgba(152, 0, 0, .8)',
        line = dict(
            width = 2,
            color = 'rgb(0, 0, 0)'
        )
    )
)

trace_crosses = go.Scatter(
    x = x_crosses,
    y = y_crosses,
    name = 'voitures',
    mode = 'markers',
    marker = dict(
        size = 10,
        color = 'rgba(0, 182, 193, .9)',
        line = dict(
            width = 2,
        )
    )
)

trace_unknowns = go.Scatter(
    x = x_unknown,
    y = y_unknown,
    name = "unknown",
    mode = "markers",
    marker = dict(
        size = 10,
        color = 'rgba(0, 0, 0, 0.15)',
        line = dict(width = 2,)
    )
)

contour = go.Contour(
    x=xx[0], y=y_, 
    z=z,
    colorscale=[[0, 'purple'],
                [1, 'cyan']
               ],
    opacity=0.5,
    showscale=False
)

data = [trace_circles, trace_crosses, trace_unknowns, contour]

py.iplot(data, filename='basic-scatter')

# Wine

In [100]:
wine_data = datasets.load_wine()

In [47]:
wine_data.data.shape

(178, 13)

In [48]:
wine_data.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2])

In [49]:
wine_data.feature_names

['alcohol',
 'malic_acid',
 'ash',
 'alcalinity_of_ash',
 'magnesium',
 'total_phenols',
 'flavanoids',
 'nonflavanoid_phenols',
 'proanthocyanins',
 'color_intensity',
 'hue',
 'od280/od315_of_diluted_wines',
 'proline']

In [50]:
wine_data.target_names

array(['class_0', 'class_1', 'class_2'], dtype='<U7')