In [1]:
import numpy as np
import pandas as pd
from plotly.offline import init_notebook_mode, iplot

init_notebook_mode(connected=True)
import plotly.graph_objs as go
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

%matplotlib inline



# Récupération des données

In [2]:
# Load the wine dataset from the CSV file located in the working directory.
wine_data = pd.read_csv(filepath_or_buffer="wines.csv")

In [3]:
# Preview the first five rows to check the columns that were loaded.
wine_data.head(n=5)

Unnamed: 0,flavanoids,alcohol,wine_class
0,2.65,13.05,1
1,2.29,12.08,1
2,3.24,13.16,0
3,0.76,13.32,2
4,1.57,13.5,2


In [4]:
# Names of the dataset columns used throughout the notebook:
# the two plotted/learned features, then the class label.
flavanoids_column_name, alcohol_column_name = "flavanoids", "alcohol"
target_column_name = "wine_class"

In [5]:
# Feature matrix: every column of the dataset except the class label.
features = wine_data.drop(columns=target_column_name)
# Label vector: the class label of each row.
target = wine_data.loc[:, target_column_name]

In [6]:
def make_wine_class_trace(wine_class, marker_color):
    """Build the scatter trace for the wines belonging to one class.

    Reads the notebook-level ``wine_data`` and ``target`` objects.

    Parameters
    ----------
    wine_class : int
        Value of the target column selecting the rows to plot.
    marker_color : str
        Colour string used to fill the markers.

    Returns
    -------
    plotly.graph_objs.Scatter
        Marker trace with flavanoids on the x axis and alcohol on the y axis.
    """
    class_rows = target == wine_class
    return go.Scatter(
        x=wine_data.loc[class_rows, flavanoids_column_name],
        y=wine_data.loc[class_rows, alcohol_column_name],
        name=f'Type {wine_class}',
        mode='markers',
        marker=dict(
            size=10,
            color=marker_color,
            line=dict(
                width=2,
                color='rgb(0, 0, 0)'
            )
        )
    )


# One trace per wine class. The blue used for type 0 is the same one used by
# the decision-boundary figure so both plots share a colour code.
trace_wines_type_0 = make_wine_class_trace(0, 'rgba(0, 0, 152, .8)')
trace_wines_type_1 = make_wine_class_trace(1, 'rgba(0, 152, 0, .8)')
trace_wine_types_2 = make_wine_class_trace(2, 'rgba(152, 0, 0, .8)')

# Wines whose class is not known, drawn as translucent grey markers.
trace_unknown_wines = go.Scatter(
    x=[1.8, 0.7, 3.1],
    y=[13.2, 12.0, 12.7],
    name="Inconnus",
    mode="markers",
    marker=dict(
        size=10,
        color='rgba(0, 0, 0, 0.15)',
        line=dict(width=2, )
    )
)

layout = dict(title='Vins',
              xaxis=dict(title=flavanoids_column_name),
              yaxis=dict(title=alcohol_column_name)
              )

data = [trace_wines_type_0, trace_wines_type_1, trace_wine_types_2, trace_unknown_wines]

iplot({"data": data, "layout": layout}, filename=None)

# Entraînement de l'algorithme des K plus proches voisins

In [7]:
# K-nearest-neighbours classifier that votes among the 6 closest training wines.
knn = KNeighborsClassifier(n_neighbors=6)
# Fit on the feature matrix and class labels; the fitted estimator is the
# cell's last expression, so its repr is displayed.
knn.fit(X=features, y=target)

KNeighborsClassifier(n_neighbors=6)

In [16]:
# Unknown wine to look up, given in the same column order as the training
# features (flavanoids = 2.4, alcohol = 13.2 in the figures). Wrapping it in a
# DataFrame with the training column names keeps the query consistent with
# the DataFrame the classifier was fitted on.
unknown_wine = pd.DataFrame([[2.4, 13.2]], columns=features.columns)
distance_nearest_neighbors, nearest_neighbors = knn.kneighbors(unknown_wine)

# kneighbors returns row *positions* within the training data, not index
# labels, so the rows are selected positionally with iloc (loc would only be
# correct when the index happens to be the default 0..n-1 range).
wine_data.iloc[nearest_neighbors[0], :]

Unnamed: 0,flavanoids,alcohol,wine_class
167,2.43,13.05,0
145,2.19,13.3,0
143,2.63,13.24,0
115,2.41,12.93,0
137,2.64,13.07,0
32,2.68,13.28,0


# Frontières de décision

In [8]:
# Evaluation grid covering the data range, extended by a margin on every side.
grid_margin = 1
grid_step = 0.1
x_min = wine_data.loc[:, flavanoids_column_name].min() - grid_margin
x_max = wine_data.loc[:, flavanoids_column_name].max() + grid_margin
y_min = wine_data.loc[:, alcohol_column_name].min() - grid_margin
y_max = wine_data.loc[:, alcohol_column_name].max() + grid_margin

# Each axis is computed once and reused for both the mesh and the contour.
x_ = np.arange(x_min, x_max, grid_step)
y_ = np.arange(y_min, y_max, grid_step)
xx, yy = np.meshgrid(x_, y_)

# Predict the class of every grid point. The points are passed with the
# training column names so they match the DataFrame the classifier was
# fitted on.
grid_points = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=features.columns)
z = knn.predict(grid_points).reshape(xx.shape)

# One marker trace per wine class, built from a single class -> colour table.
class_marker_colors = {
    0: 'rgba(0, 0, 152, .8)',
    1: 'rgba(0, 152, 0, .8)',
    2: 'rgba(152, 0, 0, .8)',
}
class_traces = [
    go.Scatter(
        x=wine_data.loc[target == wine_class, flavanoids_column_name],
        y=wine_data.loc[target == wine_class, alcohol_column_name],
        name=f'Type {wine_class}',
        mode='markers',
        marker=dict(
            size=10,
            color=marker_color,
            line=dict(
                width=2,
                color='rgb(0, 0, 0)'
            )
        )
    )
    for wine_class, marker_color in class_marker_colors.items()
]
trace_wines_type_0, trace_wines_type_1, trace_wine_types_2 = class_traces

# Decision regions. Colorscale stops must be normalized to [0, 1]; with
# zmin=0 and zmax=2 the class labels 0, 1 and 2 land on the stops 0, 0.5 and 1.
# NOTE(review): class 1 markers are green while its region is yellow -
# confirm this mismatch is intended.
contour = go.Contour(
    x=x_, y=y_,
    z=z,
    colorscale=[[0, 'blue'],
                [0.5, 'yellow'],
                [1, 'red']
                ],
    zmin=0,
    zmax=2,
    opacity=0.5,
    showscale=False
)

# Wines whose class is not known, drawn as translucent grey markers.
trace_unknown_wines = go.Scatter(
    x=[3.5, 2.4],
    y=[12.1, 13.2],
    name="Inconnus",
    mode="markers",
    marker=dict(
        size=10,
        color='rgba(0, 0, 0, 0.15)',
        line=dict(width=2, )
    )
)

data = [trace_wines_type_0, trace_wines_type_1, trace_wine_types_2, contour, trace_unknown_wines]

layout = dict(title='Vins',
              xaxis=dict(title=flavanoids_column_name),
              yaxis=dict(title=alcohol_column_name)
              )

iplot({"data": data, "layout": layout}, filename=None)