In [1]:
import pandas as pd
import numpy as np
import sklearn
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import HoverTool

In [2]:
# Load the raw Iris measurements. The file has no header row, so pandas
# assigns integer column labels; comma is already read_csv's default separator.
iris_data = pd.read_csv('iris.data', header=None)

In [3]:
# Preview the first five rows of the freshly loaded frame.
iris_data.head(n=5)

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# Inspect the column labels pandas assigned when the file was read without a header.
iris_data.columns

Int64Index([0, 1, 2, 3, 4], dtype='int64')

In [5]:
# Replace the positional integer labels with descriptive column names.
iris_data.columns = [
    "sepal_length",
    "sepal_width",
    "petal_length",
    "petal_width",
    "class",
]

In [6]:
# Preview the first five rows again to confirm the new column names took effect.
iris_data.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [7]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
iris_data.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [8]:
# Display the four measurement columns for the rows labelled Iris-setosa.
iris_data.loc[
    iris_data['class'] == 'Iris-setosa',
    ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
5,5.4,3.9,1.7,0.4
6,4.6,3.4,1.4,0.3
7,5.0,3.4,1.5,0.2
8,4.4,2.9,1.4,0.2
9,4.9,3.1,1.5,0.1


In [9]:
# Measurement columns for the rows whose class is Iris-setosa.
setosa = iris_data.loc[
    iris_data['class'] == 'Iris-setosa',
    ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
]

In [10]:
# Measurement columns for the rows whose class is Iris-versicolor
# (the data file spells the class without the "u" used in the variable name).
versicolour = iris_data.loc[
    iris_data['class'] == 'Iris-versicolor',
    ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
]

In [11]:
# Measurement columns for the rows whose class is Iris-virginica.
virginica = iris_data.loc[
    iris_data['class'] == 'Iris-virginica',
    ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
]

In [12]:
# Configure Bokeh to render plots inline in the notebook; must run before any show().
output_notebook()

In [13]:
# Toolbar shared by every figure in this notebook, as Bokeh's comma-separated tool string.
tools = ','.join(['pan', 'wheel_zoom', 'box_zoom', 'reset', 'hover', 'save'])

In [14]:
# Title for the sepal figure created in the next cell.
plot_title = "Sepal Length & Width"

In [15]:
# Empty sepal figure; the scatter glyphs are added by the cells that follow.
p = figure(
    title=plot_title,
    tools=tools,
    x_axis_label="Sepal Length",
    y_axis_label="Sepal Width"
)

In [16]:
# Select the HoverTool attached to the sepal figure and set its point policy
# to follow the mouse.
hover = p.select({'type': HoverTool})
hover.point_policy = 'follow_mouse'

In [17]:
# Sepal

In [18]:
# Iris-setosa on the sepal figure: red squares, sepal length (x) vs sepal width (y).
# `.values` replaces `Series.get_values()`, which was deprecated in pandas 0.25 and
# removed in 1.0; it returns the same NumPy array on every pandas version.
# NOTE(review): Bokeh >= 1.4 deprecates `legend=` in favour of `legend_label=`;
# switch once the Bokeh version in use is confirmed to support it.
setosa_sepal_plot = p.scatter(
    setosa['sepal_length'].values,
    setosa['sepal_width'].values,
    marker="square", fill_color="red", legend="Iris-setosa"
)

In [19]:
# Iris-versicolour on the sepal figure: blue circles, sepal length (x) vs sepal width (y).
# `.values` replaces `Series.get_values()`, which was deprecated in pandas 0.25 and
# removed in 1.0; it returns the same NumPy array on every pandas version.
# NOTE(review): Bokeh >= 1.4 deprecates `legend=` in favour of `legend_label=`;
# switch once the Bokeh version in use is confirmed to support it.
versicolour_sepal_plot = p.scatter(
    versicolour['sepal_length'].values,
    versicolour['sepal_width'].values,
    marker="circle", fill_color="blue", legend="Iris-versicolour"
)

In [20]:
# Iris-virginica on the sepal figure: yellow triangles, sepal length (x) vs sepal width (y).
# `.values` replaces `Series.get_values()`, which was deprecated in pandas 0.25 and
# removed in 1.0; it returns the same NumPy array on every pandas version.
# NOTE(review): Bokeh >= 1.4 deprecates `legend=` in favour of `legend_label=`;
# switch once the Bokeh version in use is confirmed to support it.
virginica_sepal_plot = p.scatter(
    virginica['sepal_length'].values,
    virginica['sepal_width'].values,
    marker="triangle", fill_color="yellow", legend="Iris-virginica"
)

In [21]:
# Render the sepal length/width figure with all three species overlaid.
show(p)

In [22]:
# Plotting sepal length against sepal width shows that Iris-setosa can be identified from these two attributes.
# Consider using them to form a new feature.

In [23]:
# Rebind the title for the petal figure created in the next cell.
plot_title = "Petal Length & Width"

In [24]:
# Empty petal figure; the scatter glyphs are added by the cells that follow.
petals = figure(
    title=plot_title,
    tools=tools,
    x_axis_label="Petal Length",
    y_axis_label="Petal Width"
)

In [25]:
# Select the HoverTool attached to the petal figure and set its point policy
# to follow the mouse.
hover = petals.select({'type': HoverTool})
hover.point_policy = 'follow_mouse'

In [26]:
# Iris-setosa on the petal figure: red squares, petal length (x) vs petal width (y).
# `.values` replaces `Series.get_values()`, which was deprecated in pandas 0.25 and
# removed in 1.0; it returns the same NumPy array on every pandas version.
# NOTE(review): Bokeh >= 1.4 deprecates `legend=` in favour of `legend_label=`;
# switch once the Bokeh version in use is confirmed to support it.
setosa_petal_plot = petals.scatter(
    setosa['petal_length'].values,
    setosa['petal_width'].values,
    marker="square", fill_color="red", legend="Iris-setosa"
)

In [27]:
# Iris-versicolour on the petal figure: blue circles, petal length (x) vs petal width (y).
# `.values` replaces `Series.get_values()`, which was deprecated in pandas 0.25 and
# removed in 1.0; it returns the same NumPy array on every pandas version.
# NOTE(review): Bokeh >= 1.4 deprecates `legend=` in favour of `legend_label=`;
# switch once the Bokeh version in use is confirmed to support it.
versicolour_petal_plot = petals.scatter(
    versicolour['petal_length'].values,
    versicolour['petal_width'].values,
    marker="circle", fill_color="blue", legend="Iris-versicolour"
)

In [28]:
# Iris-virginica on the petal figure: yellow triangles, petal length (x) vs petal width (y).
# `.values` replaces `Series.get_values()`, which was deprecated in pandas 0.25 and
# removed in 1.0; it returns the same NumPy array on every pandas version.
# NOTE(review): Bokeh >= 1.4 deprecates `legend=` in favour of `legend_label=`;
# switch once the Bokeh version in use is confirmed to support it.
virginica_petal_plot = petals.scatter(
    virginica['petal_length'].values,
    virginica['petal_width'].values,
    marker="triangle", fill_color="yellow", legend="Iris-virginica"
)

In [30]:
# Render the petal length/width figure with all three species overlaid.
show(petals)

In [31]:
# Plotting petal length against petal width shows that Iris-setosa can still be identified from these two attributes.
# Consider using them to form a new feature. This new feature can also strongly separate versicolour and virginica.