# Demo: Decision Trees

In [1]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the Iris dataset
iris = load_iris()

# We're going to consider two of the four feature names, petal length
# and petal width, and we'll ignore sepal length and width.
X = iris.data[:, :]
y = iris.target
iris.feature_names[:]

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [6]:
tree_clf = DecisionTreeClassifier(max_depth=3)
tree_clf.fit(X, y)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=3,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [7]:
# Let's take a look at the decision tree...
from sklearn.tree import export_graphviz
export_graphviz(tree_clf, out_file="iris_tree.dot",
               feature_names=iris.feature_names[:],
               class_names=iris.target_names,
               rounded=True,
               filled=True)

In [8]:
# dot will convert the tree from a .dot file to a .png
# In case you don't have dot installed, the png is already
# in your zip file
!dot -Tpng iris_tree.dot -o iris_tree.png
!open iris_tree.png

In [9]:
# how did we do?
tree_clf.score(X, y)

0.9733333333333334

In [19]:
# If we saw a new iris with petal length 5cm and petal width 2cm,
# what are the probabilities we would classify it as each of the
# different species of iris?
tree_clf.predict_proba([[5, 1.5, 3, 2]])

array([[0.        , 0.33333333, 0.66666667]])

In [10]:
# So what would our model predict?
tree_clf.predict([[5, 1.5, 3, 2]])

array([2])

In [12]:
iris.target_names[2]

'virginica'

In [None]:
iris.target