# 决策树鸢尾花分类


In [1]:
from sklearn.datasets import load_iris
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt


In [2]:
# Load the iris dataset once (the previous version called load_iris() twice,
# once for the features and once for the labels).
iris = load_iris()
# Keep only feature columns 2 and 3 so the data can be drawn in a 2-D plane.
x, y = iris.data[:, 2:4], iris.target
# An integer test_size is an absolute number of held-out samples (50 here);
# random_state=0 makes the shuffle reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=50, random_state=0
)


In [3]:

# Sweep max_depth over 1..14 and record the held-out error rate of an
# entropy-criterion decision tree at each depth.
depth = np.arange(1, 15)
err_list = []
for max_depth in depth:
    model = DecisionTreeClassifier(criterion='entropy', max_depth=max_depth)
    model.fit(x_train, y_train)
    # The error rate is the complement of the accuracy on the test split.
    err_list.append(1 - accuracy_score(y_test, model.predict(x_test)))

# Red circles joined by a solid line: error rate as a function of depth.
plt.plot(depth, err_list, 'ro-')
plt.xlabel('depth of the tree')
plt.ylabel('predicted error rate')
plt.show()

In [4]:
# Train the classifier that the following cell visualises: an entropy-based
# tree limited to depth 3, then report its accuracy on the test split.
model = DecisionTreeClassifier(criterion='entropy', max_depth=3)
model.fit(x_train, y_train)

pred = model.predict(x_test)
ac = accuracy_score(y_test, pred)
print('模型预测准确率', ac)

In [5]:
from matplotlib.colors import ListedColormap

# Sample a 500 x 500 grid over [0, 8] x [0, 3] and classify every grid point
# with the fitted model to obtain the decision regions.
N, M = 500, 500
x1, x2 = np.meshgrid(np.linspace(0, 8, N), np.linspace(0, 3, M))
x_new = np.column_stack((x1.ravel(), x2.ravel()))
y_hat = model.predict(x_new).reshape(x1.shape)

# One background colour per predicted class.
iris_cmap = ListedColormap(['#ACC6c0', '#FF8080', '#A0A0FF'])
plt.pcolormesh(x1, x2, y_hat, cmap=iris_cmap)

# Overlay the samples, one colour/marker combination per class id.
for class_id, colour, marker in ((0, 'g', 'o'), (1, 'r', 'v'), (2, 'b', '*')):
    members = y == class_id
    plt.scatter(x[members, 0], x[members, 1], c=colour, s=60, marker=marker)
plt.show()

In [6]:
from sklearn.tree import DecisionTreeRegressor

# Toy regression data: five samples with a single feature each.
x = np.array([[1], [2], [3], [5], [7]])
y = np.array([[4], [8], [9], [10], [19]])

# Sweep the tree depth and record how closely a regression tree fits the toy
# data. The previous version refit the iris DecisionTreeClassifier on
# x_train/y_train here, so the regressor import and the x/y arrays above were
# never used.
depth = np.arange(1, 10)
err_list = []
# predict() returns a 1-D array; flatten the (5, 1) targets so the residual
# below is element-wise instead of broadcasting to a 5 x 5 matrix.
y_flat = y.ravel()
for i in depth:
    model = DecisionTreeRegressor(max_depth=i)
    model.fit(x, y_flat)
    # With only five samples there is no held-out split, so the error is
    # measured on the training data itself.
    pred = model.predict(x)
    err = np.mean((y_flat - pred) ** 2)
    err_list.append(err)

# Blue circles joined by a dashed line: fit error as a function of depth.
plt.plot(depth, err_list, 'bo--')
plt.xlabel('depth')
plt.ylabel('mean squared error')
plt.show()

# 决策树解决分类


In [7]:
import pandas as pd

# Read the comma-separated customer file; the column names are supplied
# explicitly through `names`.
names = ['age', 'height', 'weight', 'gender']
dataset = pd.read_csv(
    "gender-data-y.txt",
    delimiter=',',
    names=names,
)
print('客户信息数据集为:', dataset)

In [8]:
from sklearn import preprocessing

# Cast both measurement columns to float.
for column in ('height', 'weight'):
    dataset[column] = dataset[column].astype(float)

# Encode the gender values as integer class ids in a new 'label' column.
label = preprocessing.LabelEncoder()
dataset['label'] = label.fit_transform(dataset['gender'])
print('经过处理后数据集为：\n', dataset)

In [9]:
import matplotlib.pyplot as plt

# Feature matrix: the height and weight columns (positions 1 and 2).
data = dataset[['height', 'weight']].values
# Class ids from the 'label' column (position 4) as a 1-D array. Selecting the
# column directly replaces the former reshape(1, 100)[0], which raised for any
# dataset that did not contain exactly 100 rows.
target = dataset['label'].values

# Scatter the samples by class: red circles for label 0, blue stars for 1.
plt.scatter(data[target == 0, 0], data[target == 0, 1], c='r', s=60, marker='o')
plt.scatter(data[target == 1, 0], data[target == 1, 1], c='b', s=60, marker='*')

plt.xlabel('Height/cm')
plt.ylabel('weight/kg')
plt.show()

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
# The decision-region cell further down calls np.linspace/np.meshgrid, so bind
# the conventional `np` alias here.
import numpy as np
# NOTE(review): `py` looks like a typo for `np`; the alias is kept only in
# case code outside this view relies on it.
import numpy as py

# Hold out 30% of the height/weight samples for testing; random_state=0 makes
# the shuffle reproducible.
x, y = data, target
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)

In [11]:
from sklearn.metrics import classification_report

# Fit an entropy-based tree (depth limit 5) on the training split and print
# the classification report for the test split.
model = DecisionTreeClassifier(criterion='entropy', max_depth=5)
model.fit(x_train, y_train)

pred = model.predict(x_test)
# NOTE(review): `re` shares its name with the stdlib regex module; the name is
# kept unchanged in case other cells read it.
re = classification_report(y_test, pred)
print('模型预测的评估报告为：\n', re)

In [12]:
from matplotlib.colors import ListedColormap

# Sample a 500 x 500 grid over [140, 195] x [30, 90] (the axes are labelled
# Height/cm and weight/kg below) and classify every grid point with the
# fitted model to obtain the decision regions.
N, M = 500, 500
x1, x2 = np.meshgrid(np.linspace(140, 195, N), np.linspace(30, 90, M))
x_new = np.column_stack((x1.ravel(), x2.ravel()))
y_hat = model.predict(x_new).reshape(x1.shape)

# One background colour per predicted class.
iris_cmap = ListedColormap(['#ACC6c0', '#FF8080'])
plt.pcolormesh(x1, x2, y_hat, cmap=iris_cmap)

# Overlay the samples, one colour/marker combination per class id.
for class_id, colour, marker in ((0, 'r', 'o'), (1, 'b', 's')):
    members = y == class_id
    plt.scatter(x[members, 0], x[members, 1], c=colour, s=60, marker=marker)

plt.xlabel('Height/cm')
plt.ylabel('weight/kg')
plt.show()