### P017 IRIS数据-加载认识数据

In [1]:
from sklearn.datasets import load_iris

In [2]:
# Load the Iris dataset that ships with scikit-learn into a single container object.
iris = load_iris()

In [3]:
# Check what kind of object load_iris() handed back.
type(iris)

sklearn.utils.Bunch

In [4]:
# The container is dict-like: list the fields it exposes.
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [5]:
# Location of the underlying data file. NOTE: the recorded output is an absolute,
# machine-specific path, so it will differ on another installation.
iris["filename"]

'/Users/peishuaishuai/.local/lib/python3.8/site-packages/sklearn/datasets/data/iris.csv'

### P018 IRIS数据-查看列名和分类名

In [6]:
from sklearn.datasets import load_iris

In [7]:
# Reload the dataset so this section can be run independently of the previous one.
iris = load_iris()

In [8]:
# Human-readable names of the measured features (one per column of iris['data']).
iris['feature_names']

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [9]:
# Names of the classes; the index of each name is the integer label stored in iris['target'].
iris['target_names']

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

### P019 IRIS数据-数据和目标的shape

In [10]:
from sklearn.datasets import load_iris

In [11]:
# Reload the dataset so this section is self-contained.
iris = load_iris()

In [12]:
# Pull the feature matrix and the label vector out of the dataset container.
data, target = iris['data'], iris['target']

In [13]:
# Print the dimensions of the features and of the labels, one per line.
print(data.shape, target.shape, sep='\n')

(150, 4)
(150,)


### P020 IRIS数据-拆分训练测试集

In [14]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [15]:
# Reload the dataset so the split below does not rely on state from earlier sections.
iris = load_iris()

In [16]:
# Separate the features from the labels before splitting.
data, target = iris['data'], iris['target']

In [17]:
# Hold out 30% of the samples for testing. The shuffle is seeded (with the same
# value the later sections use) so that re-running the notebook from a fresh
# kernel always produces the same train/test partition.
data_train, data_test, target_train, target_test = train_test_split(
    data, target, test_size=0.3, random_state=20
)

In [18]:
# Report the size of each piece of the split, one line per array.
print(
    f'data_train shape: {data_train.shape}',
    f'target_train shape: {target_train.shape}',
    f'data_test shape: {data_test.shape}',
    f'target_test shape: {target_test.shape}',
    sep='\n',
)

data_train shape: (105, 4)
target_train shape: (105,)
data_test shape: (45, 4)
target_test shape: (45,)


### P021 IRIS数据-逻辑回归训练

In [19]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
 
# Load Iris and separate the features from the labels.
iris = load_iris()
data, target = iris['data'], iris['target']

# Keep 30% of the samples aside for evaluation; the fixed seed makes the split repeatable.
data_train, data_test, target_train, target_test = train_test_split(
    data, target, test_size=0.3, random_state=20
)

In [20]:
# Sanity check on the training features: rows are samples, columns are features.
data_train.shape

(105, 4)

In [21]:
# There should be exactly one label per training row.
target_train.shape

(105,)

In [22]:
# Create the classifier and train it on the training split in one expression
# (fit() returns the estimator itself). max_iter is set well above
# scikit-learn's default, presumably to avoid convergence warnings.
model = LogisticRegression(max_iter=1000).fit(data_train, target_train)
# Bare name as the last expression so the fitted estimator is still displayed.
model

LogisticRegression(max_iter=1000)

In [23]:
# For a classifier, score() reports the mean accuracy on the data it is given —
# here the held-out test split, which the model never saw during training.
accuracy = model.score(data_test, target_test)
accuracy

0.9333333333333333

### P022 IRIS数据-在测试集上实现预估

In [24]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Rebuild the dataset and the seeded 70/30 split so this section runs on its own.
iris = load_iris()
data, target = iris['data'], iris['target']
data_train, data_test, target_train, target_test = train_test_split(
    data, target, test_size=0.3, random_state=20
)

# Train the classifier; as the cell's last expression, the fitted estimator is displayed.
model = LogisticRegression(max_iter=1000)
model.fit(data_train, target_train)

LogisticRegression(max_iter=1000)

In [25]:
# Ask the trained model for a class label for every sample in the test split.
target_pred = model.predict(data_test)

In [26]:
# Display the predicted integer class labels, one per test sample.
target_pred

array([0, 1, 1, 2, 1, 1, 2, 0, 2, 0, 2, 1, 1, 0, 0, 2, 0, 1, 2, 1, 1, 2,
       2, 0, 1, 1, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 2, 1, 2, 0, 1,
       1])

### P023 IRIS数据-理解混淆矩阵

In [27]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Rebuild the dataset and the seeded split so this section is self-contained.
iris = load_iris()
data, target = iris['data'], iris['target']
data_train, data_test, target_train, target_test = train_test_split(
    data, target, test_size=0.3, random_state=20
)

# Train on the training split (fit() returns the estimator itself).
model = LogisticRegression(max_iter=1000).fit(data_train, target_train)

# Predictions on the held-out samples, to be compared against target_test.
target_pred = model.predict(data_test)

In [28]:
from sklearn.metrics import confusion_matrix

In [29]:
# The first argument is the ground truth and the second the predictions, so
# rows correspond to true classes and columns to predicted classes. Diagonal
# entries count correct predictions; off-diagonal entries count mistakes.
cm = confusion_matrix(target_test, target_pred)
cm

array([[13,  0,  0],
       [ 0, 18,  0],
       [ 0,  3, 11]])

### P024 IRIS数据-理解分类报告

In [30]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
 
# Recreate the data, the seeded split, the model and its predictions so the
# report below can be produced without running the earlier sections.
iris = load_iris()
data, target = iris['data'], iris['target']
data_train, data_test, target_train, target_test = train_test_split(
    data, target, test_size=0.3, random_state=20
)

# fit() returns the estimator itself, so creation and training can be chained.
model = LogisticRegression(max_iter=1000).fit(data_train, target_train)

target_pred = model.predict(data_test)

In [31]:
# Per-class precision, recall and F1 on the test split. Passing target_names
# makes the table show the class names instead of the integer labels.
# The report is a plain-text string, hence the explicit print().
print(classification_report(
    target_test, target_pred, target_names=iris["target_names"]))

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        13
  versicolor       0.86      1.00      0.92        18
   virginica       1.00      0.79      0.88        14

    accuracy                           0.93        45
   macro avg       0.95      0.93      0.93        45
weighted avg       0.94      0.93      0.93        45

