## sklearn.datasets.load_iris
* sklearn.datasets.load_iris(*, return_X_y=False, as_frame=False)
* Returns:

    data: Bunch
    Dictionary-like object, with the following attributes.

    data: {ndarray, dataframe} of shape (150, 4)
    The data matrix. If as_frame=True, data will be a pandas DataFrame.

    target: {ndarray, Series} of shape (150,)
    The classification target. If as_frame=True, target will be a pandas Series.

    feature_names: list
    The names of the dataset columns.

    target_names: list
    The names of target classes.

    frame: DataFrame of shape (150, 5)
    Only present when as_frame=True. DataFrame with data and target.

    New in version 0.23.

    DESCR: str
    The full description of the dataset.

    filename: str
    The path to the location of the data.

    New in version 0.20.

    (data, target): tuple if return_X_y is True
    A tuple of two ndarrays. The first is a 2D array of shape (n_samples, n_features), with each row representing one sample and each column representing one feature. The second is an ndarray of shape (n_samples,) containing the target values.

    New in version 0.18.

In [8]:
from sklearn.datasets import load_iris
import pandas as pd
    
# Load the iris dataset; with default arguments this returns a dictionary-like Bunch.
iris = load_iris()
# List the keys (attributes) available on the returned Bunch.
iris.keys()


dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [11]:
# Wrap the feature matrix in a DataFrame, labelling each column with its feature name.
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

In [12]:
# Preview the first three rows of the feature DataFrame.
iris_df.head(3)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2


In [25]:
# Print the column names, non-null counts, dtypes and memory usage.
# NOTE: this cell (execution count 25) was run after the 'label' column was
# added (execution count 16), which is why the output lists 5 columns.
iris_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   label              150 non-null    int32  
dtypes: float64(4), int32(1)
memory usage: 5.4 KB


In [16]:
# Append the classification target as a new 'label' column.
iris_df['label']=iris.target
# Display the resulting DataFrame (four feature columns plus 'label').
iris_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


## sklearn.model_selection.train_test_split
* sklearn.model_selection.train_test_split(*arrays, test_size=None, train_size=None, random_state=None, shuffle=True, stratify=None)
* Returns: splitting: list, length=2 * len(arrays) (the train and test split of each input array)

In [17]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the samples as a test set; the fixed random_state keeps the
# shuffled split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.33,
    random_state=42,
)

## sklearn.tree.DecisionTreeClassifier
* class sklearn.tree.DecisionTreeClassifier(*, criterion='gini', splitter='best', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=None, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0.0, class_weight=None, ccp_alpha=0.0)

In [18]:
from sklearn.tree import DecisionTreeClassifier

# Create the DecisionTreeClassifier object with a fixed seed.
dt_clf = DecisionTreeClassifier(random_state=0)

# Train the classifier on the training split.
dt_clf.fit(X=X_train, y=y_train)

In [19]:
# Show the class labels known to the fitted classifier.
dt_clf.classes_

array([0, 1, 2])

In [20]:
# Show the fitted tree's importance score for each of the four input features.
dt_clf.feature_importances_

array([0.01256535, 0.02915555, 0.05981177, 0.89846733])

In [21]:
# Predict on the test data set using the trained DecisionTreeClassifier object.

pred = dt_clf.predict(X_test)

In [22]:
# Display the labels predicted for the test set.
pred

array([1, 0, 2, 1, 1, 0, 1, 2, 2, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0,
       0, 1, 1, 2, 1, 2])

In [23]:
# Display the true test labels for comparison with the predictions above.
y_test

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0,
       0, 1, 2, 2, 1, 2])

In [26]:
from sklearn.metrics import accuracy_score

# Fraction of test samples whose predicted label matches the true label.
# Arguments follow the documented (y_true, y_pred) order; accuracy itself is
# symmetric, but other metrics (precision, recall, ...) are not, so keep the
# convention.
accuracy_score(y_test, pred)

0.96