In [1]:
# Apply the k-fold cross-validation approach using the iris dataset
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
import numpy as np

# Load the dataset object; its `data` and `target` attributes are read further down.
data = load_iris()
# Bare last expression: echoes the entire loaded object as the cell output.
data


{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [3]:
# Feature matrix and class labels taken from the loaded iris object.
X, y = data.data, data.target

# Decision tree classifier; random_state is pinned so reruns are repeatable.
model = DecisionTreeClassifier(random_state=11)

# Accuracy of the classifier on each of the 5 cross-validation folds.
scores = cross_val_score(
    model,
    X,
    y,
    scoring='accuracy',
    cv=5,
)


In [4]:
# Report the per-fold accuracies, then their average across folds.
print("Cross-validation scores:", scores)
print("Mean cross-validation score:", scores.mean())


Cross-validation scores: [0.96666667 0.96666667 0.9        0.96666667 1.        ]
Mean cross-validation score: 0.9600000000000002


In [8]:
# Fit the classifier on the full dataset and predict those same samples.
# The resulting score is therefore a training accuracy, not an estimate of
# performance on unseen data.
model.fit(X, y)
y_pred = model.predict(X)

# Side-by-side table of true labels and predictions.
df_result = pd.DataFrame(data={'Actual': y, 'Predicted': y_pred})

# The element-wise comparison already produces booleans, so no np.where
# wrapper is needed to build the flag column.
df_result['Correct'] = df_result['Actual'] == df_result['Predicted']

# Mean of a boolean column is the fraction of correct predictions.
accuracy = np.mean(df_result['Correct'])
print("Training accuracy:", accuracy)


Training accuracy: 1.0


In [3]:
# Apply grid search with k-fold cross-validation (GridSearchCV)
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris

data = load_iris()

# Hold out 20% of the samples as a test set. Passing the labels to `stratify`
# is the key fix: the split is stratified on the class labels.
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target,
    test_size=0.2,
    stratify=data.target,
    random_state=42,
)

# Candidate hyper-parameters: 5 depths x 3 split thresholds = 15 combinations.
parameters = {
    "max_depth": list(range(1, 6)),
    "min_samples_split": [2, 3, 4],
}
model = DecisionTreeClassifier(random_state=11)

# Each combination is scored by accuracy using 5-fold cross-validation
# on the training split only.
grid_model = GridSearchCV(estimator=model, param_grid=parameters, scoring="accuracy", cv=5)
grid_model.fit(X_train, y_train)

# Winning combination, its mean cross-validation accuracy, and the accuracy
# on the held-out test set.
print("best params:", grid_model.best_params_)
print("best cv score:", grid_model.best_score_)
print("test score:", grid_model.score(X_test, y_test))

best params: {'max_depth': 4, 'min_samples_split': 2}
best cv score: 0.9416666666666667
test score: 0.9666666666666667


In [4]:
import numpy as np
import pandas as pd

# Turn the grid-search results into a DataFrame so they can be shown as a table.
score_df = pd.DataFrame(grid_model.cv_results_)

# Show only the summary columns: the parameter combination, its mean and
# standard deviation of the test score, and its rank.
score_df[['params', 'mean_test_score', 'std_test_score', 'rank_test_score']]

Unnamed: 0,params,mean_test_score,std_test_score,rank_test_score
0,"{'max_depth': 1, 'min_samples_split': 2}",0.666667,0.0,13
1,"{'max_depth': 1, 'min_samples_split': 3}",0.666667,0.0,13
2,"{'max_depth': 1, 'min_samples_split': 4}",0.666667,0.0,13
3,"{'max_depth': 2, 'min_samples_split': 2}",0.925,0.03118,10
4,"{'max_depth': 2, 'min_samples_split': 3}",0.925,0.03118,10
5,"{'max_depth': 2, 'min_samples_split': 4}",0.925,0.03118,10
6,"{'max_depth': 3, 'min_samples_split': 2}",0.933333,0.020412,7
7,"{'max_depth': 3, 'min_samples_split': 3}",0.933333,0.020412,7
8,"{'max_depth': 3, 'min_samples_split': 4}",0.933333,0.020412,7
9,"{'max_depth': 4, 'min_samples_split': 2}",0.941667,0.020412,1


In [5]:
# Predict the test-set labels with the best estimator found by the grid search.
(
    grid_model
    .best_estimator_
    .predict(X_test)
)

array([0, 2, 1, 1, 0, 1, 0, 0, 2, 1, 2, 2, 2, 1, 0, 0, 0, 1, 1, 2, 0, 2,
       1, 2, 2, 2, 1, 0, 2, 0])