### 모델 학습

In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
# Switch the working directory to the data folder; the dataset paths below
# (and any later relative path) are resolved against it.
# NOTE(review): the target is relative to the current directory, so running
# this cell a second time in the same kernel resolves "../../data" from the
# already-changed location — restart the kernel before re-running.
os.chdir("../../data")

# Read the three CSV files in order: df1 (sonar), df2 (iris), df3 (wankara).
df1, df2, df3 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "classification/sonar.csv",
        "classification/iris.csv",
        "regression/wankara.csv",
    )
)

In [2]:
# For each dataset: features are every column except 'y', the target is the
# 'y' column, and the rows are divided into train and test partitions.
# random_state is fixed so the same partition is produced on every run.

# Dataset 1 (df1)
X1, y1 = df1.drop(columns = 'y'), df1['y']
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1, y1, random_state = 2022
)

# Dataset 2 (df2)
X2, y2 = df2.drop(columns = 'y'), df2['y']
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2, y2, random_state = 2022
)

# Dataset 3 (df3)
X3, y3 = df3.drop(columns = 'y'), df3['y']
X3_train, X3_test, y3_train, y3_test = train_test_split(
    X3, y3, random_state = 2022
)

#### 모델 인스턴스화

In [3]:
from sklearn.tree import DecisionTreeClassifier as DTC
# Instantiate a decision-tree classifier, passing the hyperparameter directly
# as a keyword argument.
model = DTC(max_depth = 10)
# Printing the estimator shows its class name together with the max_depth setting.
print(model)

DecisionTreeClassifier(max_depth=10)


In [4]:
# Collect the hyperparameters in a dict first, then expand the dict into
# keyword arguments with ** when constructing the classifier.
parameter = dict(max_depth = 10)
model = DTC(**parameter)

# The printed estimator is the same as when max_depth is passed directly.
print(model)

DecisionTreeClassifier(max_depth=10)


In [5]:
from sklearn.tree import DecisionTreeRegressor as DTR
# One estimator per dataset: classifiers for df1 and df2, a regressor for df3.
# random_state is fixed (same seed as the train/test splits) because
# scikit-learn's trees permute the features at every split; without a seed,
# ties between equally good splits can be resolved differently from run to
# run, so the fitted trees and every metric derived from them would not be
# reproducible.
model1 = DTC(max_depth = 10, random_state = 2022)
model2 = DTC(max_depth = 10, random_state = 2022)
model3 = DTR(max_depth = 10, random_state = 2022)

In [6]:
# Train each estimator on the training partition of its own dataset.
model1.fit(X1_train, y1_train)
model2.fit(X2_train, y2_train)
# This is the last expression in the cell, so its return value (the regressor)
# is what the notebook displays as the cell output.
model3.fit(X3_train, y3_train)

DecisionTreeRegressor(max_depth=10)

### 모델 평가

#### predict 메서드

In [7]:
# Predict on the held-out test partition of each dataset with the estimator
# that was fitted on that dataset's training partition.
y1_pred, y2_pred, y3_pred = (
    fitted.predict(holdout)
    for fitted, holdout in (
        (model1, X1_test),
        (model2, X2_test),
        (model3, X3_test),
    )
)

In [8]:
# Show only the first five predictions for dataset 1 rather than the whole array.
display(y1_pred[:5])

array([1, 0, 0, 1, 1], dtype=int64)

#### predict_proba 메서드

In [9]:
# predict_proba is called on the two classifiers only (model1, model2);
# the regressor model3 is not used here.
y1_prob, y2_prob = (
    clf.predict_proba(holdout)
    for clf, holdout in ((model1, X1_test), (model2, X2_test))
)

# Show the first five rows of the probabilities for dataset 2.
display(y2_prob[:5])

array([[0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]])

#### 분류 모델 평가

In [10]:
from sklearn.metrics import *
# Accuracy, precision, recall and F1 for dataset 1, each called as
# metric(y_true, y_pred) on the test labels and the model's predictions.
acc, pre, rec, f1 = (
    metric(y1_test, y1_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
print(acc, pre, rec, f1)

0.7307692307692307 0.64 0.7619047619047619 0.6956521739130435


In [11]:
# f1_score uses average='binary' unless told otherwise, and that setting is
# rejected for a multiclass target such as y2: the call raises ValueError.
# The error itself is what this cell demonstrates, so it is caught and printed
# (in the same "ValueError: ..." form) instead of aborting a Run All execution.
try:
    print(f1_score(y2_test, y2_pred))
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

In [12]:
# A multiclass target needs an explicit averaging strategy; compute F1 for
# dataset 2 with both "macro" and "weighted" averaging.
macro_f1, weighted_f1 = (
    f1_score(y2_test, y2_pred, average = averaging)
    for averaging in ("macro", "weighted")
)
print(macro_f1, weighted_f1)

0.9375 0.9457236842105263


#### 회귀(예측) 모델 평가

In [13]:
# Error metrics for the regressor on dataset 3. scikit-learn metrics take
# (y_true, y_pred); the arguments are passed in that order, consistent with
# the classification metrics computed earlier in this notebook. Both metrics
# are symmetric in their two arguments, so the values are unaffected.
mae = mean_absolute_error(y3_test, y3_pred)
mse = mean_squared_error(y3_test, y3_pred)
# RMSE is the square root of MSE.
rmse = mse ** 0.5
print(mae, mse, rmse)

1.5777579993251636 3.952373092128416 1.988057617909606
