#### 샘플링 함수

In [46]:
import numpy as np
X = [1, 2, 3, 4, 5]
s = np.random.choice(X)
print(s)

4


In [47]:
from scipy.stats import poisson
poisson_rv = poisson(10)
X = poisson_rv.rvs(5)
print(X)

[4 8 8 7 9]


In [48]:
from scipy.stats import uniform, loguniform
uni_rv = uniform(10, 20)
log_uni_rv = loguniform(10, 10000)
X1 = uni_rv.rvs(10)
X2 = log_uni_rv.rvs(10)

print(X1)
print(X2)

[19.04748286 25.45156741 14.55717218 22.6474233  26.45716228 15.11677365
 13.56747024 22.56724407 21.74957328 17.94179903]
[  70.11776873  148.38559595 9657.54416968   17.58390487  193.7628786
 6894.34262904   42.76057622 1675.86713148 4564.41491917 1009.24129753]


#### RandomSearchCV 클래스

In [49]:
dist = {"max_features":loguniform(0.5, 1),
        "max_depth": range(3, 8),
        "criterion":["gini", "entropy"]}

In [50]:
import pandas as pd
df = pd.read_csv("../../data/classification/movement_libras.csv")
X = df.drop('y', axis = 1)
y = df['y']

In [51]:
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.model_selection import RandomizedSearchCV
clf = RandomizedSearchCV(RFC(random_state = 2022),
                         dist,
                         cv = 5,
                         n_iter = 10,
                         scoring = "accuracy",
                         random_state=2022).fit(X, y)

In [52]:
result = pd.DataFrame(clf.cv_results_)
display(result[['params', 'mean_test_score', 'mean_fit_time']])

Unnamed: 0,params,mean_test_score,mean_fit_time
0,"{'criterion': 'entropy', 'max_depth': 7, 'max_...",0.786111,2.867841
1,"{'criterion': 'entropy', 'max_depth': 4, 'max_...",0.677778,1.705071
2,"{'criterion': 'gini', 'max_depth': 5, 'max_fea...",0.736111,0.931867
3,"{'criterion': 'entropy', 'max_depth': 4, 'max_...",0.697222,2.517144
4,"{'criterion': 'entropy', 'max_depth': 6, 'max_...",0.783333,3.193398
5,"{'criterion': 'gini', 'max_depth': 6, 'max_fea...",0.758333,1.203861
6,"{'criterion': 'gini', 'max_depth': 3, 'max_fea...",0.597222,0.823997
7,"{'criterion': 'gini', 'max_depth': 5, 'max_fea...",0.736111,0.952974
8,"{'criterion': 'gini', 'max_depth': 4, 'max_fea...",0.697222,0.848706
9,"{'criterion': 'entropy', 'max_depth': 4, 'max_...",0.683333,3.210292


In [53]:
print(clf.best_estimator_)
print(clf.best_score_)
print(clf.best_params_)

RandomForestClassifier(criterion='entropy', max_depth=7,
                       max_features=0.7066451376545405, random_state=2022)
0.7861111111111111
{'criterion': 'entropy', 'max_depth': 7, 'max_features': 0.7066451376545405}


#### 직접 구현하기

In [None]:
from sklearn.metrics import *
from sklearn.model_selection import KFold
kf = KFold(n_splits = 5)
best_score = -1
num_iter = 10
for _ in range(num_iter):
    total_score = 0
    for train_index, test_index in kf.split(X):
        X_train = X.loc[train_index] 
        X_test = X.loc[test_index]
        y_train = y.loc[train_index]
        y_test = y.loc[test_index]
        _max_features = loguniform(0.5, 1).rvs(1)[0]
        _max_depth = np.random.choice(range(3, 8))
        _criterion = np.random.choice(["gini", "entropy"])
        
        model = RFC(max_features = _max_features,
                    max_depth = _max_depth,
                    criterion = _criterion).fit(X_train, y_train)
        y_pred = model.predict(X_test)
        score = accuracy_score(y_test, y_pred)
        total_score += score / 5
    if total_score > best_score:
        best_score = total_score
        best_parameter = [_max_features, _max_depth, _criterion]

In [55]:
print(best_parameter, best_score)

[0.6994322394713137, 7, 'gini'] 0.7500000000000001
