## [作業重點]
確保你了解隨機森林模型中每個超參數的意義，並觀察調整超參數對結果的影響

## 作業

1. 試著調整 RandomForestClassifier(...) 中的參數，並觀察是否會改變結果？
2. 改用其他資料集 (boston, wine)，並與回歸模型與決策樹的結果進行比較

In [49]:
from sklearn import datasets, metrics
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from sklearn.model_selection import train_test_split

In [50]:
wine = datasets.load_wine()

X_train, X_test, y_train, y_test = train_test_split(wine.data, wine.target, test_size=0.2, random_state=7)

rdc = RandomForestClassifier(n_estimators=40, criterion="gini", max_features="auto", max_depth=10, min_samples_split=10, min_samples_leaf=1)
rdc.fit(X_train, y_train)

y_pred = rdc.predict(X_test)

In [51]:
acc = metrics.accuracy_score(y_test, y_pred)
print("Accuracy: ", acc)

Accuracy:  1.0


In [52]:
print(wine.feature_names)

['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']


In [53]:
rdc.predict_proba(X_test)

array([[0.        , 0.04166667, 0.95833333],
       [0.93214286, 0.06785714, 0.        ],
       [0.        , 0.025     , 0.975     ],
       [0.        , 0.08764881, 0.91235119],
       [0.04916667, 0.81496753, 0.1358658 ],
       [0.        , 0.04494048, 0.95505952],
       [0.01286301, 0.94713699, 0.04      ],
       [0.68443588, 0.31556412, 0.        ],
       [0.10677083, 0.84641098, 0.04681818],
       [0.05400787, 0.38766338, 0.55832875],
       [0.75446429, 0.24553571, 0.        ],
       [0.00227273, 0.99772727, 0.        ],
       [0.03838287, 0.3433479 , 0.61826923],
       [0.018125  , 0.97625   , 0.005625  ],
       [0.        , 1.        , 0.        ],
       [0.01859217, 0.96015783, 0.02125   ],
       [0.17118056, 0.79256944, 0.03625   ],
       [0.00227273, 0.16174242, 0.83598485],
       [0.95382576, 0.03367424, 0.0125    ],
       [0.975     , 0.        , 0.025     ],
       [0.18532828, 0.78842172, 0.02625   ],
       [0.33035256, 0.58158383, 0.0880636 ],
       [0.

In [54]:
boston = datasets.load_boston()

X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, test_size=0.2, random_state=7)

rdf = RandomForestRegressor(n_estimators=40, criterion="mae", max_features="auto", max_depth=10, min_samples_split=10, min_samples_leaf=1)
rdf.fit(X_train, y_train)

y_pred = rdf.predict(X_test)

In [55]:
mae = metrics.mean_squared_error(y_test, y_pred)
print("mae: ", mae)

mae:  17.812310600490203


In [56]:
print(boston.feature_names)

['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']


In [57]:
rdf.predict(X_test)

array([19.58375, 20.12125, 20.8325 , 20.66125,  8.0125 ,  9.4575 ,
       19.36   , 22.67375, 26.77875, 15.825  ,  7.77   , 32.0925 ,
       16.13375, 18.94   , 42.02625, 19.26   , 24.1175 , 32.50125,
       15.47625, 21.62375, 16.285  , 31.21625, 43.6975 , 19.6525 ,
       14.53375, 14.605  , 34.63875, 28.07   , 22.88125, 23.56125,
       20.1325 , 30.53375, 31.565  , 15.68625, 43.42625, 18.33875,
       20.11875, 16.19625, 20.44875, 28.4875 , 22.25   , 13.275  ,
       15.5775 , 27.3175 , 17.86125, 12.76375, 21.465  , 20.28875,
       18.05   , 20.56375, 24.52875, 24.8225 , 22.70625, 44.06625,
       11.27875, 19.91875, 17.95   , 19.5175 , 21.97875, 20.55   ,
       20.53125, 32.51875, 19.41625, 20.7775 , 20.9625 , 48.26875,
       19.0675 , 19.47125, 22.28125, 28.8375 , 23.01375,  9.69375,
       19.75375, 33.52375, 21.35125, 19.73   , 14.8    , 15.7425 ,
       31.2625 , 22.08375, 23.41875, 25.8175 , 20.6375 , 35.76   ,
       24.90625, 25.83625, 22.86625, 18.6275 , 34.10375, 23.44