In [17]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

In [18]:
# Read the mushroom CSV into a DataFrame; the path is relative to the
# notebook's working directory.
csv_path = 'Minggu 6/data/mushrooms.csv'
df = pd.read_csv(csv_path)

# Preview the top rows (bare last expression -> rich notebook display)
df.head()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


In [19]:
# Integer-encode every column of df (the 'class' target included).
# One LabelEncoder is fitted per column and kept in `label_encoders`,
# keyed by column name; the column itself is overwritten in place with
# the integer codes produced by that encoder.
label_encoders = {name: LabelEncoder() for name in df.columns}
for column, encoder in label_encoders.items():
    df[column] = encoder.fit_transform(df[column])

In [20]:
# Target vector: the 'class' column.
y = df['class']
# Feature matrix: every other column of df.
# NOTE(review): the rendered preview shows a leading 'Unnamed: 0' header;
# if that is a real CSV column rather than the row index, it ends up in X
# as a feature -- confirm against the file.
X = df.drop(columns='class')

In [21]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [22]:
# Hyper-parameter grid explored for the decision tree
# (6 depths x 3 split sizes x 3 leaf sizes).
dt_params = dict(
    max_depth=[None, 10, 20, 30, 40, 50],
    min_samples_split=[2, 10, 20],
    min_samples_leaf=[1, 5, 10],
)

# Seeded base estimator so repeated runs build the same trees.
dt = DecisionTreeClassifier(random_state=42)

# Exhaustive search over the grid, scored by accuracy with 5-fold
# cross-validation on the training split only.
dt_grid = GridSearchCV(
    estimator=dt,
    param_grid=dt_params,
    scoring='accuracy',
    cv=5,
)
dt_grid.fit(X_train, y_train)

In [23]:
# Take the best tree found by the grid search and measure its accuracy
# on the held-out test split.
best_dt = dt_grid.best_estimator_
dt_test_predictions = best_dt.predict(X_test)
dt_accuracy = accuracy_score(y_true=y_test, y_pred=dt_test_predictions)

In [24]:
# Hyper-parameter grid explored for AdaBoost
# (3 ensemble sizes x 3 learning rates).
ada_params = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 1],
)

# Seeded AdaBoost using the discrete SAMME boosting algorithm.
ada = AdaBoostClassifier(algorithm='SAMME', random_state=42)

# Exhaustive search over the grid, scored by accuracy with 5-fold
# cross-validation on the training split only.
ada_grid = GridSearchCV(
    estimator=ada,
    param_grid=ada_params,
    scoring='accuracy',
    cv=5,
)
ada_grid.fit(X_train, y_train)

In [25]:
# Take the best AdaBoost model found by the grid search and measure its
# accuracy on the held-out test split.
best_ada = ada_grid.best_estimator_
ada_test_predictions = best_ada.predict(X_test)
ada_accuracy = accuracy_score(y_true=y_test, y_pred=ada_test_predictions)

In [26]:
# Winning hyper-parameter combination from each grid search
# (decision tree first, AdaBoost second).
best_dt_params, best_ada_params = (
    dt_grid.best_params_,
    ada_grid.best_params_,
)

In [27]:
# Report the winning hyper-parameter combination of each grid search.
print(f'Best Parameters for Decision Tree: {best_dt_params}')
# best_ada_params holds the AdaBoost search result (ada_grid.best_params_),
# so it is labelled as AdaBoost -- no random forest is trained in this notebook.
print(f'Best Parameters for AdaBoost: {best_ada_params}')

Best Parameters for Decision Tree: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Best Parameters for Random Forest: {'learning_rate': 1, 'n_estimators': 200}


In [28]:
# Report the held-out test accuracy of the tuned decision tree and the
# tuned AdaBoost model, one per line.
print(
    f'Best Accuracy Decision Tree on Test Set: {dt_accuracy}',
    f'Best Accuracy AdaBoost on Test Set: {ada_accuracy}',
    sep='\n',
)

Best Accuracy Decision Tree on Test Set: 1.0
Best Accuracy AdaBoost on Test Set: 1.0
