In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split


In [2]:
# Load the red-wine quality data (semicolon-delimited CSV).
df = pd.read_csv("winequality-red.csv", sep=';')

# Binary target: 1 = good (quality >= 7), 0 = bad (quality < 7).
# Built with a vectorized comparison rather than a per-row Python lambda.
df['label'] = (df['quality'] >= 7).astype('int64')

# Features are every column except the raw score and the label derived from it
# (keeping 'quality' in X would leak the target).
X = df.drop(columns=['quality', 'label'])
y = df['label']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): the split is not stratified although the classes are imbalanced
# (273 vs 47 in the test report further down) -- consider stratify=y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [9]:
# Candidate hyper-parameter values for the random forest search.
param_grid = dict(
    # how many trees to grow in the forest
    n_estimators=[100, 200, 300],
    # depth cap per tree (None leaves the depth unrestricted)
    max_depth=[None, 10, 20, 30],
    # smallest node size that may still be split
    min_samples_split=[2, 5, 10],
    # smallest number of samples allowed in a leaf
    min_samples_leaf=[1, 2, 4],
    # whether each tree is trained on a bootstrap sample
    bootstrap=[True, False],
    # impurity measure used to choose splits
    criterion=['gini', 'entropy'],
)


In [10]:
# Base estimator; the fixed seed makes each candidate forest reproducible.
rf = RandomForestClassifier(random_state=42)

# Randomized search: evaluates a random subset of the combinations in
# param_grid instead of the full grid, scoring each with 5-fold CV.
# NOTE(review): selection uses plain accuracy although the classes are
# imbalanced -- consider scoring='f1' or 'balanced_accuracy'.
random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_grid,
    n_iter=50,           # number of random combinations to try
    cv=5,
    scoring='accuracy',
    n_jobs=-1,           # run candidate fits in parallel on all processors
    random_state=42,     # reproducible sampling of combinations
)

# Fit on the training split only; the test split is kept for final evaluation.
random_search.fit(X_train, y_train)


In [11]:
# Show which sampled hyper-parameter combination won the search.
print("Best Parameters:", random_search.best_params_)

# Take the best estimator found by the search and predict the held-out test split.
best_rf = random_search.best_estimator_
y_pred = best_rf.predict(X_test)

# Per-class precision / recall / F1 on the test split.
test_report = classification_report(y_test, y_pred)
print(test_report)


Best Parameters: {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 20, 'criterion': 'gini', 'bootstrap': False}
              precision    recall  f1-score   support

           0       0.92      0.97      0.94       273
           1       0.73      0.51      0.60        47

    accuracy                           0.90       320
   macro avg       0.82      0.74      0.77       320
weighted avg       0.89      0.90      0.89       320



In [12]:
# Summary metrics for the tuned model's test-set predictions:
# overall accuracy, the confusion matrix (rows = true class, columns =
# predicted class), and the per-class precision / recall / F1 report.
print("Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.9
[[264   9]
 [ 23  24]]
              precision    recall  f1-score   support

           0       0.92      0.97      0.94       273
           1       0.73      0.51      0.60        47

    accuracy                           0.90       320
   macro avg       0.82      0.74      0.77       320
weighted avg       0.89      0.90      0.89       320

