In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier


In [5]:
data = pd.read_csv("/content/Wholesale customers data - Wholesale customers data.csv")


In [6]:
X = data.drop("Channel", axis=1)
y = data["Channel"]


In [7]:
label_encoder = LabelEncoder()
X["Region"] = label_encoder.fit_transform(X["Region"])


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [9]:
pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("classifier", None)
])


In [11]:
param_grid = [
    {
        "classifier": [RandomForestClassifier()],
        "classifier__n_estimators": [100, 200, 300],
        "classifier__max_depth": [None, 5, 10]
    },
    {
        "classifier": [SVC()],
        "classifier__kernel": ["linear", "rbf"],
        "classifier__C": [1, 10, 100]
    },
    {
        "classifier": [DecisionTreeClassifier()],
        "classifier__max_depth": [None, 5, 10],
        "classifier__min_samples_split": [2, 5, 10]
    }
]


In [12]:
grid_search = GridSearchCV(pipeline, param_grid, cv=5)
grid_search.fit(X_train, y_train)


TypeError: ignored

In [13]:
grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)


In [14]:
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)


Best Parameters: {'classifier': RandomForestClassifier(), 'classifier__max_depth': None, 'classifier__n_estimators': 100}
Best Score: 0.9261569416498994


In [15]:
classifiers = ["Random Forest", "SVM", "Decision Tree"]
for i, classifier in enumerate(grid_search.cv_results_["params"]):
    print(f"Classifier: {classifiers[i]}")
    pipeline.set_params(**classifier)
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    print(classification_report(y_test, y_pred))
    print("-" * 40)


Classifier: Random Forest
              precision    recall  f1-score   support

           1       0.97      0.91      0.94        65
           2       0.78      0.91      0.84        23

    accuracy                           0.91        88
   macro avg       0.87      0.91      0.89        88
weighted avg       0.92      0.91      0.91        88

----------------------------------------
Classifier: SVM
              precision    recall  f1-score   support

           1       0.97      0.91      0.94        65
           2       0.78      0.91      0.84        23

    accuracy                           0.91        88
   macro avg       0.87      0.91      0.89        88
weighted avg       0.92      0.91      0.91        88

----------------------------------------
Classifier: Decision Tree
              precision    recall  f1-score   support

           1       0.97      0.89      0.93        65
           2       0.75      0.91      0.82        23

    accuracy                    

IndexError: ignored

In [16]:
classifiers = ["Random Forest", "SVM", "Decision Tree"]
for classifier, params in zip(classifiers, grid_search.cv_results_["params"]):
    print(f"Classifier: {classifier}")
    pipeline.set_params(**params)
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    print(classification_report(y_test, y_pred))
    print("============================================")


Classifier: Random Forest
              precision    recall  f1-score   support

           1       0.97      0.91      0.94        65
           2       0.78      0.91      0.84        23

    accuracy                           0.91        88
   macro avg       0.87      0.91      0.89        88
weighted avg       0.92      0.91      0.91        88

Classifier: SVM
              precision    recall  f1-score   support

           1       0.97      0.91      0.94        65
           2       0.78      0.91      0.84        23

    accuracy                           0.91        88
   macro avg       0.87      0.91      0.89        88
weighted avg       0.92      0.91      0.91        88

Classifier: Decision Tree
              precision    recall  f1-score   support

           1       0.97      0.91      0.94        65
           2       0.78      0.91      0.84        23

    accuracy                           0.91        88
   macro avg       0.87      0.91      0.89        88
weight