<a href="https://colab.research.google.com/github/tushar4221/First-Repo/blob/main/Untitled18.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn import datasets, svm, metrics, tree
import pdb

from utils import (
    preprocess_digits,
    train_dev_test_split,
    data_viz,
    get_all_h_param_comb,
    tune_and_save,
    macro_f1
)
from joblib import dump, load

# Fractions of the dataset assigned to the train / dev / test splits.
train_frac, dev_frac, test_frac = 0.8, 0.1, 0.1
# Compare with a tolerance instead of `== 1.0`: exact float equality rejects
# valid splits such as 0.7 / 0.2 / 0.1, whose sum is 0.9999999999999999.
assert abs(train_frac + dev_frac + test_frac - 1.0) < 1e-9, (
    "train_frac, dev_frac and test_frac must sum to 1.0"
)

# 1. Hyper-parameter search space
# Candidate values for each tunable hyper-parameter.
gamma_list = [0.01, 0.001, 0.0001, 0.005, 0.0005]
c_list = [0.1, 0.2, 0.5, 0.7, 1, 2, 5, 7, 10]
max_depth_list = [2, 10, 20, 50, 100]

# Parameter-name -> candidate-values mappings handed to get_all_h_param_comb.
svm_params = {"gamma": gamma_list, "C": c_list}
dec_params = {"max_depth": max_depth_list}

svm_h_param_comb = get_all_h_param_comb(svm_params)
dec_h_param_comb = get_all_h_param_comb(dec_params)

# Keyed by classifier name so each model can look up its own combinations.
h_param_comb = {"svm": svm_h_param_comb, "decision_tree": dec_h_param_comb}

# PART: load dataset
# Load scikit-learn's bundled digits dataset.
digits = datasets.load_digits()
# Project helper from utils; presumably plots sample digits -- TODO confirm.
data_viz(digits)
# Project helper from utils; returns the feature array and the label array
# used for splitting below (exact preprocessing not visible here).
data, label = preprocess_digits(digits)

# Only `data` and `label` are used from here on; drop the raw dataset name.
del digits

# Evaluation Metric
# Every metric in this list is computed on the test-set predictions of each run
# and stored under the metric function's __name__.
metric_list = [metrics.accuracy_score, macro_f1]
# Metric handed to tune_and_save for hyper-parameter selection.
h_metric = metrics.accuracy_score

# Number of repeated runs; each run calls train_dev_test_split again
# (presumably yielding a different random split -- TODO confirm in utils).
n_cv = 5
# results[clf_name] -> list with one {metric name: score} dict per run.
results = {}
for n in range(n_cv):
    x_train, y_train, x_dev, y_dev, x_test, y_test = train_dev_test_split(
        data, label, train_frac, dev_frac
    )
    # PART: Model Processing
    # Fresh, unconfigured estimators are created for every run so that no
    # state carries over from the previous run.
    models_of_choice = {
        "svm": svm.SVC(),
        "decision_tree": tree.DecisionTreeClassifier(),
    }
    for clf_name, clf in models_of_choice.items():
        print(f"[{n}] Running hyper param tuning for {clf_name}")
        # tune_and_save is a project helper; its return value is used as the
        # path of the saved model (presumably the best one on the dev split
        # according to h_metric -- TODO confirm in utils).
        actual_model_path = tune_and_save(
            clf,
            x_train,
            y_train,
            x_dev,
            y_dev,
            h_metric,
            h_param_comb[clf_name],
            model_path=None,
        )

        # 2. Best model: reload the estimator that tuning persisted.
        best_model = load(actual_model_path)

        # PART: Get test set predictions
        predicted = best_model.predict(x_test)

        # Record every metric for this run under the classifier's name.
        results.setdefault(clf_name, []).append(
            {m.__name__: m(y_pred=predicted, y_true=y_test) for m in metric_list}
        )

        # PART: Evaluation Metrics
        # Label the report with best_model, the estimator that actually
        # produced `predicted`, rather than the template estimator `clf`.
        print(
            f" classifier from Classification report {best_model}:\n"
            f"{metrics.classification_report(y_test, predicted)}\n"
        )

print(results)
