<a href="https://colab.research.google.com/github/tushar4221/First-Repo/blob/main/Untitled19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import copy
import itertools
import pdb

import matplotlib.pyplot as plt
from joblib import dump
from sklearn import svm, tree
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split


def get_all_combs(param_vals, param_name, combs_so_far):
    """Extend every partial combination with each value of one parameter.

    For each dict in ``combs_so_far`` and each value in ``param_vals``, a new
    dict is built that holds the partial combination plus
    ``param_name -> value``. The input dicts are left unmodified.

    Returns:
        A list of the extended dicts, ordered by partial combination first
        and by value second.
    """
    return [
        {**partial, param_name: value}
        for partial in combs_so_far
        for value in param_vals
    ]


def get_all_h_param_comb(params):
    """Return every combination of the given hyperparameter values.

    Args:
        params: Mapping from hyperparameter name to an iterable of candidate
            values for that hyperparameter.

    Returns:
        A list of dicts, one per combination, each mapping every name in
        ``params`` to one of its candidate values. Names keep the iteration
        order of ``params``; the last parameter varies fastest. An empty
        ``params`` yields ``[{}]``, and any parameter with no candidate
        values yields ``[]``.
    """
    names = list(params)
    # itertools.product is the standard Cartesian product and materializes
    # each value iterable once, so one-shot iterables are handled correctly.
    value_lists = [params[name] for name in names]
    return [dict(zip(names, combo)) for combo in itertools.product(*value_lists)]


def preprocess_digits(dataset):
    """Flatten the images of ``dataset`` into one feature row per image.

    Reads ``dataset.images`` and ``dataset.target``.

    Returns:
        ``(data, label)`` where ``data`` is ``dataset.images`` reshaped to
        ``(number of images, -1)`` and ``label`` is ``dataset.target``
        unchanged.
    """
    images = dataset.images
    flattened = images.reshape((len(images), -1))
    return flattened, dataset.target

# Visualization: show sample images with their labels.
def data_viz(dataset):
    """Draw up to four images from ``dataset`` side by side, titled by label.

    Pairs ``dataset.images`` with ``dataset.target`` and renders each pair on
    one axis of a 1x4 matplotlib figure. Nothing is returned.
    """
    _, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
    samples = zip(dataset.images, dataset.target)
    # zip stops at the shortest input, so at most four samples are drawn.
    for axis, (image, label) in zip(axes, samples):
        axis.set_axis_off()
        axis.imshow(image, cmap=plt.cm.gray_r, interpolation="nearest")
        axis.set_title("Training: %i" % label)


# Prediction visualization: show test images with their predicted labels.
def pred_image_viz(x_test, predictions):
    """Draw up to four test samples side by side, titled by their prediction.

    Each sample from ``x_test`` is reshaped to 8x8 before being rendered on
    one axis of a 1x4 matplotlib figure. Nothing is returned.
    """
    _, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
    samples = zip(x_test, predictions)
    # zip stops at the shortest input, so at most four samples are drawn.
    for axis, (flat_image, prediction) in zip(axes, samples):
        axis.set_axis_off()
        axis.imshow(
            flat_image.reshape(8, 8),
            cmap=plt.cm.gray_r,
            interpolation="nearest",
        )
        axis.set_title(f"Prediction: {prediction}")


# Train/dev/test split


def train_dev_test_split(data, label, train_frac, dev_frac):
    """Shuffle ``data``/``label`` and split them into train, dev and test parts.

    Args:
        data: Samples to split.
        label: Labels aligned with ``data``.
        train_frac: Fraction of all samples kept for training.
        dev_frac: Fraction of all samples intended for the dev set; whatever
            remains after train and dev becomes the test set.

    Returns:
        ``(x_train, y_train, x_dev, y_dev, x_test, y_test)``.
    """
    holdout_frac = 1 - train_frac
    x_train, x_holdout, y_train, y_holdout = train_test_split(
        data, label, test_size=holdout_frac, shuffle=True
    )

    # In the second split the "test" output is taken as the dev set, so its
    # size is expressed relative to the hold-out portion rather than the
    # full data; the "train" output of this split becomes the test set.
    dev_share = (dev_frac) / holdout_frac
    x_test, x_dev, y_test, y_dev = train_test_split(
        x_holdout, y_holdout, test_size=dev_share, shuffle=True
    )

    return x_train, y_train, x_dev, y_dev, x_test, y_test


def h_param_tuning(h_param_comb, clf, x_train, y_train, x_dev, y_dev, metric, verbose=False):
    """Fit ``clf`` once per hyperparameter combination and keep the best fit.

    Args:
        h_param_comb: Iterable of dicts; each dict is passed to
            ``clf.set_params`` as keyword arguments.
        clf: Estimator exposing ``set_params``, ``fit`` and ``predict``. It is
            reconfigured and refit in place for every combination.
        x_train, y_train: Data the estimator is fitted on.
        x_dev, y_dev: Data the candidates are scored on.
        metric: Callable invoked as ``metric(y_pred=..., y_true=...)``; a
            larger value is treated as better.
        verbose: When true, print every new best combination and its score.

    Returns:
        ``(best_model, best_metric, best_h_params)``. ``best_model`` is an
        independent copy of the estimator as fitted with ``best_h_params``.
        If no combination scores above ``-1.0`` (including an empty
        ``h_param_comb``), ``(None, -1.0, None)`` is returned.
    """
    best_metric = -1.0
    best_model = None
    best_h_params = None

    for cur_h_params in h_param_comb:
        clf.set_params(**cur_h_params)
        clf.fit(x_train, y_train)

        predicted_dev = clf.predict(x_dev)
        cur_metric = metric(y_pred=predicted_dev, y_true=y_dev)

        if cur_metric > best_metric:
            best_metric = cur_metric
            # Snapshot the fitted estimator: ``clf`` is refit on every later
            # iteration, so keeping a bare reference would end up returning
            # the model of the last combination instead of the best one.
            best_model = copy.deepcopy(clf)
            best_h_params = cur_h_params
            if verbose:
                print("best metric :" + str(cur_h_params))
                print("New best val metric:" + str(cur_metric))
    return best_model, best_metric, best_h_params


def tune_and_save(
    clf, x_train, y_train, x_dev, y_dev, metric, h_param_comb, model_path
):
    """Tune ``clf`` over ``h_param_comb``, save the best model and report it.

    Args:
        clf: Estimator to tune; forwarded to ``h_param_tuning``.
        x_train, y_train: Training data.
        x_dev, y_dev: Data used to score each hyperparameter combination.
        metric: Scoring callable forwarded to ``h_param_tuning``.
        h_param_comb: Iterable of hyperparameter dicts to try.
        model_path: Destination for the saved model. When ``None``, a file
            name is derived from the model type and the best hyperparameters.

    Returns:
        The path the model was written to.

    Raises:
        ValueError: If tuning did not select any model (for example because
            ``h_param_comb`` is empty).
    """
    best_model, best_metric, best_h_params = h_param_tuning(
        h_param_comb, clf, x_train, y_train, x_dev, y_dev, metric
    )
    if best_model is None:
        # Fail with a clear message instead of an obscure error further down
        # and instead of writing a useless ``None`` to disk.
        raise ValueError(
            "Hyperparameter tuning did not select a model; "
            "check that h_param_comb is not empty."
        )

    # A file name is only needed when the caller did not supply a path.
    if model_path is None:
        if isinstance(clf, svm.SVC):
            model_type = "svm"
        elif isinstance(clf, tree.DecisionTreeClassifier):
            model_type = "decision_tree"
        else:
            # Any other estimator is named after its class so the name is
            # always defined.
            model_type = type(clf).__name__

        best_param_config = "_".join(
            f"{name}={value}" for name, value in best_h_params.items()
        )
        model_path = model_type + "_" + best_param_config + ".joblib"

    dump(best_model, model_path)

    print("Final hyperparameter:" + str(best_h_params))

    print("Best Metric:{}".format(best_metric))

    return model_path


def macro_f1(y_true, y_pred, pos_label=1):
    """Return the macro-averaged F1 score of ``y_pred`` against ``y_true``.

    ``pos_label`` is forwarded to ``f1_score`` unchanged.
    """
    score = f1_score(
        y_true,
        y_pred,
        pos_label=pos_label,
        average='macro',
        zero_division='warn',
    )
    return score