# Hardcore Debugging

In [1]:
import os
import itertools

from multiprocessing import Pool

import joblib

import numpy as np

from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics

import grpc

from verta import ModelDBClient

## Instantiate Client

In [2]:
# Create the tracking client and select the project and experiment by name.
# run_experiment later calls client.set_experiment_run() on this same client.
# NOTE(review): the two positional arguments look like an account email and a
# developer key -- confirm against the verta client documentation.
client = ModelDBClient("me@me.me", "skeleton")
proj = client.set_project("Income Classification")
expt = client.set_experiment("Logistic Regression")

In [3]:
# Input arrays live under ../data/census; saved models go to ../output/client-demo.
_DATA_DIR = os.path.join("..", "data", "census")
_OUTPUT_DIR = os.path.join("..", "output", "client-demo")

TRAIN_DATA_PATH = os.path.join(_DATA_DIR, "train.npz")
TEST_DATA_PATH = os.path.join(_DATA_DIR, "test.npz")
# the "{}" placeholder is filled with the experiment run's id when a model is saved
MODEL_PATH = os.path.join(_OUTPUT_DIR, "logreg_gridsearch_{}.gz")

## Design Validation Run

In [4]:
# Grid of candidate values; every combination becomes one experiment run.
hyperparam_candidates = {
    'C': [1e-4, 1e-3, 1e-2, 1e-1, 1],
    'solver': ['lbfgs'],
    # max_iter must be an integer: scikit-learn documents it as int and recent
    # releases reject floats such as 1e4 during parameter validation
    'max_iter': [10000, 100000],
}
# Cartesian product of the candidate lists, one {name: value} dict per combination.
hyperparam_sets = [dict(zip(hyperparam_candidates.keys(), values))
                   for values
                   in itertools.product(*hyperparam_candidates.values())]

def run_experiment(hyperparams):
    """Train, validate, and log one logistic-regression experiment run.

    Creates a new experiment run on the module-level ``client``, logs the
    dataset paths and hyperparameters to it, fits a ``LogisticRegression`` on
    the training split, logs the validation accuracy, and saves the fitted
    model to ``MODEL_PATH`` formatted with the run's id.

    Parameters
    ----------
    hyperparams : dict
        Keyword arguments passed to ``linear_model.LogisticRegression``; each
        key/value pair is also logged as a hyperparameter of the run.

    Returns
    -------
    None
        Results are reported through the run object, stdout, and the saved
        model file.
    """
    # create object to track experiment run
    run = client.set_experiment_run()

    # load and log data; the context manager closes the .npz archive as soon
    # as both arrays have been read into memory
    with np.load(TRAIN_DATA_PATH) as train_data:
        X_train, y_train = train_data['X'], train_data['y']
    run.log_dataset("train data", TRAIN_DATA_PATH)
    run.log_dataset("test data", TEST_DATA_PATH)

    # create validation split: hold out 10% of the rows, without shuffling
    X_train, X_val, y_train, y_val = model_selection.train_test_split(X_train, y_train,
                                                                      test_size=0.1, shuffle=False)
    # log hyperparameters
    for key, val in hyperparams.items():
        run.log_hyperparameter(key, val)
    print(hyperparams, end=' ')

    # create and train model
    model = linear_model.LogisticRegression(**hyperparams)
    model.fit(X_train, y_train)

    # calculate and log validation accuracy
    val_acc = model.score(X_val, y_val)
    run.log_metric("val acc", val_acc)
    print(f"Validation accuracy: {val_acc:.4f}")

    # save and log model; make sure the output directory exists first
    # (exist_ok=True keeps this safe when several workers reach it at once)
    model_path = MODEL_PATH.format(run.id)
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(model, model_path)
    run.log_model("model", model_path)

## Run Validation

In [5]:
# Evaluate every hyperparameter set in parallel: Pool.map hands each dict in
# hyperparam_sets to run_experiment in a worker process and blocks until all
# calls have finished. The mapped results are not kept.
with Pool() as p:
    p.map(run_experiment, hyperparam_sets)

{'C': 0.0001, 'solver': 'lbfgs', 'max_iter': 100000.0} Validation accuracy: 0.7988
{'C': 0.0001, 'solver': 'lbfgs', 'max_iter': 10000.0} Validation accuracy: 0.7988
{'C': 0.001, 'solver': 'lbfgs', 'max_iter': 10000.0} Validation accuracy: 0.8093
{'C': 0.001, 'solver': 'lbfgs', 'max_iter': 100000.0} Validation accuracy: 0.8093
{'C': 0.01, 'solver': 'lbfgs', 'max_iter': 100000.0} Validation accuracy: 0.8535
{'C': 0.01, 'solver': 'lbfgs', 'max_iter': 10000.0} Validation accuracy: 0.8535
{'C': 0.1, 'solver': 'lbfgs', 'max_iter': 10000.0} Validation accuracy: 0.8563
{'C': 0.1, 'solver': 'lbfgs', 'max_iter': 100000.0} Validation accuracy: 0.8563
{'C': 1, 'solver': 'lbfgs', 'max_iter': 10000.0} Validation accuracy: 0.8579
{'C': 1, 'solver': 'lbfgs', 'max_iter': 100000.0} Validation accuracy: 0.8579
