## Hyperparameter Search (Random Search)

In [1]:
from metal.contrib.slicing.synthetics.geometric_synthetics import generate_dataset
from metal.utils import split_data
from metal.contrib.backends.snorkel_gm_wrapper import SnorkelLabelModel

# --- Experiment configuration -------------------------------------------
NUM_TRIALS = 20  # not referenced in the cells visible here; kept for other cells
NUM_SLICES = 5   # forwarded to generate_dataset through Z_kwargs['num_slices']
# K, M, N are the three positional arguments of generate_dataset.
# Presumably K = number of classes, M = number of labeling functions and
# N = number of data points -- confirm against generate_dataset.
K = 2
M = 20
N = 5000
# Fixed seed so that "Restart Kernel -> Run All" regenerates the same synthetic
# dataset and the same shuffled split. Set to None to get a fresh draw per run.
seed = 123

# --- Synthetic data -----------------------------------------------------
Z_kwargs = {'num_slices': NUM_SLICES}
L, X, Y, Z, targeting_lfs_idx = generate_dataset(
    K, M, N,
    Z_kwargs=Z_kwargs,
    return_targeting_lfs=True,
    seed=seed,
)

# Shuffle and cut every array into three aligned parts (50% / 25% / 25%).
# The seed is passed here as well; otherwise the shuffle would differ between
# runs even though the generated data is identical.
Ls, Xs, Ys, Zs = split_data(
    L, X, Y, Z,
    splits=[0.5, 0.25, 0.25],
    shuffle=True,
    seed=seed,
)

# --- Training labels ----------------------------------------------------
# Fit the label model on the first split's label matrix only, then overwrite
# that split's labels with the model's probabilistic predictions. The other
# splits keep the labels returned by generate_dataset.
label_model = SnorkelLabelModel()
label_model.train_model(Ls[0])
Y_train = label_model.predict_proba(Ls[0])
Ys[0] = Y_train

In [2]:
# Dimensions handed to EndModel in the next cell.
layer_out_dims = [2, 10, 10, 2]

# Hyperparameter search space for the tuner.
#   * a list enumerates the candidate values for that hyperparameter;
#   * a dict with 'range'/'scale' presumably asks the tuner to sample from
#     that interval on the given scale -- verify against the tuner's docs;
#   * a bare scalar pins the hyperparameter to that value.
# Notes:
#   - 'l2' is pinned to 0; the alternative that was tried/considered is
#     {'range': [0.0001, 10], 'scale': 'log'}.
#   - 'batch_size' is intentionally absent: it is handled in the DataLoader.
search_space = dict(
    n_epochs=[10, 20, 40],
    lr=dict(range=[0.001, 1], scale='log'),
    l2=0,
    slice_weight=[0.01, 0.05, 0.1, 0.2, 0.5],
)

In [3]:
import torch
from torch.utils.data import DataLoader

from metal.tuners import RandomSearchTuner
from metal.contrib.slicing.online_dp import SliceHatModel
from metal.utils import SlicingDataset
from metal.end_model import EndModel

# Mini-batch loader over the first split: features, the densified label
# matrix (as a torch tensor) and the labels stored in Ys[0].
batch_size = 32
L_train = torch.Tensor(Ls[0].todense())
dataset = SlicingDataset(Xs[0], L_train, Ys[0])
train_loader = DataLoader(dataset, shuffle=True, batch_size=batch_size)

# End model that is handed to every SliceHatModel the tuner instantiates.
end_model = EndModel(layer_out_dims, verbose=False)

# Random search over `search_space`, scored on the second split (Xs[1], Ys[1]).
tuner = RandomSearchTuner(SliceHatModel, log_dir='checkpoints')
search_options = dict(
    dev_data=(Xs[1], Ys[1]),
    max_search=20,
    # Arguments used to construct each candidate model.
    init_args=[end_model, M],
    init_kwargs=dict(verbose=False),
    # Arguments used when training each candidate model.
    train_args=[train_loader],
    train_kwargs=dict(verbose=True, disable_prog_bar=True, print_every=5),
    verbose=True,
)
# The return value of search() is deliberately discarded.
_ = tuner.search(search_space, **search_options)

[0] Testing {'n_epochs': 40, 'slice_weight': 0.1, 'lr': 0.0034833194652165505}
Saving model at iteration 0 with best score 0.506
[E:0]	Train Loss: 0.660	Dev accuracy: 0.506
Saving model at iteration 1 with best score 0.732
Saving model at iteration 3 with best score 0.765
Saving model at iteration 5 with best score 0.783
[E:5]	Train Loss: 0.627	Dev accuracy: 0.783
[E:10]	Train Loss: 0.623	Dev accuracy: 0.781
Saving model at iteration 15 with best score 0.784
[E:15]	Train Loss: 0.622	Dev accuracy: 0.784
Saving model at iteration 17 with best score 0.785
Saving model at iteration 19 with best score 0.788
[E:20]	Train Loss: 0.621	Dev accuracy: 0.787
Saving model at iteration 21 with best score 0.790
Saving model at iteration 23 with best score 0.794
[E:25]	Train Loss: 0.621	Dev accuracy: 0.790
[E:30]	Train Loss: 0.621	Dev accuracy: 0.793
Saving model at iteration 31 with best score 0.795
[E:35]	Train Loss: 0.620	Dev accuracy: 0.792
[E:39]	Train Loss: 0.620	Dev accuracy: 0.792
Restoring be

Saving model at iteration 2 with best score 0.679
Saving model at iteration 3 with best score 0.740
Saving model at iteration 5 with best score 0.750
[E:5]	Train Loss: 0.657	Dev accuracy: 0.750
Saving model at iteration 6 with best score 0.750
Saving model at iteration 8 with best score 0.760
[E:10]	Train Loss: 0.655	Dev accuracy: 0.760
Saving model at iteration 11 with best score 0.766
Saving model at iteration 13 with best score 0.767
Saving model at iteration 14 with best score 0.770
Saving model at iteration 15 with best score 0.775
[E:15]	Train Loss: 0.654	Dev accuracy: 0.775
Saving model at iteration 16 with best score 0.778
Saving model at iteration 18 with best score 0.781
Saving model at iteration 20 with best score 0.785
[E:20]	Train Loss: 0.653	Dev accuracy: 0.785
Saving model at iteration 21 with best score 0.786
Saving model at iteration 22 with best score 0.787
Saving model at iteration 24 with best score 0.788
[E:25]	Train Loss: 0.652	Dev accuracy: 0.786
[E:30]	Train Los

[14] Testing {'n_epochs': 10, 'slice_weight': 0.01, 'lr': 0.0012534956634309685}
Saving model at iteration 0 with best score 0.275
[E:0]	Train Loss: 0.736	Dev accuracy: 0.275
Saving model at iteration 2 with best score 0.679
Saving model at iteration 3 with best score 0.739
Saving model at iteration 5 with best score 0.748
[E:5]	Train Loss: 0.679	Dev accuracy: 0.748
Saving model at iteration 6 with best score 0.750
Saving model at iteration 8 with best score 0.762
[E:9]	Train Loss: 0.677	Dev accuracy: 0.756
Restoring best model from iteration 8 with score 0.762
Finished Training
Accuracy: 0.762
        y=1    y=2   
 l=1    240    194   
 l=2    104    712   
[15] Testing {'n_epochs': 40, 'slice_weight': 0.1, 'lr': 0.0012534956634309685}
Saving model at iteration 0 with best score 0.275
[E:0]	Train Loss: 0.681	Dev accuracy: 0.275
Saving model at iteration 2 with best score 0.676
Saving model at iteration 3 with best score 0.739
Saving model at iteration 5 with best score 0.746
[E:5]	Tr