Run MLP (first layer weights fixed) on mnist and compute bias and variance

In [1]:
%matplotlib inline
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import queue
import os
import sys
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import copy 
import pandas as pd
class MLP(nn.Module):
    def __init__(self, p, d, o):
        """RF_models
        
        Args:
            p (int): the hidden size
            d (int): the input feature dimension
            o (int): the output dimension
        """
        super(MLP, self).__init__()

        self.fc1 = nn.Linear(d, p, bias=False)
        self.fc2 = nn.Linear(p, o, bias=False)
        self.p = p
        self.d = d
        self.o = o 
    def forward(self, x):
        o = F.relu(self.fc1(x))
        o = self.fc2(o)
        return o
class Ensemble_Two_Layer_NN(object):
    def __init__(self, n_classifiers, p, d=784, o=10):
        """Ensemble_Two_Layer_NN
        
        Args:
            p (int): the hidden size
            d (int, optional): the input feature dimension
            o (int, optional): the output dimension
            coef (float, optional): the ridge regression penalty coefficient
        """
        self.n_classifiers = n_classifiers
        self.p = p
        self.d = d 
        self.o = o 
        self.coef = coef
        self.learners = queue.LifoQueue(maxsize = self.n_classifiers)
        self.MODEL_TYPE = MLP
    def __len__(self):
        return len(self.learners.queue)
    
    def put_model_rho(self, model, rho):
        self.learners.put([model, rho])
    def get_init_model(self, cuda=True):
        model = self.MODEL_TYPE(self.p, self.d, self.o)
        if cuda:
            model.cuda()
        return model
    def cuda(self):
        if len(self) == 0:
            return 
        else:
            for model, rho in self.learners.queue:
                model.cuda()
            return
    def train(self):
        if len(self)!=0:
            for model, rho in self.learners.queue:
                model.train()
    def eval(self):
        if len(self)!=0:
            for model, rho in self.learners.queue:
                model.eval()
    def forward(self, x):
        Bs = x.size(0)
        if len(self) == 0:
            zeros = torch.zeros(Bs, self.o)
            zeros = zeros.to(x.device)
            return zeros
        else:
            outputs = torch.zeros(Bs, self.o)
            outputs = outputs.to(x.device) 
            for model, rho in self.learners.queue:
                output = model(x)
                outputs += rho*output
            return outputs


In [2]:
def get_subsample_dataset(trainset, subset):
    trainsubset = copy.deepcopy(trainset)
    trainsubset.data = [trainsubset.data[index] for index in subset]
    trainsubset.targets = [trainsubset.targets[index] for index in subset]
    return trainsubset
def fix_width_number(width, n_classifiers):
    return max(1, width//n_classifiers)

# Training
def train(net, trainset, permute_index, train_size, num_iters, lr, batch_size, coef):
    net.train()
    subsample_indexes = np.random.choice(permute_index, size=train_size)
    trainsubset = get_subsample_dataset(trainset, subsample_indexes)
    trainloader = torch.utils.data.DataLoader(trainsubset, batch_size=batch_size, shuffle=True)

    for i_c in range(net.n_classifiers):
        i_iter = 0
        model = net.get_init_model(cuda=True)
        rho = 1/net.n_classifiers
        optimizer = torch.optim.SGD(model.fc2.parameters(), lr=lr)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = num_iters//3, gamma = 0.1)
        while i_iter < num_iters:
            for inputs, targets in trainloader:
                Bs = inputs.size(0)
                inputs = inputs.reshape(Bs, -1)
                inputs, targets = inputs.cuda(), targets.cuda()
                targets_onehot = torch.FloatTensor(targets.size(0), net.o).cuda()
                targets_onehot.zero_()
                targets_onehot.scatter_(1, targets.view(-1, 1).long(), 1)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets_onehot)
                l2_reg = 0
                for param in model.fc2.parameters():
                    l2_reg += coef * torch.norm(param)
                mse_loss = loss.item()
                l2_loss = l2_reg.item()
                loss += l2_reg
                loss.backward()
                optimizer.step()
                lr_scheduler.step()
                string = "Train {} model: Iters [{}/{}] mse: {:.4f}, l2_loss: {:.4f}, train_loss:{:.4f}".format(i_c+1, i_iter, num_iters,  mse_loss, l2_loss, loss.item())
                sys.stdout.write(string+"\r")
                sys.stdout.flush()
                i_iter +=1
        net.put_model_rho(model, rho)
    # after all models were trained, estimate the mse error
    train_loss = 0
    correct = 0
    total = 0
    for inputs, targets in trainloader:
        Bs = inputs.size(0)
        inputs = inputs.reshape(Bs, -1)
        inputs, targets = inputs.cuda(), targets.cuda()
        targets_onehot = torch.FloatTensor(targets.size(0), net.o).cuda()
        targets_onehot.zero_()
        targets_onehot.scatter_(1, targets.view(-1, 1).long(), 1)
        outputs = net.forward(inputs)
        loss = criterion(outputs, targets_onehot)
        train_loss = loss.item() * outputs.numel()
        _, predicted = outputs.max(1)
        correct = predicted.eq(targets).sum().item()
        total = targets.size(0)
    return train_loss/ total , 100. * correct / total

# Test
def test(net, testloader):
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.cuda(), targets.cuda()
            Bs = inputs.size(0)
            inputs = inputs.reshape(Bs, -1)
            targets_onehot = torch.FloatTensor(targets.size(0), net.o).cuda()
            targets_onehot.zero_()
            targets_onehot.scatter_(1, targets.view(-1, 1).long(), 1)
            outputs = net.forward(inputs)
            loss = criterion(outputs, targets_onehot)
            test_loss += loss.item() * outputs.numel()
            _, predicted = outputs.max(1)
            correct += predicted.eq(targets).sum().item()
            total += targets.size(0)
    return test_loss / total, 100. * correct / total

def compute_bias_variance(net, testloader, trial, OUTPUST_SUM, OUTPUTS_SUMNORMSQUARED):
    net.eval()
    bias2 = 0
    variance = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.cuda(), targets.cuda()
            Bs = inputs.size(0)
            inputs = inputs.reshape(Bs, -1)
            targets_onehot = torch.FloatTensor(targets.size(0), net.o).cuda()
            targets_onehot.zero_()
            targets_onehot.scatter_(1, targets.view(-1, 1).long(), 1)
            outputs = net.forward(inputs)
            OUTPUST_SUM[total:(total + targets.size(0)), :] += outputs
            OUTPUTS_SUMNORMSQUARED[total:total + targets.size(0)] += outputs.norm(dim=1) ** 2.0

            bias2 += (OUTPUST_SUM[total:total + targets.size(0), :] / (trial + 1) - targets_onehot).norm() ** 2.0
            variance += OUTPUTS_SUMNORMSQUARED[total:total + targets.size(0)].sum()/(trial + 1) - (OUTPUST_SUM[total:total + targets.size(0), :]/(trial + 1)).norm() ** 2.0
            total += targets.size(0)

    return bias2 / total, variance / total


In [3]:
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=4)
# loss definition
criterion = nn.MSELoss(reduction='mean').cuda()

def run_exps_sgd(train_sizes, N_Ds, P_Ns, trainset, test_size, feature_dim, num_classes, num_trials, coef,
             outdir, save_csv, num_iters, lr, batch_size_list, K = 1):
    df = pd.DataFrame()
    for batch_size in batch_size_list:
        for train_size in train_sizes:
            hidden_sizes = P_Ns * train_size
            hidden_sizes = np.unique([int(np.around(x)) for x in hidden_sizes])
            for hidden_size in hidden_sizes:
                TRAIN_ACC_SUM = 0.0
                TEST_ACC_SUM = 0.0
                TRAIN_LOSS_SUM = 0.0
                TEST_LOSS_SUM = 0.0
                permute_index = np.random.permutation(len(trainset))
                OUTPUST_SUM = torch.Tensor(test_size, num_classes).zero_().cuda()
                OUTPUTS_SUMNORMSQUARED = torch.Tensor(test_size).zero_().cuda()
                for trial in range(num_trials):
                    net = Ensemble_Two_Layer_NN(n_classifiers = K, p = fix_width_number(hidden_size, K), d=feature_dim, o=num_classes)
                    net.cuda()
                    train_loss, train_acc = train(net, trainset, permute_index, train_size,
                                                 num_iters, lr, batch_size, coef)
                    test_loss, test_acc = test(net, testloader)

                    TRAIN_LOSS_SUM += train_loss
                    TEST_LOSS_SUM += test_loss
                    TRAIN_ACC_SUM += train_acc
                    TEST_ACC_SUM += test_acc

                    # compute bias and variance
                    bias2, variance = compute_bias_variance(net, testloader, trial, OUTPUST_SUM, OUTPUTS_SUMNORMSQUARED)
                    variance_unbias = variance * num_trials / (num_trials - 1.0)
                    bias2_unbias = TEST_LOSS_SUM / (trial + 1) - variance_unbias
                    print('Train size: [{}] hidden size: [{}] batch size: [{}] trial: {}, train_loss: {:.6f}, train acc: {}, test loss: {:.6f}, test acc: {}, bias2: {}, variance: {}'.format(
                        train_size, hidden_size, batch_size,
                        trial, TRAIN_LOSS_SUM / (trial + 1), TRAIN_ACC_SUM / (trial + 1), TEST_LOSS_SUM / (trial + 1),
                        TEST_ACC_SUM / (trial + 1), bias2_unbias, variance_unbias))
                    torch.cuda.empty_cache()
                print('#'*50)
                df = df.append({'train_size': train_size, 'hidden_size':hidden_size, 'batch_size': batch_size,
                                'train_loss': TRAIN_LOSS_SUM / (trial + 1), 'train_acc': TRAIN_ACC_SUM / (trial + 1),
                                'test_loss': TEST_LOSS_SUM / (trial + 1), 'test_acc': TEST_ACC_SUM / (trial + 1), 
                               'variance': variance_unbias.item(),
                               'bias2': bias2_unbias.item()}, ignore_index=True)
                df.to_csv(os.path.join(outdir, save_csv))
    df.to_csv(os.path.join(outdir, save_csv))

In [None]:
num_classes = 10
num_trials = 50
coef = 0.0001
N_Ds = [1]
feature_dim = 784
lr = 0.01 
batch_size_list = [10, 784]
train_sizes = [int(np.around(x*feature_dim)) for x in N_Ds]
test_size = 10000
P_Ns = 10** np.linspace(-2, 1, 50)

for num_iters in [500, 5000]:
    outdir = 'mnist_SGD/num_iters_{}_coef={}'.format(num_iters, coef)
    print(outdir)
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    run_exps_sgd(train_sizes, N_Ds, P_Ns, trainset, test_size, feature_dim, num_classes, num_trials, coef,
             outdir, 'singleNN_output.csv', num_iters = num_iters, lr=lr, batch_size_list=batch_size_list,  K = 1)
    run_exps_sgd(train_sizes, N_Ds, P_Ns, trainset, test_size, feature_dim, num_classes, num_trials, coef,
                 outdir, 'ensembleNNK=2_output.csv', num_iters = num_iters, lr=lr, batch_size_list=batch_size_list,  K = 2)

mnist_SGD/num_iters_500_coef=0.0001
Train size: [784] hidden size: [8] batch size: [10] trial: 0, train_loss: 1.082811, train acc: 0.0, test loss: 1.151223, test acc: 17.67, bias2: 1.1512227058410645, variance: -2.919411101753866e-10
Train size: [784] hidden size: [8] batch size: [10] trial: 1, train_loss: 1.261458, train acc: 0.0, test loss: 1.295968, test acc: 14.780000000000001, bias2: 1.1295108795166016, variance: 0.1664571762084961
Train size: [784] hidden size: [8] batch size: [10] trial: 2, train_loss: 1.307112, train acc: 0.0, test loss: 1.321589, test acc: 14.57, bias2: 1.0829147100448608, variance: 0.23867428302764893
Train size: [784] hidden size: [8] batch size: [10] trial: 3, train_loss: 1.294072, train acc: 0.0, test loss: 1.340458, test acc: 12.342500000000001, bias2: 1.0593767166137695, variance: 0.28108102083206177
Train size: [784] hidden size: [8] batch size: [10] trial: 4, train_loss: 1.383258, train acc: 0.0, test loss: 1.359596, test acc: 12.16, bias2: 1.058304071

Train size: [784] hidden size: [8] batch size: [10] trial: 38, train_loss: 1.298965, train acc: 5.769230769230769, test loss: 1.342376, test acc: 10.10282051282051, bias2: 0.978598952293396, variance: 0.3637765944004059
Train size: [784] hidden size: [8] batch size: [10] trial: 39, train_loss: 1.292434, train acc: 6.25, test loss: 1.341078, test acc: 10.113499999999998, bias2: 0.9763325452804565, variance: 0.36474499106407166
Train size: [784] hidden size: [8] batch size: [10] trial: 40, train_loss: 1.292440, train acc: 6.097560975609756, test loss: 1.338911, test acc: 10.12341463414634, bias2: 0.9764930009841919, variance: 0.3624175786972046
Train size: [784] hidden size: [8] batch size: [10] trial: 41, train_loss: 1.288635, train acc: 5.9523809523809526, test loss: 1.335548, test acc: 9.989047619047616, bias2: 0.9767924547195435, variance: 0.3587553799152374
Train size: [784] hidden size: [8] batch size: [10] trial: 42, train_loss: 1.285624, train acc: 5.813953488372093, test loss: 1

Train size: [784] hidden size: [9] batch size: [10] trial: 26, train_loss: 1.278606, train acc: 12.962962962962964, test loss: 1.297617, test acc: 10.40037037037037, bias2: 0.936322808265686, variance: 0.361294150352478
Train size: [784] hidden size: [9] batch size: [10] trial: 27, train_loss: 1.284965, train acc: 13.392857142857142, test loss: 1.292711, test acc: 10.4225, bias2: 0.9353330135345459, variance: 0.35737839341163635
Train size: [784] hidden size: [9] batch size: [10] trial: 28, train_loss: 1.277033, train acc: 12.931034482758621, test loss: 1.288423, test acc: 10.539310344827586, bias2: 0.9317633509635925, variance: 0.3566599488258362
Train size: [784] hidden size: [9] batch size: [10] trial: 29, train_loss: 1.278571, train acc: 12.5, test loss: 1.285225, test acc: 10.452, bias2: 0.9317762851715088, variance: 0.35344839096069336
Train size: [784] hidden size: [9] batch size: [10] trial: 30, train_loss: 1.287623, train acc: 12.096774193548388, test loss: 1.291310, test acc:

Train size: [784] hidden size: [10] batch size: [10] trial: 14, train_loss: 1.298010, train acc: 8.333333333333334, test loss: 1.306428, test acc: 11.213333333333335, bias2: 0.9733267426490784, variance: 0.33310168981552124
Train size: [784] hidden size: [10] batch size: [10] trial: 15, train_loss: 1.312029, train acc: 7.8125, test loss: 1.306842, test acc: 11.02875, bias2: 0.9749418497085571, variance: 0.33189988136291504
Train size: [784] hidden size: [10] batch size: [10] trial: 16, train_loss: 1.342984, train acc: 7.352941176470588, test loss: 1.321200, test acc: 11.030000000000001, bias2: 0.9716753959655762, variance: 0.3495241105556488
Train size: [784] hidden size: [10] batch size: [10] trial: 17, train_loss: 1.349364, train acc: 6.944444444444445, test loss: 1.319679, test acc: 11.096666666666668, bias2: 0.9665683507919312, variance: 0.3531111776828766
Train size: [784] hidden size: [10] batch size: [10] trial: 18, train_loss: 1.344174, train acc: 6.578947368421052, test loss: 

Train size: [784] hidden size: [12] batch size: [10] trial: 2, train_loss: 1.208296, train acc: 0.0, test loss: 1.232251, test acc: 9.909999999999998, bias2: 1.0399352312088013, variance: 0.19231536984443665
Train size: [784] hidden size: [12] batch size: [10] trial: 3, train_loss: 1.185280, train acc: 0.0, test loss: 1.230808, test acc: 9.889999999999999, bias2: 1.032089352607727, variance: 0.19871844351291656
Train size: [784] hidden size: [12] batch size: [10] trial: 4, train_loss: 1.169298, train acc: 5.0, test loss: 1.258882, test acc: 9.291999999999998, bias2: 1.0390169620513916, variance: 0.21986529231071472
Train size: [784] hidden size: [12] batch size: [10] trial: 5, train_loss: 1.161824, train acc: 8.333333333333334, test loss: 1.265059, test acc: 9.336666666666666, bias2: 1.0268676280975342, variance: 0.2381916642189026
Train size: [784] hidden size: [12] batch size: [10] trial: 6, train_loss: 1.215494, train acc: 7.142857142857143, test loss: 1.257815, test acc: 9.01714285

Train size: [784] hidden size: [12] batch size: [10] trial: 40, train_loss: 1.254583, train acc: 9.146341463414634, test loss: 1.269475, test acc: 11.230975609756095, bias2: 0.9484221935272217, variance: 0.3210528790950775
Train size: [784] hidden size: [12] batch size: [10] trial: 41, train_loss: 1.251304, train acc: 8.928571428571429, test loss: 1.268479, test acc: 11.220714285714283, bias2: 0.9477465152740479, variance: 0.3207327425479889
Train size: [784] hidden size: [12] batch size: [10] trial: 42, train_loss: 1.245251, train acc: 9.30232558139535, test loss: 1.266515, test acc: 11.186511627906976, bias2: 0.9471493363380432, variance: 0.31936532258987427
Train size: [784] hidden size: [12] batch size: [10] trial: 43, train_loss: 1.240399, train acc: 9.659090909090908, test loss: 1.265241, test acc: 11.143863636363635, bias2: 0.9448522329330444, variance: 0.3203882873058319
Train size: [784] hidden size: [12] batch size: [10] trial: 44, train_loss: 1.241058, train acc: 10.0, test 

Train size: [784] hidden size: [14] batch size: [10] trial: 28, train_loss: 1.204722, train acc: 10.344827586206897, test loss: 1.253813, test acc: 11.325862068965517, bias2: 0.9252249002456665, variance: 0.3285883367061615
Train size: [784] hidden size: [14] batch size: [10] trial: 29, train_loss: 1.205378, train acc: 10.833333333333334, test loss: 1.260328, test acc: 11.440333333333333, bias2: 0.9270839691162109, variance: 0.33324435353279114
Train size: [784] hidden size: [14] batch size: [10] trial: 30, train_loss: 1.199717, train acc: 10.483870967741936, test loss: 1.264051, test acc: 11.345806451612903, bias2: 0.9308531880378723, variance: 0.3331974148750305
Train size: [784] hidden size: [14] batch size: [10] trial: 31, train_loss: 1.198128, train acc: 10.9375, test loss: 1.262102, test acc: 11.307812499999999, bias2: 0.9291877746582031, variance: 0.33291375637054443
Train size: [784] hidden size: [14] batch size: [10] trial: 32, train_loss: 1.186484, train acc: 12.1212121212121

Train size: [784] hidden size: [16] batch size: [10] trial: 16, train_loss: 1.290681, train acc: 5.882352941176471, test loss: 1.285071, test acc: 10.32235294117647, bias2: 0.9390250444412231, variance: 0.34604549407958984
Train size: [784] hidden size: [16] batch size: [10] trial: 17, train_loss: 1.287795, train acc: 5.555555555555555, test loss: 1.280942, test acc: 10.58222222222222, bias2: 0.9323407411575317, variance: 0.3486007750034332
Train size: [784] hidden size: [16] batch size: [10] trial: 18, train_loss: 1.270821, train acc: 5.2631578947368425, test loss: 1.270709, test acc: 10.47157894736842, bias2: 0.9277312755584717, variance: 0.3429774045944214
Train size: [784] hidden size: [16] batch size: [10] trial: 19, train_loss: 1.278408, train acc: 5.0, test loss: 1.270498, test acc: 10.530999999999999, bias2: 0.9234095215797424, variance: 0.34708887338638306
Train size: [784] hidden size: [16] batch size: [10] trial: 20, train_loss: 1.273205, train acc: 5.9523809523809526, test 

Train size: [784] hidden size: [18] batch size: [10] trial: 3, train_loss: 1.207924, train acc: 18.75, test loss: 1.232199, test acc: 9.405, bias2: 0.9961391091346741, variance: 0.2360599786043167
Train size: [784] hidden size: [18] batch size: [10] trial: 4, train_loss: 1.227314, train acc: 15.0, test loss: 1.244643, test acc: 9.947999999999999, bias2: 0.9879249930381775, variance: 0.2567179799079895
Train size: [784] hidden size: [18] batch size: [10] trial: 5, train_loss: 1.232573, train acc: 12.5, test loss: 1.247709, test acc: 9.308333333333332, bias2: 0.9825710654258728, variance: 0.26513785123825073
Train size: [784] hidden size: [18] batch size: [10] trial: 6, train_loss: 1.242018, train acc: 10.714285714285714, test loss: 1.248547, test acc: 9.861428571428572, bias2: 0.9803647994995117, variance: 0.26818227767944336
Train size: [784] hidden size: [18] batch size: [10] trial: 7, train_loss: 1.224328, train acc: 12.5, test loss: 1.247849, test acc: 10.78625, bias2: 0.96371459960

Train size: [784] hidden size: [18] batch size: [10] trial: 41, train_loss: 1.243221, train acc: 10.119047619047619, test loss: 1.245649, test acc: 11.044761904761904, bias2: 0.9149579405784607, variance: 0.33069103956222534
Train size: [784] hidden size: [18] batch size: [10] trial: 42, train_loss: 1.243126, train acc: 9.883720930232558, test loss: 1.245915, test acc: 10.96860465116279, bias2: 0.9167384505271912, variance: 0.3291768431663513
Train size: [784] hidden size: [18] batch size: [10] trial: 43, train_loss: 1.237573, train acc: 9.659090909090908, test loss: 1.246262, test acc: 10.889318181818181, bias2: 0.9170897603034973, variance: 0.32917243242263794
Train size: [784] hidden size: [18] batch size: [10] trial: 44, train_loss: 1.231129, train acc: 10.0, test loss: 1.244564, test acc: 10.936666666666666, bias2: 0.9162441492080688, variance: 0.3283202350139618
Train size: [784] hidden size: [18] batch size: [10] trial: 45, train_loss: 1.232458, train acc: 9.782608695652174, tes

Train size: [784] hidden size: [21] batch size: [10] trial: 29, train_loss: 1.149674, train acc: 14.166666666666666, test loss: 1.209802, test acc: 11.698666666666664, bias2: 0.9063127636909485, variance: 0.3034890294075012
Train size: [784] hidden size: [21] batch size: [10] trial: 30, train_loss: 1.147155, train acc: 14.516129032258064, test loss: 1.211305, test acc: 11.749354838709674, bias2: 0.9084961414337158, variance: 0.3028090000152588
Train size: [784] hidden size: [21] batch size: [10] trial: 31, train_loss: 1.145385, train acc: 15.625, test loss: 1.213824, test acc: 11.826249999999996, bias2: 0.9090598821640015, variance: 0.3047637641429901
Train size: [784] hidden size: [21] batch size: [10] trial: 32, train_loss: 1.147486, train acc: 15.151515151515152, test loss: 1.214914, test acc: 11.807575757575753, bias2: 0.9106749296188354, variance: 0.3042386770248413
Train size: [784] hidden size: [21] batch size: [10] trial: 33, train_loss: 1.149255, train acc: 15.441176470588236,

Train size: [784] hidden size: [24] batch size: [10] trial: 16, train_loss: 1.228255, train acc: 13.235294117647058, test loss: 1.202639, test acc: 11.525882352941178, bias2: 0.9097043871879578, variance: 0.2929350733757019
Train size: [784] hidden size: [24] batch size: [10] trial: 17, train_loss: 1.217144, train acc: 13.88888888888889, test loss: 1.202465, test acc: 11.53277777777778, bias2: 0.9118952751159668, variance: 0.2905693054199219
Train size: [784] hidden size: [24] batch size: [10] trial: 18, train_loss: 1.227945, train acc: 13.157894736842104, test loss: 1.201689, test acc: 11.409473684210528, bias2: 0.9107152223587036, variance: 0.29097428917884827
Train size: [784] hidden size: [24] batch size: [10] trial: 19, train_loss: 1.223399, train acc: 12.5, test loss: 1.201093, test acc: 11.489500000000001, bias2: 0.9075281620025635, variance: 0.29356515407562256
Train size: [784] hidden size: [24] batch size: [10] trial: 20, train_loss: 1.227223, train acc: 11.904761904761905, t

Train size: [784] hidden size: [28] batch size: [10] trial: 4, train_loss: 1.269667, train acc: 10.0, test loss: 1.199428, test acc: 10.16, bias2: 0.9693907499313354, variance: 0.23003724217414856
Train size: [784] hidden size: [28] batch size: [10] trial: 5, train_loss: 1.233957, train acc: 16.666666666666668, test loss: 1.205851, test acc: 10.635, bias2: 0.9507343769073486, variance: 0.25511667132377625
Train size: [784] hidden size: [28] batch size: [10] trial: 6, train_loss: 1.211084, train acc: 14.285714285714286, test loss: 1.205264, test acc: 10.444285714285714, bias2: 0.9441977143287659, variance: 0.2610660195350647
Train size: [784] hidden size: [28] batch size: [10] trial: 7, train_loss: 1.240217, train acc: 12.5, test loss: 1.201726, test acc: 10.6775, bias2: 0.9404301643371582, variance: 0.2612956762313843
Train size: [784] hidden size: [28] batch size: [10] trial: 8, train_loss: 1.240700, train acc: 11.11111111111111, test loss: 1.194812, test acc: 10.714444444444446, bias

Train size: [784] hidden size: [28] batch size: [10] trial: 42, train_loss: 1.195260, train acc: 12.790697674418604, test loss: 1.185512, test acc: 11.40046511627907, bias2: 0.8986880779266357, variance: 0.2868237793445587
Train size: [784] hidden size: [28] batch size: [10] trial: 43, train_loss: 1.189225, train acc: 13.068181818181818, test loss: 1.182950, test acc: 11.474318181818182, bias2: 0.8970934152603149, variance: 0.2858565151691437
Train size: [784] hidden size: [28] batch size: [10] trial: 44, train_loss: 1.199098, train acc: 12.777777777777779, test loss: 1.186732, test acc: 11.409777777777778, bias2: 0.8987220525741577, variance: 0.288009911775589
Train size: [784] hidden size: [28] batch size: [10] trial: 45, train_loss: 1.201035, train acc: 12.5, test loss: 1.189086, test acc: 11.32217391304348, bias2: 0.9012190103530884, variance: 0.28786662220954895
Train size: [784] hidden size: [28] batch size: [10] trial: 46, train_loss: 1.199919, train acc: 12.23404255319149, test

Train size: [784] hidden size: [32] batch size: [10] trial: 30, train_loss: 1.125195, train acc: 15.32258064516129, test loss: 1.166660, test acc: 11.557741935483868, bias2: 0.8916456699371338, variance: 0.27501389384269714
Train size: [784] hidden size: [32] batch size: [10] trial: 31, train_loss: 1.123743, train acc: 15.625, test loss: 1.164507, test acc: 11.595937499999996, bias2: 0.8902578353881836, variance: 0.27424904704093933
Train size: [784] hidden size: [32] batch size: [10] trial: 32, train_loss: 1.119287, train acc: 15.909090909090908, test loss: 1.161916, test acc: 11.875151515151511, bias2: 0.8890649080276489, variance: 0.2728508710861206
Train size: [784] hidden size: [32] batch size: [10] trial: 33, train_loss: 1.125661, train acc: 15.441176470588236, test loss: 1.166535, test acc: 11.729117647058821, bias2: 0.8913431763648987, variance: 0.2751917243003845
Train size: [784] hidden size: [32] batch size: [10] trial: 34, train_loss: 1.124391, train acc: 15.714285714285714

Train size: [784] hidden size: [37] batch size: [10] trial: 18, train_loss: 1.104836, train acc: 22.36842105263158, test loss: 1.134201, test acc: 12.854210526315788, bias2: 0.8687645792961121, variance: 0.26543623208999634
Train size: [784] hidden size: [37] batch size: [10] trial: 19, train_loss: 1.097550, train acc: 22.5, test loss: 1.135697, test acc: 12.9475, bias2: 0.8705068230628967, variance: 0.2651905417442322
Train size: [784] hidden size: [37] batch size: [10] trial: 20, train_loss: 1.100131, train acc: 22.61904761904762, test loss: 1.137862, test acc: 12.83238095238095, bias2: 0.8732951283454895, variance: 0.2645666003227234
Train size: [784] hidden size: [37] batch size: [10] trial: 21, train_loss: 1.099489, train acc: 21.59090909090909, test loss: 1.135292, test acc: 12.845454545454544, bias2: 0.8717070817947388, variance: 0.26358476281166077
Train size: [784] hidden size: [37] batch size: [10] trial: 22, train_loss: 1.092258, train acc: 21.73913043478261, test loss: 1.13

Train size: [784] hidden size: [43] batch size: [10] trial: 5, train_loss: 1.061948, train acc: 20.833333333333332, test loss: 1.104837, test acc: 15.255, bias2: 0.88773113489151, variance: 0.2171054482460022
Train size: [784] hidden size: [43] batch size: [10] trial: 6, train_loss: 1.064990, train acc: 28.571428571428573, test loss: 1.102622, test acc: 15.911428571428571, bias2: 0.8743802905082703, variance: 0.22824174165725708
Train size: [784] hidden size: [43] batch size: [10] trial: 7, train_loss: 1.071700, train acc: 25.0, test loss: 1.100049, test acc: 15.6725, bias2: 0.870698094367981, variance: 0.22935079038143158
Train size: [784] hidden size: [43] batch size: [10] trial: 8, train_loss: 1.068505, train acc: 25.0, test loss: 1.105492, test acc: 15.156666666666666, bias2: 0.8739234805107117, variance: 0.23156899213790894
Train size: [784] hidden size: [43] batch size: [10] trial: 9, train_loss: 1.062126, train acc: 22.5, test loss: 1.104415, test acc: 14.915000000000001, bias2:

Train size: [784] hidden size: [43] batch size: [10] trial: 43, train_loss: 1.125397, train acc: 10.795454545454545, test loss: 1.120756, test acc: 13.001136363636363, bias2: 0.8641129732131958, variance: 0.2566434442996979
Train size: [784] hidden size: [43] batch size: [10] trial: 44, train_loss: 1.130707, train acc: 11.11111111111111, test loss: 1.120796, test acc: 13.046666666666665, bias2: 0.8632057905197144, variance: 0.2575904130935669
Train size: [784] hidden size: [43] batch size: [10] trial: 45, train_loss: 1.134077, train acc: 11.41304347826087, test loss: 1.120567, test acc: 13.14478260869565, bias2: 0.8630887269973755, variance: 0.2574787437915802
Train size: [784] hidden size: [43] batch size: [10] trial: 46, train_loss: 1.133803, train acc: 11.702127659574469, test loss: 1.120456, test acc: 13.110212765957444, bias2: 0.863055408000946, variance: 0.2574008107185364
Train size: [784] hidden size: [43] batch size: [10] trial: 47, train_loss: 1.138926, train acc: 11.45833333

Train size: [784] hidden size: [49] batch size: [10] trial: 31, train_loss: 1.104851, train acc: 10.15625, test loss: 1.117655, test acc: 12.428749999999997, bias2: 0.8674958944320679, variance: 0.25015923380851746
Train size: [784] hidden size: [49] batch size: [10] trial: 32, train_loss: 1.115308, train acc: 9.848484848484848, test loss: 1.119075, test acc: 12.29333333333333, bias2: 0.8683716058731079, variance: 0.250703364610672
Train size: [784] hidden size: [49] batch size: [10] trial: 33, train_loss: 1.116311, train acc: 9.558823529411764, test loss: 1.119648, test acc: 12.279999999999996, bias2: 0.8685927391052246, variance: 0.25105488300323486
Train size: [784] hidden size: [49] batch size: [10] trial: 34, train_loss: 1.118541, train acc: 9.285714285714286, test loss: 1.119844, test acc: 12.282571428571424, bias2: 0.8682212829589844, variance: 0.2516225576400757
Train size: [784] hidden size: [49] batch size: [10] trial: 35, train_loss: 1.115677, train acc: 9.027777777777779, t

Train size: [784] hidden size: [56] batch size: [10] trial: 19, train_loss: 1.095664, train acc: 17.5, test loss: 1.104777, test acc: 12.937000000000001, bias2: 0.8613460659980774, variance: 0.24343067407608032
Train size: [784] hidden size: [56] batch size: [10] trial: 20, train_loss: 1.094274, train acc: 16.666666666666668, test loss: 1.102426, test acc: 13.384761904761904, bias2: 0.8592116832733154, variance: 0.24321460723876953
Train size: [784] hidden size: [56] batch size: [10] trial: 21, train_loss: 1.104793, train acc: 15.909090909090908, test loss: 1.102763, test acc: 13.186363636363636, bias2: 0.8602311015129089, variance: 0.24253219366073608
Train size: [784] hidden size: [56] batch size: [10] trial: 22, train_loss: 1.105527, train acc: 15.217391304347826, test loss: 1.101920, test acc: 13.149130434782606, bias2: 0.8597294688224792, variance: 0.2421901822090149
Train size: [784] hidden size: [56] batch size: [10] trial: 23, train_loss: 1.116999, train acc: 14.583333333333334

Train size: [784] hidden size: [65] batch size: [10] trial: 6, train_loss: 1.089929, train acc: 17.857142857142858, test loss: 1.074011, test acc: 14.80857142857143, bias2: 0.8582452535629272, variance: 0.21576620638370514
Train size: [784] hidden size: [65] batch size: [10] trial: 7, train_loss: 1.065579, train acc: 21.875, test loss: 1.075749, test acc: 14.39, bias2: 0.85636305809021, variance: 0.21938572824001312
Train size: [784] hidden size: [65] batch size: [10] trial: 8, train_loss: 1.084884, train acc: 19.444444444444443, test loss: 1.079749, test acc: 14.055555555555555, bias2: 0.8587977886199951, variance: 0.220951110124588
Train size: [784] hidden size: [65] batch size: [10] trial: 9, train_loss: 1.073834, train acc: 17.5, test loss: 1.070020, test acc: 14.805000000000001, bias2: 0.8481193780899048, variance: 0.22190070152282715
Train size: [784] hidden size: [65] batch size: [10] trial: 10, train_loss: 1.106512, train acc: 15.909090909090908, test loss: 1.071390, test acc: 

Train size: [784] hidden size: [65] batch size: [10] trial: 43, train_loss: 1.095915, train acc: 16.477272727272727, test loss: 1.082643, test acc: 14.408181818181818, bias2: 0.8377985954284668, variance: 0.24484418332576752
Train size: [784] hidden size: [65] batch size: [10] trial: 44, train_loss: 1.097322, train acc: 16.11111111111111, test loss: 1.082504, test acc: 14.349777777777778, bias2: 0.8382198810577393, variance: 0.24428440630435944
Train size: [784] hidden size: [65] batch size: [10] trial: 45, train_loss: 1.095655, train acc: 16.304347826086957, test loss: 1.083542, test acc: 14.27304347826087, bias2: 0.8391525745391846, variance: 0.24438942968845367
Train size: [784] hidden size: [65] batch size: [10] trial: 46, train_loss: 1.094574, train acc: 15.957446808510639, test loss: 1.084201, test acc: 14.227021276595746, bias2: 0.8401125073432922, variance: 0.2440883368253708
Train size: [784] hidden size: [65] batch size: [10] trial: 47, train_loss: 1.095057, train acc: 15.625

Train size: [784] hidden size: [75] batch size: [10] trial: 31, train_loss: 1.081128, train acc: 14.84375, test loss: 1.066198, test acc: 15.7115625, bias2: 0.8310951590538025, variance: 0.2351033240556717
Train size: [784] hidden size: [75] batch size: [10] trial: 32, train_loss: 1.077020, train acc: 15.151515151515152, test loss: 1.065172, test acc: 15.747878787878786, bias2: 0.8301804065704346, variance: 0.23499202728271484
Train size: [784] hidden size: [75] batch size: [10] trial: 33, train_loss: 1.083117, train acc: 14.705882352941176, test loss: 1.063718, test acc: 15.854411764705882, bias2: 0.8288459181785583, variance: 0.23487167060375214
Train size: [784] hidden size: [75] batch size: [10] trial: 34, train_loss: 1.091182, train acc: 14.285714285714286, test loss: 1.065153, test acc: 15.685142857142855, bias2: 0.8302042484283447, variance: 0.23494863510131836
Train size: [784] hidden size: [75] batch size: [10] trial: 35, train_loss: 1.088941, train acc: 14.583333333333334, te

Train size: [784] hidden size: [86] batch size: [10] trial: 19, train_loss: 1.047096, train acc: 13.75, test loss: 1.071277, test acc: 14.7875, bias2: 0.844903826713562, variance: 0.22637304663658142
Train size: [784] hidden size: [86] batch size: [10] trial: 20, train_loss: 1.048625, train acc: 14.285714285714286, test loss: 1.072001, test acc: 14.711428571428572, bias2: 0.8454965949058533, variance: 0.22650474309921265
Train size: [784] hidden size: [86] batch size: [10] trial: 21, train_loss: 1.045067, train acc: 14.772727272727273, test loss: 1.070260, test acc: 14.87409090909091, bias2: 0.8441267013549805, variance: 0.22613368928432465
Train size: [784] hidden size: [86] batch size: [10] trial: 22, train_loss: 1.040721, train acc: 14.130434782608695, test loss: 1.068875, test acc: 15.013478260869565, bias2: 0.8417913913726807, variance: 0.22708353400230408
Train size: [784] hidden size: [86] batch size: [10] trial: 23, train_loss: 1.042303, train acc: 13.541666666666666, test loss

Train size: [784] hidden size: [99] batch size: [10] trial: 7, train_loss: 1.087663, train acc: 12.5, test loss: 1.013796, test acc: 20.24125, bias2: 0.8061822056770325, variance: 0.20761390030384064
Train size: [784] hidden size: [99] batch size: [10] trial: 8, train_loss: 1.115358, train acc: 11.11111111111111, test loss: 1.019639, test acc: 19.46666666666667, bias2: 0.8078407645225525, variance: 0.21179872751235962
Train size: [784] hidden size: [99] batch size: [10] trial: 9, train_loss: 1.100360, train acc: 12.5, test loss: 1.026612, test acc: 18.834000000000003, bias2: 0.8122377395629883, variance: 0.21437454223632812
Train size: [784] hidden size: [99] batch size: [10] trial: 10, train_loss: 1.085189, train acc: 13.636363636363637, test loss: 1.028155, test acc: 18.575454545454548, bias2: 0.8116401433944702, variance: 0.21651531755924225
Train size: [784] hidden size: [99] batch size: [10] trial: 11, train_loss: 1.094667, train acc: 12.5, test loss: 1.032962, test acc: 18.111666

Train size: [784] hidden size: [99] batch size: [10] trial: 44, train_loss: 1.035509, train acc: 15.555555555555555, test loss: 1.037008, test acc: 17.753333333333337, bias2: 0.8027238845825195, variance: 0.23428429663181305
Train size: [784] hidden size: [99] batch size: [10] trial: 45, train_loss: 1.039756, train acc: 15.76086956521739, test loss: 1.037779, test acc: 17.670869565217398, bias2: 0.8031991124153137, variance: 0.23457984626293182
Train size: [784] hidden size: [99] batch size: [10] trial: 46, train_loss: 1.033702, train acc: 15.957446808510639, test loss: 1.037371, test acc: 17.661914893617027, bias2: 0.8026428818702698, variance: 0.23472826182842255
Train size: [784] hidden size: [99] batch size: [10] trial: 47, train_loss: 1.031494, train acc: 16.145833333333332, test loss: 1.037798, test acc: 17.601875000000003, bias2: 0.8032433986663818, variance: 0.23455415666103363
Train size: [784] hidden size: [99] batch size: [10] trial: 48, train_loss: 1.033825, train acc: 15.8

Train size: [784] hidden size: [114] batch size: [10] trial: 31, train_loss: 1.017823, train acc: 17.1875, test loss: 1.032586, test acc: 18.795625, bias2: 0.8024402856826782, variance: 0.23014618456363678
Train size: [784] hidden size: [114] batch size: [10] trial: 32, train_loss: 1.018154, train acc: 16.666666666666668, test loss: 1.032237, test acc: 18.762424242424245, bias2: 0.8027926087379456, variance: 0.2294439673423767
Train size: [784] hidden size: [114] batch size: [10] trial: 33, train_loss: 1.014534, train acc: 16.91176470588235, test loss: 1.032233, test acc: 18.804411764705886, bias2: 0.8018543720245361, variance: 0.23037837445735931
Train size: [784] hidden size: [114] batch size: [10] trial: 34, train_loss: 1.016317, train acc: 17.142857142857142, test loss: 1.032498, test acc: 18.788571428571434, bias2: 0.8015204071998596, variance: 0.23097725212574005
Train size: [784] hidden size: [114] batch size: [10] trial: 35, train_loss: 1.013964, train acc: 18.055555555555557, 

Train size: [784] hidden size: [131] batch size: [10] trial: 19, train_loss: 0.997795, train acc: 22.5, test loss: 1.012368, test acc: 19.365500000000004, bias2: 0.791596531867981, variance: 0.2207716852426529
Train size: [784] hidden size: [131] batch size: [10] trial: 20, train_loss: 0.995411, train acc: 21.428571428571427, test loss: 1.012205, test acc: 19.268095238095242, bias2: 0.7908275127410889, variance: 0.22137723863124847
Train size: [784] hidden size: [131] batch size: [10] trial: 21, train_loss: 0.999379, train acc: 20.454545454545453, test loss: 1.014355, test acc: 19.068636363636365, bias2: 0.7927940487861633, variance: 0.2215615063905716
Train size: [784] hidden size: [131] batch size: [10] trial: 22, train_loss: 1.002492, train acc: 20.652173913043477, test loss: 1.015532, test acc: 18.962608695652175, bias2: 0.7933552265167236, variance: 0.22217728197574615
Train size: [784] hidden size: [131] batch size: [10] trial: 23, train_loss: 0.998847, train acc: 20.833333333333

Train size: [784] hidden size: [151] batch size: [10] trial: 6, train_loss: 0.969980, train acc: 14.285714285714286, test loss: 0.991912, test acc: 22.021428571428572, bias2: 0.7953548431396484, variance: 0.1965574324131012
Train size: [784] hidden size: [151] batch size: [10] trial: 7, train_loss: 0.990850, train acc: 12.5, test loss: 0.989330, test acc: 21.89125, bias2: 0.7922597527503967, variance: 0.19706988334655762
Train size: [784] hidden size: [151] batch size: [10] trial: 8, train_loss: 0.979288, train acc: 11.11111111111111, test loss: 0.986705, test acc: 21.85222222222222, bias2: 0.7888228297233582, variance: 0.19788259267807007
Train size: [784] hidden size: [151] batch size: [10] trial: 9, train_loss: 0.999358, train acc: 12.5, test loss: 0.989890, test acc: 21.276999999999997, bias2: 0.7904002666473389, variance: 0.19948947429656982
Train size: [784] hidden size: [151] batch size: [10] trial: 10, train_loss: 1.000821, train acc: 11.363636363636363, test loss: 0.988776, te

Train size: [784] hidden size: [151] batch size: [10] trial: 43, train_loss: 0.986842, train acc: 15.909090909090908, test loss: 0.984608, test acc: 22.620227272727274, bias2: 0.7637275457382202, variance: 0.22088059782981873
Train size: [784] hidden size: [151] batch size: [10] trial: 44, train_loss: 0.988966, train acc: 15.555555555555555, test loss: 0.985330, test acc: 22.47311111111111, bias2: 0.7643407583236694, variance: 0.22098930180072784
Train size: [784] hidden size: [151] batch size: [10] trial: 45, train_loss: 0.989643, train acc: 15.217391304347826, test loss: 0.986320, test acc: 22.34391304347826, bias2: 0.7654426693916321, variance: 0.22087697684764862
Train size: [784] hidden size: [151] batch size: [10] trial: 46, train_loss: 0.993201, train acc: 14.893617021276595, test loss: 0.986429, test acc: 22.276170212765958, bias2: 0.765571653842926, variance: 0.22085730731487274
Train size: [784] hidden size: [151] batch size: [10] trial: 47, train_loss: 0.992855, train acc: 1

Train size: [784] hidden size: [174] batch size: [10] trial: 31, train_loss: 0.942349, train acc: 27.34375, test loss: 0.967346, test acc: 24.667812499999993, bias2: 0.7506635189056396, variance: 0.21668240427970886
Train size: [784] hidden size: [174] batch size: [10] trial: 32, train_loss: 0.945619, train acc: 26.515151515151516, test loss: 0.968625, test acc: 24.540909090909086, bias2: 0.7522580623626709, variance: 0.21636667847633362
Train size: [784] hidden size: [174] batch size: [10] trial: 33, train_loss: 0.945892, train acc: 26.470588235294116, test loss: 0.968596, test acc: 24.509411764705877, bias2: 0.7526659965515137, variance: 0.21593017876148224
Train size: [784] hidden size: [174] batch size: [10] trial: 34, train_loss: 0.946277, train acc: 26.428571428571427, test loss: 0.967892, test acc: 24.588571428571424, bias2: 0.7525386214256287, variance: 0.21535317599773407
Train size: [784] hidden size: [174] batch size: [10] trial: 35, train_loss: 0.948740, train acc: 26.38888

Train size: [784] hidden size: [201] batch size: [10] trial: 19, train_loss: 0.903881, train acc: 27.5, test loss: 0.936496, test acc: 28.149999999999995, bias2: 0.7282444834709167, variance: 0.20825140178203583
Train size: [784] hidden size: [201] batch size: [10] trial: 20, train_loss: 0.917701, train acc: 26.19047619047619, test loss: 0.936966, test acc: 27.950476190476188, bias2: 0.7284414768218994, variance: 0.20852415263652802
Train size: [784] hidden size: [201] batch size: [10] trial: 21, train_loss: 0.925679, train acc: 26.136363636363637, test loss: 0.936807, test acc: 28.107727272727267, bias2: 0.7275203466415405, variance: 0.20928660035133362
Train size: [784] hidden size: [201] batch size: [10] trial: 22, train_loss: 0.925336, train acc: 26.08695652173913, test loss: 0.937680, test acc: 27.982173913043475, bias2: 0.727783203125, variance: 0.20989659428596497
Train size: [784] hidden size: [201] batch size: [10] trial: 23, train_loss: 0.937050, train acc: 25.0, test loss: 0

Train size: [784] hidden size: [231] batch size: [10] trial: 7, train_loss: 0.912442, train acc: 28.125, test loss: 0.919968, test acc: 31.71125, bias2: 0.7347939014434814, variance: 0.1851743757724762
Train size: [784] hidden size: [231] batch size: [10] trial: 8, train_loss: 0.921969, train acc: 25.0, test loss: 0.926749, test acc: 30.692222222222224, bias2: 0.7382684946060181, variance: 0.18848063051700592
Train size: [784] hidden size: [231] batch size: [10] trial: 9, train_loss: 0.920939, train acc: 25.0, test loss: 0.925573, test acc: 30.668, bias2: 0.7359269857406616, variance: 0.18964648246765137
Train size: [784] hidden size: [231] batch size: [10] trial: 10, train_loss: 0.908203, train acc: 27.272727272727273, test loss: 0.922919, test acc: 30.84909090909091, bias2: 0.731351375579834, variance: 0.1915675699710846
Train size: [784] hidden size: [231] batch size: [10] trial: 11, train_loss: 0.904679, train acc: 27.083333333333332, test loss: 0.918849, test acc: 31.1208333333333

Train size: [784] hidden size: [231] batch size: [10] trial: 45, train_loss: 0.919446, train acc: 27.17391304347826, test loss: 0.918363, test acc: 30.49130434782609, bias2: 0.7140598297119141, variance: 0.20430363714694977
Train size: [784] hidden size: [231] batch size: [10] trial: 46, train_loss: 0.918518, train acc: 27.659574468085108, test loss: 0.917403, test acc: 30.602553191489367, bias2: 0.7129162549972534, variance: 0.2044869363307953
Train size: [784] hidden size: [231] batch size: [10] trial: 47, train_loss: 0.913680, train acc: 28.125, test loss: 0.917353, test acc: 30.62875, bias2: 0.7129191160202026, variance: 0.20443391799926758
Train size: [784] hidden size: [231] batch size: [10] trial: 48, train_loss: 0.913849, train acc: 28.06122448979592, test loss: 0.917944, test acc: 30.54918367346939, bias2: 0.7133666276931763, variance: 0.20457778871059418
Train size: [784] hidden size: [231] batch size: [10] trial: 49, train_loss: 0.912665, train acc: 28.5, test loss: 0.917956

Train size: [784] hidden size: [266] batch size: [10] trial: 33, train_loss: 0.849694, train acc: 36.029411764705884, test loss: 0.905490, test acc: 32.082058823529415, bias2: 0.7044820189476013, variance: 0.20100758969783783
Train size: [784] hidden size: [266] batch size: [10] trial: 34, train_loss: 0.848917, train acc: 35.714285714285715, test loss: 0.904963, test acc: 32.12542857142857, bias2: 0.7042564153671265, variance: 0.20070606470108032
Train size: [784] hidden size: [266] batch size: [10] trial: 35, train_loss: 0.852877, train acc: 36.111111111111114, test loss: 0.904102, test acc: 32.17805555555556, bias2: 0.7036043405532837, variance: 0.20049771666526794
Train size: [784] hidden size: [266] batch size: [10] trial: 36, train_loss: 0.855389, train acc: 35.13513513513514, test loss: 0.903232, test acc: 32.28027027027027, bias2: 0.7027930021286011, variance: 0.20043900609016418
Train size: [784] hidden size: [266] batch size: [10] trial: 37, train_loss: 0.851076, train acc: 35

Train size: [784] hidden size: [306] batch size: [10] trial: 20, train_loss: 0.839647, train acc: 41.666666666666664, test loss: 0.868388, test acc: 36.8, bias2: 0.6814695596694946, variance: 0.18691812455654144
Train size: [784] hidden size: [306] batch size: [10] trial: 21, train_loss: 0.827530, train acc: 43.18181818181818, test loss: 0.868738, test acc: 36.718636363636364, bias2: 0.6812140941619873, variance: 0.18752411007881165
Train size: [784] hidden size: [306] batch size: [10] trial: 22, train_loss: 0.829078, train acc: 42.391304347826086, test loss: 0.869447, test acc: 36.703478260869566, bias2: 0.6814973950386047, variance: 0.1879497766494751
Train size: [784] hidden size: [306] batch size: [10] trial: 23, train_loss: 0.839058, train acc: 40.625, test loss: 0.869691, test acc: 36.73791666666666, bias2: 0.6815458536148071, variance: 0.18814510107040405
Train size: [784] hidden size: [306] batch size: [10] trial: 24, train_loss: 0.846328, train acc: 39.0, test loss: 0.871407, 

Train size: [784] hidden size: [353] batch size: [10] trial: 7, train_loss: 0.802684, train acc: 46.875, test loss: 0.842882, test acc: 41.47125, bias2: 0.675792396068573, variance: 0.16708971560001373
Train size: [784] hidden size: [353] batch size: [10] trial: 8, train_loss: 0.785316, train acc: 47.22222222222222, test loss: 0.844188, test acc: 41.16333333333333, bias2: 0.6747439503669739, variance: 0.1694440245628357
Train size: [784] hidden size: [353] batch size: [10] trial: 9, train_loss: 0.790578, train acc: 45.0, test loss: 0.843757, test acc: 41.16, bias2: 0.6704460978507996, variance: 0.17331093549728394
Train size: [784] hidden size: [353] batch size: [10] trial: 10, train_loss: 0.783871, train acc: 45.45454545454545, test loss: 0.845271, test acc: 40.71272727272727, bias2: 0.6707656979560852, variance: 0.1745050698518753
Train size: [784] hidden size: [353] batch size: [10] trial: 11, train_loss: 0.778988, train acc: 50.0, test loss: 0.842218, test acc: 41.08833333333333, b

Train size: [784] hidden size: [353] batch size: [10] trial: 45, train_loss: 0.802059, train acc: 44.56521739130435, test loss: 0.844761, test acc: 40.47760869565217, bias2: 0.6550688147544861, variance: 0.18969255685806274
Train size: [784] hidden size: [353] batch size: [10] trial: 46, train_loss: 0.799867, train acc: 44.680851063829785, test loss: 0.843935, test acc: 40.56191489361702, bias2: 0.6544481515884399, variance: 0.18948662281036377
Train size: [784] hidden size: [353] batch size: [10] trial: 47, train_loss: 0.797292, train acc: 44.791666666666664, test loss: 0.842729, test acc: 40.715625, bias2: 0.6530635356903076, variance: 0.189665287733078
Train size: [784] hidden size: [353] batch size: [10] trial: 48, train_loss: 0.800449, train acc: 44.89795918367347, test loss: 0.842747, test acc: 40.72979591836735, bias2: 0.6528723835945129, variance: 0.18987464904785156
Train size: [784] hidden size: [353] batch size: [10] trial: 49, train_loss: 0.799995, train acc: 45.0, test los

Train size: [784] hidden size: [406] batch size: [10] trial: 33, train_loss: 0.767041, train acc: 48.529411764705884, test loss: 0.815714, test acc: 44.264705882352935, bias2: 0.63428795337677, variance: 0.18142637610435486
Train size: [784] hidden size: [406] batch size: [10] trial: 34, train_loss: 0.775575, train acc: 47.857142857142854, test loss: 0.816395, test acc: 44.28399999999999, bias2: 0.6339789032936096, variance: 0.18241602182388306
Train size: [784] hidden size: [406] batch size: [10] trial: 35, train_loss: 0.778795, train acc: 47.916666666666664, test loss: 0.815511, test acc: 44.44472222222222, bias2: 0.6329004764556885, variance: 0.18261052668094635
Train size: [784] hidden size: [406] batch size: [10] trial: 36, train_loss: 0.782855, train acc: 47.2972972972973, test loss: 0.815456, test acc: 44.4335135135135, bias2: 0.6329207420349121, variance: 0.18253538012504578
Train size: [784] hidden size: [406] batch size: [10] trial: 37, train_loss: 0.784082, train acc: 47.368

Train size: [784] hidden size: [468] batch size: [10] trial: 20, train_loss: 0.721434, train acc: 65.47619047619048, test loss: 0.785260, test acc: 48.649047619047614, bias2: 0.6111266016960144, variance: 0.17413337528705597
Train size: [784] hidden size: [468] batch size: [10] trial: 21, train_loss: 0.725473, train acc: 62.5, test loss: 0.786654, test acc: 48.459090909090904, bias2: 0.6117648482322693, variance: 0.1748887449502945
Train size: [784] hidden size: [468] batch size: [10] trial: 22, train_loss: 0.716557, train acc: 64.1304347826087, test loss: 0.786204, test acc: 48.45521739130434, bias2: 0.6111823320388794, variance: 0.175021693110466
Train size: [784] hidden size: [468] batch size: [10] trial: 23, train_loss: 0.711534, train acc: 64.58333333333333, test loss: 0.785128, test acc: 48.57041666666666, bias2: 0.6100834608078003, variance: 0.17504501342773438
Train size: [784] hidden size: [468] batch size: [10] trial: 24, train_loss: 0.713086, train acc: 65.0, test loss: 0.78

Train size: [784] hidden size: [538] batch size: [10] trial: 7, train_loss: 0.806283, train acc: 53.125, test loss: 0.776258, test acc: 49.64875000000001, bias2: 0.6214555501937866, variance: 0.15480279922485352
Train size: [784] hidden size: [538] batch size: [10] trial: 8, train_loss: 0.780455, train acc: 58.333333333333336, test loss: 0.774806, test acc: 49.62111111111111, bias2: 0.6180820465087891, variance: 0.15672387182712555
Train size: [784] hidden size: [538] batch size: [10] trial: 9, train_loss: 0.776627, train acc: 57.5, test loss: 0.774260, test acc: 49.831, bias2: 0.6153018474578857, variance: 0.15895813703536987
Train size: [784] hidden size: [538] batch size: [10] trial: 10, train_loss: 0.797706, train acc: 54.54545454545455, test loss: 0.774222, test acc: 49.98272727272728, bias2: 0.6144074201583862, variance: 0.1598142683506012
Train size: [784] hidden size: [538] batch size: [10] trial: 11, train_loss: 0.795046, train acc: 56.25, test loss: 0.775413, test acc: 49.650

Train size: [784] hidden size: [538] batch size: [10] trial: 44, train_loss: 0.743216, train acc: 60.0, test loss: 0.768206, test acc: 50.43666666666667, bias2: 0.5963944792747498, variance: 0.17181141674518585
Train size: [784] hidden size: [538] batch size: [10] trial: 45, train_loss: 0.742474, train acc: 60.32608695652174, test loss: 0.767970, test acc: 50.48673913043478, bias2: 0.5960845351219177, variance: 0.1718854308128357
Train size: [784] hidden size: [538] batch size: [10] trial: 46, train_loss: 0.744110, train acc: 60.1063829787234, test loss: 0.767455, test acc: 50.545319148936166, bias2: 0.5955020189285278, variance: 0.17195245623588562
Train size: [784] hidden size: [538] batch size: [10] trial: 47, train_loss: 0.741209, train acc: 60.9375, test loss: 0.767479, test acc: 50.54666666666666, bias2: 0.5955371856689453, variance: 0.17194171249866486
Train size: [784] hidden size: [538] batch size: [10] trial: 48, train_loss: 0.740000, train acc: 61.224489795918366, test loss:

Train size: [784] hidden size: [620] batch size: [10] trial: 32, train_loss: 0.714511, train acc: 62.121212121212125, test loss: 0.739012, test acc: 54.090303030303026, bias2: 0.5769689083099365, variance: 0.16204282641410828
Train size: [784] hidden size: [620] batch size: [10] trial: 33, train_loss: 0.706894, train acc: 62.5, test loss: 0.738553, test acc: 54.20588235294117, bias2: 0.5760166645050049, variance: 0.16253645718097687
Train size: [784] hidden size: [620] batch size: [10] trial: 34, train_loss: 0.705414, train acc: 62.142857142857146, test loss: 0.738739, test acc: 54.20257142857142, bias2: 0.5757271647453308, variance: 0.1630118042230606
Train size: [784] hidden size: [620] batch size: [10] trial: 35, train_loss: 0.706308, train acc: 61.111111111111114, test loss: 0.738381, test acc: 54.29499999999999, bias2: 0.5754161477088928, variance: 0.16296446323394775
Train size: [784] hidden size: [620] batch size: [10] trial: 36, train_loss: 0.704476, train acc: 60.8108108108108

Train size: [784] hidden size: [714] batch size: [10] trial: 20, train_loss: 0.713953, train acc: 51.19047619047619, test loss: 0.713597, test acc: 57.36809523809523, bias2: 0.5591700077056885, variance: 0.15442687273025513
Train size: [784] hidden size: [714] batch size: [10] trial: 21, train_loss: 0.720408, train acc: 50.0, test loss: 0.714098, test acc: 57.38681818181817, bias2: 0.558669924736023, variance: 0.1554284393787384
Train size: [784] hidden size: [714] batch size: [10] trial: 22, train_loss: 0.711780, train acc: 51.08695652173913, test loss: 0.714390, test acc: 57.3586956521739, bias2: 0.5585480332374573, variance: 0.15584194660186768
Train size: [784] hidden size: [714] batch size: [10] trial: 23, train_loss: 0.711785, train acc: 51.041666666666664, test loss: 0.713947, test acc: 57.48083333333332, bias2: 0.5579233765602112, variance: 0.15602348744869232
Train size: [784] hidden size: [714] batch size: [10] trial: 24, train_loss: 0.710607, train acc: 51.0, test loss: 0.71

Train size: [784] hidden size: [822] batch size: [10] trial: 8, train_loss: 0.792478, train acc: 52.77777777777778, test loss: 0.692378, test acc: 59.856666666666655, bias2: 0.5530710220336914, variance: 0.13930702209472656
Train size: [784] hidden size: [822] batch size: [10] trial: 9, train_loss: 0.799416, train acc: 47.5, test loss: 0.691876, test acc: 59.94999999999999, bias2: 0.5519276857376099, variance: 0.13994839787483215
Train size: [784] hidden size: [822] batch size: [10] trial: 10, train_loss: 0.798535, train acc: 47.72727272727273, test loss: 0.692670, test acc: 59.782727272727264, bias2: 0.5503034591674805, variance: 0.1423661708831787
Train size: [784] hidden size: [822] batch size: [10] trial: 11, train_loss: 0.796620, train acc: 47.916666666666664, test loss: 0.691242, test acc: 59.95833333333332, bias2: 0.5482480525970459, variance: 0.14299364387989044
Train size: [784] hidden size: [822] batch size: [10] trial: 12, train_loss: 0.774778, train acc: 51.92307692307692, 

Train size: [784] hidden size: [822] batch size: [10] trial: 45, train_loss: 0.678531, train acc: 61.41304347826087, test loss: 0.686481, test acc: 60.82999999999998, bias2: 0.5334213376045227, variance: 0.15306013822555542
Train size: [784] hidden size: [822] batch size: [10] trial: 46, train_loss: 0.679176, train acc: 60.638297872340424, test loss: 0.686682, test acc: 60.80702127659572, bias2: 0.5336602926254272, variance: 0.15302126109600067
Train size: [784] hidden size: [822] batch size: [10] trial: 47, train_loss: 0.674040, train acc: 60.9375, test loss: 0.686209, test acc: 60.88770833333331, bias2: 0.5331448316574097, variance: 0.15306459367275238
Train size: [784] hidden size: [822] batch size: [10] trial: 48, train_loss: 0.674381, train acc: 60.714285714285715, test loss: 0.686583, test acc: 60.915714285714266, bias2: 0.533315896987915, variance: 0.15326713025569916
Train size: [784] hidden size: [822] batch size: [10] trial: 49, train_loss: 0.672947, train acc: 61.0, test los

Train size: [784] hidden size: [946] batch size: [10] trial: 33, train_loss: 0.654624, train acc: 62.5, test loss: 0.666771, test acc: 63.12058823529413, bias2: 0.5201667547225952, variance: 0.1466037780046463
Train size: [784] hidden size: [946] batch size: [10] trial: 34, train_loss: 0.652105, train acc: 62.857142857142854, test loss: 0.666526, test acc: 63.17942857142858, bias2: 0.5197156071662903, variance: 0.14681053161621094
Train size: [784] hidden size: [946] batch size: [10] trial: 35, train_loss: 0.655298, train acc: 63.19444444444444, test loss: 0.666446, test acc: 63.17138888888889, bias2: 0.5195918679237366, variance: 0.14685428142547607
Train size: [784] hidden size: [946] batch size: [10] trial: 36, train_loss: 0.651999, train acc: 63.513513513513516, test loss: 0.665880, test acc: 63.26, bias2: 0.5191774368286133, variance: 0.14670298993587494
Train size: [784] hidden size: [946] batch size: [10] trial: 37, train_loss: 0.654086, train acc: 63.1578947368421, test loss: 0

Train size: [784] hidden size: [1089] batch size: [10] trial: 21, train_loss: 0.565832, train acc: 75.0, test loss: 0.643236, test acc: 65.7690909090909, bias2: 0.5043533444404602, variance: 0.13888241350650787
Train size: [784] hidden size: [1089] batch size: [10] trial: 22, train_loss: 0.572163, train acc: 75.0, test loss: 0.643077, test acc: 65.7795652173913, bias2: 0.5036633014678955, variance: 0.13941353559494019
Train size: [784] hidden size: [1089] batch size: [10] trial: 23, train_loss: 0.570890, train acc: 76.04166666666667, test loss: 0.643811, test acc: 65.73166666666667, bias2: 0.5039750337600708, variance: 0.1398361623287201
Train size: [784] hidden size: [1089] batch size: [10] trial: 24, train_loss: 0.584967, train acc: 73.0, test loss: 0.644319, test acc: 65.6524, bias2: 0.504258394241333, variance: 0.14006109535694122
Train size: [784] hidden size: [1089] batch size: [10] trial: 25, train_loss: 0.585732, train acc: 73.07692307692308, test loss: 0.644317, test acc: 65.6

Train size: [784] hidden size: [1254] batch size: [10] trial: 9, train_loss: 0.556516, train acc: 80.0, test loss: 0.624100, test acc: 68.301, bias2: 0.49494826793670654, variance: 0.1291515976190567
Train size: [784] hidden size: [1254] batch size: [10] trial: 10, train_loss: 0.544156, train acc: 81.81818181818181, test loss: 0.623491, test acc: 68.31545454545454, bias2: 0.4931386113166809, variance: 0.13035263121128082
Train size: [784] hidden size: [1254] batch size: [10] trial: 11, train_loss: 0.545635, train acc: 81.25, test loss: 0.623203, test acc: 68.31750000000001, bias2: 0.49193060398101807, variance: 0.13127215206623077
Train size: [784] hidden size: [1254] batch size: [10] trial: 12, train_loss: 0.548995, train acc: 80.76923076923077, test loss: 0.624909, test acc: 68.23153846153846, bias2: 0.49241191148757935, variance: 0.13249678909778595
Train size: [784] hidden size: [1254] batch size: [10] trial: 13, train_loss: 0.557457, train acc: 78.57142857142857, test loss: 0.6241

Train size: [784] hidden size: [1254] batch size: [10] trial: 46, train_loss: 0.545744, train acc: 78.72340425531915, test loss: 0.622956, test acc: 68.20957446808511, bias2: 0.48581254482269287, variance: 0.13714365661144257
Train size: [784] hidden size: [1254] batch size: [10] trial: 47, train_loss: 0.546659, train acc: 78.64583333333333, test loss: 0.623047, test acc: 68.22958333333334, bias2: 0.4858936667442322, variance: 0.13715343177318573
Train size: [784] hidden size: [1254] batch size: [10] trial: 48, train_loss: 0.546776, train acc: 78.57142857142857, test loss: 0.623066, test acc: 68.2330612244898, bias2: 0.485830694437027, variance: 0.1372351348400116
Train size: [784] hidden size: [1254] batch size: [10] trial: 49, train_loss: 0.550385, train acc: 78.0, test loss: 0.622928, test acc: 68.26580000000001, bias2: 0.48557183146476746, variance: 0.13735631108283997
##################################################
Train size: [784] hidden size: [1444] batch size: [10] trial: 0

Train size: [784] hidden size: [1444] batch size: [10] trial: 34, train_loss: 0.559710, train acc: 79.28571428571429, test loss: 0.600933, test acc: 70.50742857142855, bias2: 0.4701192378997803, variance: 0.13081347942352295
Train size: [784] hidden size: [1444] batch size: [10] trial: 35, train_loss: 0.562801, train acc: 79.16666666666667, test loss: 0.601029, test acc: 70.50916666666666, bias2: 0.4700290858745575, variance: 0.13100019097328186
Train size: [784] hidden size: [1444] batch size: [10] trial: 36, train_loss: 0.570212, train acc: 77.70270270270271, test loss: 0.601073, test acc: 70.47270270270269, bias2: 0.4700278639793396, variance: 0.13104504346847534
Train size: [784] hidden size: [1444] batch size: [10] trial: 37, train_loss: 0.576289, train acc: 76.97368421052632, test loss: 0.601122, test acc: 70.45552631578946, bias2: 0.4698798358440399, variance: 0.13124236464500427
Train size: [784] hidden size: [1444] batch size: [10] trial: 38, train_loss: 0.579272, train acc: 7

Train size: [784] hidden size: [1663] batch size: [10] trial: 21, train_loss: 0.510691, train acc: 85.22727272727273, test loss: 0.585073, test acc: 71.89727272727275, bias2: 0.46042200922966003, variance: 0.12465128302574158
Train size: [784] hidden size: [1663] batch size: [10] trial: 22, train_loss: 0.516235, train acc: 84.78260869565217, test loss: 0.584693, test acc: 71.93260869565219, bias2: 0.4597460925579071, variance: 0.12494686245918274
Train size: [784] hidden size: [1663] batch size: [10] trial: 23, train_loss: 0.516732, train acc: 84.375, test loss: 0.584967, test acc: 71.91000000000003, bias2: 0.4595709443092346, variance: 0.12539632618427277
Train size: [784] hidden size: [1663] batch size: [10] trial: 24, train_loss: 0.517557, train acc: 84.0, test loss: 0.584968, test acc: 71.92840000000002, bias2: 0.45937368273735046, variance: 0.12559422850608826
Train size: [784] hidden size: [1663] batch size: [10] trial: 25, train_loss: 0.507985, train acc: 84.61538461538461, test

Train size: [784] hidden size: [1915] batch size: [10] trial: 8, train_loss: 0.441040, train acc: 83.33333333333333, test loss: 0.566236, test acc: 73.75444444444445, bias2: 0.45459693670272827, variance: 0.11163938045501709
Train size: [784] hidden size: [1915] batch size: [10] trial: 9, train_loss: 0.454255, train acc: 82.5, test loss: 0.566384, test acc: 73.712, bias2: 0.45326128602027893, variance: 0.11312231421470642
Train size: [784] hidden size: [1915] batch size: [10] trial: 10, train_loss: 0.430654, train acc: 84.0909090909091, test loss: 0.565228, test acc: 73.79818181818182, bias2: 0.4518391191959381, variance: 0.11338893324136734
Train size: [784] hidden size: [1915] batch size: [10] trial: 11, train_loss: 0.450660, train acc: 81.25, test loss: 0.564115, test acc: 73.87083333333332, bias2: 0.45051461458206177, variance: 0.11360041797161102
Train size: [784] hidden size: [1915] batch size: [10] trial: 12, train_loss: 0.462955, train acc: 78.84615384615384, test loss: 0.56340

Train size: [784] hidden size: [1915] batch size: [10] trial: 45, train_loss: 0.513228, train acc: 80.43478260869566, test loss: 0.565923, test acc: 73.97760869565217, bias2: 0.4434165954589844, variance: 0.12250678986310959
Train size: [784] hidden size: [1915] batch size: [10] trial: 46, train_loss: 0.514453, train acc: 79.7872340425532, test loss: 0.565940, test acc: 73.93191489361702, bias2: 0.4433961808681488, variance: 0.12254343181848526
Train size: [784] hidden size: [1915] batch size: [10] trial: 47, train_loss: 0.516233, train acc: 79.6875, test loss: 0.565993, test acc: 73.91416666666666, bias2: 0.44344329833984375, variance: 0.12254966050386429
Train size: [784] hidden size: [1915] batch size: [10] trial: 48, train_loss: 0.513786, train acc: 79.59183673469387, test loss: 0.565857, test acc: 73.92367346938775, bias2: 0.4432303309440613, variance: 0.12262643128633499
Train size: [784] hidden size: [1915] batch size: [10] trial: 49, train_loss: 0.512273, train acc: 79.5, test 

Train size: [784] hidden size: [2204] batch size: [10] trial: 33, train_loss: 0.554165, train acc: 77.94117647058823, test loss: 0.547830, test acc: 75.27617647058824, bias2: 0.4306638240814209, variance: 0.11716658622026443
Train size: [784] hidden size: [2204] batch size: [10] trial: 34, train_loss: 0.551442, train acc: 77.85714285714286, test loss: 0.548074, test acc: 75.2837142857143, bias2: 0.43075501918792725, variance: 0.11731918156147003
Train size: [784] hidden size: [2204] batch size: [10] trial: 35, train_loss: 0.556448, train acc: 77.08333333333333, test loss: 0.548120, test acc: 75.2788888888889, bias2: 0.4306729733943939, variance: 0.11744740605354309
Train size: [784] hidden size: [2204] batch size: [10] trial: 36, train_loss: 0.557469, train acc: 77.02702702702703, test loss: 0.548136, test acc: 75.31567567567568, bias2: 0.43060752749443054, variance: 0.1175280436873436
Train size: [784] hidden size: [2204] batch size: [10] trial: 37, train_loss: 0.554039, train acc: 77

Train size: [784] hidden size: [2538] batch size: [10] trial: 20, train_loss: 0.507269, train acc: 79.76190476190476, test loss: 0.533513, test acc: 76.71952380952379, bias2: 0.420926570892334, variance: 0.11258627474308014
Train size: [784] hidden size: [2538] batch size: [10] trial: 21, train_loss: 0.501886, train acc: 80.68181818181819, test loss: 0.533647, test acc: 76.67499999999998, bias2: 0.42112013697624207, variance: 0.1125272884964943
Train size: [784] hidden size: [2538] batch size: [10] trial: 22, train_loss: 0.501370, train acc: 81.52173913043478, test loss: 0.533702, test acc: 76.73043478260868, bias2: 0.4208776652812958, variance: 0.11282458156347275
Train size: [784] hidden size: [2538] batch size: [10] trial: 23, train_loss: 0.497767, train acc: 81.25, test loss: 0.533934, test acc: 76.69208333333331, bias2: 0.42091619968414307, variance: 0.11301763355731964
Train size: [784] hidden size: [2538] batch size: [10] trial: 24, train_loss: 0.488115, train acc: 82.0, test lo

Train size: [784] hidden size: [2922] batch size: [10] trial: 7, train_loss: 0.444878, train acc: 84.375, test loss: 0.521098, test acc: 77.90749999999998, bias2: 0.421705037355423, variance: 0.09939321875572205
Train size: [784] hidden size: [2922] batch size: [10] trial: 8, train_loss: 0.446512, train acc: 83.33333333333333, test loss: 0.521916, test acc: 77.66333333333333, bias2: 0.4204040765762329, variance: 0.10151141881942749
Train size: [784] hidden size: [2922] batch size: [10] trial: 9, train_loss: 0.476656, train acc: 80.0, test loss: 0.520660, test acc: 77.76499999999999, bias2: 0.4180735945701599, variance: 0.10258687287569046
Train size: [784] hidden size: [2922] batch size: [10] trial: 10, train_loss: 0.471524, train acc: 81.81818181818181, test loss: 0.522223, test acc: 77.49636363636363, bias2: 0.417996883392334, variance: 0.10422611236572266
Train size: [784] hidden size: [2922] batch size: [10] trial: 11, train_loss: 0.473568, train acc: 81.25, test loss: 0.521913, te

Train size: [784] hidden size: [2922] batch size: [10] trial: 44, train_loss: 0.487633, train acc: 79.44444444444444, test loss: 0.522428, test acc: 77.47311111111107, bias2: 0.40960317850112915, variance: 0.11282462626695633
Train size: [784] hidden size: [2922] batch size: [10] trial: 45, train_loss: 0.487838, train acc: 79.34782608695652, test loss: 0.522502, test acc: 77.47934782608692, bias2: 0.40955162048339844, variance: 0.11295054852962494
Train size: [784] hidden size: [2922] batch size: [10] trial: 46, train_loss: 0.486642, train acc: 79.25531914893617, test loss: 0.522486, test acc: 77.4882978723404, bias2: 0.4095553457736969, variance: 0.11293087154626846
Train size: [784] hidden size: [2922] batch size: [10] trial: 47, train_loss: 0.483540, train acc: 79.16666666666667, test loss: 0.522563, test acc: 77.4914583333333, bias2: 0.40958234667778015, variance: 0.11298110336065292
Train size: [784] hidden size: [2922] batch size: [10] trial: 48, train_loss: 0.483492, train acc: 

Train size: [784] hidden size: [3365] batch size: [10] trial: 31, train_loss: 0.453584, train acc: 81.25, test loss: 0.506748, test acc: 78.95437499999998, bias2: 0.39831116795539856, variance: 0.10843661427497864
Train size: [784] hidden size: [3365] batch size: [10] trial: 32, train_loss: 0.455409, train acc: 81.06060606060606, test loss: 0.507161, test acc: 78.91575757575755, bias2: 0.39850494265556335, variance: 0.10865577310323715
Train size: [784] hidden size: [3365] batch size: [10] trial: 33, train_loss: 0.459775, train acc: 80.88235294117646, test loss: 0.507233, test acc: 78.92794117647057, bias2: 0.3984943926334381, variance: 0.10873880237340927
Train size: [784] hidden size: [3365] batch size: [10] trial: 34, train_loss: 0.464042, train acc: 80.71428571428571, test loss: 0.507520, test acc: 78.87057142857141, bias2: 0.3986746072769165, variance: 0.10884565860033035
Train size: [784] hidden size: [3365] batch size: [10] trial: 35, train_loss: 0.456947, train acc: 81.25, test

Train size: [784] hidden size: [3874] batch size: [10] trial: 19, train_loss: 0.458563, train acc: 85.0, test loss: 0.495453, test acc: 79.932, bias2: 0.3913280963897705, variance: 0.10412464290857315
Train size: [784] hidden size: [3874] batch size: [10] trial: 20, train_loss: 0.455680, train acc: 84.52380952380952, test loss: 0.495480, test acc: 79.96285714285715, bias2: 0.3910682201385498, variance: 0.10441205650568008
Train size: [784] hidden size: [3874] batch size: [10] trial: 21, train_loss: 0.450348, train acc: 85.22727272727273, test loss: 0.495585, test acc: 79.90727272727273, bias2: 0.39078110456466675, variance: 0.10480376332998276
Train size: [784] hidden size: [3874] batch size: [10] trial: 22, train_loss: 0.439380, train acc: 85.8695652173913, test loss: 0.495779, test acc: 79.8804347826087, bias2: 0.39061644673347473, variance: 0.10516250878572464
Train size: [784] hidden size: [3874] batch size: [10] trial: 23, train_loss: 0.440906, train acc: 85.41666666666667, test l

Train size: [784] hidden size: [4461] batch size: [10] trial: 6, train_loss: 0.468003, train acc: 82.14285714285714, test loss: 0.481234, test acc: 80.61000000000001, bias2: 0.39077499508857727, variance: 0.09045856446027756
Train size: [784] hidden size: [4461] batch size: [10] trial: 7, train_loss: 0.449411, train acc: 84.375, test loss: 0.480259, test acc: 80.65, bias2: 0.38840293884277344, variance: 0.09185610711574554
Train size: [784] hidden size: [4461] batch size: [10] trial: 8, train_loss: 0.476308, train acc: 83.33333333333333, test loss: 0.480680, test acc: 80.65555555555557, bias2: 0.38714098930358887, variance: 0.09353886544704437
Train size: [784] hidden size: [4461] batch size: [10] trial: 9, train_loss: 0.497464, train acc: 82.5, test loss: 0.481471, test acc: 80.61100000000002, bias2: 0.3862796425819397, variance: 0.0951915830373764
Train size: [784] hidden size: [4461] batch size: [10] trial: 10, train_loss: 0.492345, train acc: 84.0909090909091, test loss: 0.480864, 

Train size: [784] hidden size: [4461] batch size: [10] trial: 43, train_loss: 0.424049, train acc: 84.6590909090909, test loss: 0.482913, test acc: 80.66863636363634, bias2: 0.37946617603302, variance: 0.10344681143760681
Train size: [784] hidden size: [4461] batch size: [10] trial: 44, train_loss: 0.419810, train acc: 85.0, test loss: 0.482745, test acc: 80.68888888888887, bias2: 0.3793220520019531, variance: 0.10342325270175934
Train size: [784] hidden size: [4461] batch size: [10] trial: 45, train_loss: 0.418476, train acc: 85.32608695652173, test loss: 0.483064, test acc: 80.65086956521738, bias2: 0.3794507384300232, variance: 0.10361321270465851
Train size: [784] hidden size: [4461] batch size: [10] trial: 46, train_loss: 0.424928, train acc: 84.57446808510639, test loss: 0.483344, test acc: 80.63638297872338, bias2: 0.37950098514556885, variance: 0.10384340584278107
Train size: [784] hidden size: [4461] batch size: [10] trial: 47, train_loss: 0.424229, train acc: 84.8958333333333

Train size: [784] hidden size: [5136] batch size: [10] trial: 30, train_loss: 0.388453, train acc: 88.70967741935483, test loss: 0.471033, test acc: 81.53774193548388, bias2: 0.3690091669559479, variance: 0.10202392190694809
Train size: [784] hidden size: [5136] batch size: [10] trial: 31, train_loss: 0.383909, train acc: 89.0625, test loss: 0.470799, test acc: 81.574375, bias2: 0.36875224113464355, variance: 0.10204662382602692
Train size: [784] hidden size: [5136] batch size: [10] trial: 32, train_loss: 0.380929, train acc: 89.39393939393939, test loss: 0.470605, test acc: 81.60666666666667, bias2: 0.36847975850105286, variance: 0.10212486237287521
Train size: [784] hidden size: [5136] batch size: [10] trial: 33, train_loss: 0.378519, train acc: 89.70588235294117, test loss: 0.470446, test acc: 81.63176470588235, bias2: 0.3684118688106537, variance: 0.10203444212675095
Train size: [784] hidden size: [5136] batch size: [10] trial: 34, train_loss: 0.378430, train acc: 90.0, test loss: 

Train size: [784] hidden size: [5914] batch size: [10] trial: 17, train_loss: 0.354806, train acc: 91.66666666666667, test loss: 0.460123, test acc: 82.15722222222223, bias2: 0.36171412467956543, variance: 0.0984090194106102
Train size: [784] hidden size: [5914] batch size: [10] trial: 18, train_loss: 0.353875, train acc: 92.10526315789474, test loss: 0.459734, test acc: 82.16894736842106, bias2: 0.3612251877784729, variance: 0.09850865602493286
Train size: [784] hidden size: [5914] batch size: [10] trial: 19, train_loss: 0.355734, train acc: 91.25, test loss: 0.460083, test acc: 82.088, bias2: 0.36126983165740967, variance: 0.09881268441677094
Train size: [784] hidden size: [5914] batch size: [10] trial: 20, train_loss: 0.371141, train acc: 90.47619047619048, test loss: 0.459571, test acc: 82.11285714285714, bias2: 0.36069759726524353, variance: 0.09887367486953735
Train size: [784] hidden size: [5914] batch size: [10] trial: 21, train_loss: 0.370492, train acc: 90.9090909090909, test

Train size: [784] hidden size: [6809] batch size: [10] trial: 4, train_loss: 0.277945, train acc: 100.0, test loss: 0.448597, test acc: 82.842, bias2: 0.3655128479003906, variance: 0.08308424055576324
Train size: [784] hidden size: [6809] batch size: [10] trial: 5, train_loss: 0.259583, train acc: 100.0, test loss: 0.449597, test acc: 82.66333333333333, bias2: 0.363527774810791, variance: 0.08606930077075958
Train size: [784] hidden size: [6809] batch size: [10] trial: 6, train_loss: 0.284182, train acc: 100.0, test loss: 0.449228, test acc: 82.71, bias2: 0.3611583113670349, variance: 0.08807013928890228
Train size: [784] hidden size: [6809] batch size: [10] trial: 7, train_loss: 0.292890, train acc: 100.0, test loss: 0.449169, test acc: 82.74999999999999, bias2: 0.35955190658569336, variance: 0.08961708843708038
Train size: [784] hidden size: [6809] batch size: [10] trial: 8, train_loss: 0.280901, train acc: 100.0, test loss: 0.448883, test acc: 82.73777777777777, bias2: 0.35847154259

Train size: [784] hidden size: [6809] batch size: [10] trial: 42, train_loss: 0.347208, train acc: 92.44186046511628, test loss: 0.449321, test acc: 82.8853488372093, bias2: 0.34968245029449463, variance: 0.09963814169168472
Train size: [784] hidden size: [6809] batch size: [10] trial: 43, train_loss: 0.343724, train acc: 92.61363636363636, test loss: 0.449188, test acc: 82.89113636363636, bias2: 0.34947699308395386, variance: 0.09971059113740921
Train size: [784] hidden size: [6809] batch size: [10] trial: 44, train_loss: 0.345782, train acc: 92.77777777777777, test loss: 0.449217, test acc: 82.89288888888889, bias2: 0.3494504988193512, variance: 0.09976669400930405
Train size: [784] hidden size: [6809] batch size: [10] trial: 45, train_loss: 0.343671, train acc: 92.93478260869566, test loss: 0.449056, test acc: 82.89652173913043, bias2: 0.34926483035087585, variance: 0.0997915044426918
Train size: [784] hidden size: [6809] batch size: [10] trial: 46, train_loss: 0.343122, train acc: 

Train size: [784] hidden size: [7840] batch size: [10] trial: 29, train_loss: 0.291506, train acc: 95.83333333333333, test loss: 0.440937, test acc: 83.26266666666665, bias2: 0.34200501441955566, variance: 0.09893162548542023
Train size: [784] hidden size: [7840] batch size: [10] trial: 30, train_loss: 0.287211, train acc: 95.96774193548387, test loss: 0.440841, test acc: 83.30999999999997, bias2: 0.3418716788291931, variance: 0.09896979480981827
Train size: [784] hidden size: [7840] batch size: [10] trial: 31, train_loss: 0.292513, train acc: 96.09375, test loss: 0.440776, test acc: 83.33656249999997, bias2: 0.34159278869628906, variance: 0.09918337315320969
Train size: [784] hidden size: [7840] batch size: [10] trial: 32, train_loss: 0.288878, train acc: 96.21212121212122, test loss: 0.440704, test acc: 83.35212121212118, bias2: 0.3413328230381012, variance: 0.09937068074941635
Train size: [784] hidden size: [7840] batch size: [10] trial: 33, train_loss: 0.289312, train acc: 96.32352

Train size: [784] hidden size: [8] batch size: [784] trial: 16, train_loss: 1.362970, train acc: 9.461284513805522, test loss: 1.369931, test acc: 9.380588235294118, bias2: 0.9652465581893921, variance: 0.4046843349933624
Train size: [784] hidden size: [8] batch size: [784] trial: 17, train_loss: 1.355597, train acc: 9.630102040816325, test loss: 1.361685, test acc: 9.592222222222222, bias2: 0.9613471627235413, variance: 0.4003376364707947
Train size: [784] hidden size: [8] batch size: [784] trial: 18, train_loss: 1.358786, train acc: 9.740870032223414, test loss: 1.366305, test acc: 9.710526315789474, bias2: 0.9636949300765991, variance: 0.4026096761226654
Train size: [784] hidden size: [8] batch size: [784] trial: 19, train_loss: 1.347086, train acc: 9.968112244897958, test loss: 1.355589, test acc: 9.9525, bias2: 0.9557428359985352, variance: 0.39984628558158875
Train size: [784] hidden size: [8] batch size: [784] trial: 20, train_loss: 1.356235, train acc: 9.94290573372206, test lo

Train size: [784] hidden size: [9] batch size: [784] trial: 3, train_loss: 1.323000, train acc: 9.78954081632653, test loss: 1.328417, test acc: 9.807500000000001, bias2: 0.9861687421798706, variance: 0.34224870800971985
Train size: [784] hidden size: [9] batch size: [784] trial: 4, train_loss: 1.344356, train acc: 9.209183673469386, test loss: 1.349875, test acc: 9.522000000000002, bias2: 0.9975483417510986, variance: 0.3523271083831787
Train size: [784] hidden size: [9] batch size: [784] trial: 5, train_loss: 1.336468, train acc: 9.460034013605442, test loss: 1.344374, test acc: 9.398333333333335, bias2: 0.9829016923904419, variance: 0.3614724576473236
Train size: [784] hidden size: [9] batch size: [784] trial: 6, train_loss: 1.347632, train acc: 9.675655976676385, test loss: 1.351109, test acc: 9.761428571428572, bias2: 0.9814509153366089, variance: 0.3696577548980713
Train size: [784] hidden size: [9] batch size: [784] trial: 7, train_loss: 1.375222, train acc: 9.375, test loss: 1.

Train size: [784] hidden size: [9] batch size: [784] trial: 40, train_loss: 1.335476, train acc: 10.362742658038822, test loss: 1.341458, test acc: 10.33926829268293, bias2: 0.9459794759750366, variance: 0.3954787254333496
Train size: [784] hidden size: [9] batch size: [784] trial: 41, train_loss: 1.334532, train acc: 10.407555879494653, test loss: 1.340039, test acc: 10.407142857142858, bias2: 0.9473415613174438, variance: 0.39269769191741943
Train size: [784] hidden size: [9] batch size: [784] trial: 42, train_loss: 1.329461, train acc: 10.468082581869956, test loss: 1.335116, test acc: 10.45279069767442, bias2: 0.9436757564544678, variance: 0.3914402723312378
Train size: [784] hidden size: [9] batch size: [784] trial: 43, train_loss: 1.333799, train acc: 10.563543599257883, test loss: 1.339105, test acc: 10.586136363636365, bias2: 0.9449253082275391, variance: 0.39417970180511475
Train size: [784] hidden size: [9] batch size: [784] trial: 44, train_loss: 1.334236, train acc: 10.5753

Train size: [784] hidden size: [10] batch size: [784] trial: 27, train_loss: 1.309032, train acc: 10.40451895043732, test loss: 1.315043, test acc: 10.08607142857143, bias2: 0.9479839205741882, variance: 0.36705857515335083
Train size: [784] hidden size: [10] batch size: [784] trial: 28, train_loss: 1.309374, train acc: 10.415200562983816, test loss: 1.314977, test acc: 10.049655172413793, bias2: 0.9468210935592651, variance: 0.36815547943115234
Train size: [784] hidden size: [10] batch size: [784] trial: 29, train_loss: 1.306090, train acc: 10.488945578231293, test loss: 1.312071, test acc: 10.108666666666666, bias2: 0.9446249008178711, variance: 0.3674464523792267
Train size: [784] hidden size: [10] batch size: [784] trial: 30, train_loss: 1.300932, train acc: 10.467412771560237, test loss: 1.306994, test acc: 10.146451612903224, bias2: 0.9419549703598022, variance: 0.36503899097442627
Train size: [784] hidden size: [10] batch size: [784] trial: 31, train_loss: 1.301366, train acc: 1

Train size: [784] hidden size: [12] batch size: [784] trial: 14, train_loss: 1.290874, train acc: 11.113945578231293, test loss: 1.300254, test acc: 11.017333333333333, bias2: 0.952593207359314, variance: 0.34766098856925964
Train size: [784] hidden size: [12] batch size: [784] trial: 15, train_loss: 1.305671, train acc: 10.849808673469388, test loss: 1.314974, test acc: 10.821874999999999, bias2: 0.9615008234977722, variance: 0.3534730076789856
Train size: [784] hidden size: [12] batch size: [784] trial: 16, train_loss: 1.303559, train acc: 10.646758703481392, test loss: 1.313171, test acc: 10.55470588235294, bias2: 0.9625988006591797, variance: 0.35057270526885986
Train size: [784] hidden size: [12] batch size: [784] trial: 17, train_loss: 1.302462, train acc: 10.671768707482993, test loss: 1.311739, test acc: 10.496666666666664, bias2: 0.9580135345458984, variance: 0.3537255823612213
Train size: [784] hidden size: [12] batch size: [784] trial: 18, train_loss: 1.308140, train acc: 10

Train size: [784] hidden size: [14] batch size: [784] trial: 0, train_loss: 1.277324, train acc: 8.928571428571429, test loss: 1.234630, test acc: 9.01, bias2: 1.2346298694610596, variance: -1.1677644407015464e-09
Train size: [784] hidden size: [14] batch size: [784] trial: 1, train_loss: 1.324104, train acc: 9.885204081632654, test loss: 1.287033, test acc: 9.620000000000001, bias2: 1.0537669658660889, variance: 0.2332654595375061
Train size: [784] hidden size: [14] batch size: [784] trial: 2, train_loss: 1.289738, train acc: 13.010204081632656, test loss: 1.269578, test acc: 13.340000000000002, bias2: 0.9940531253814697, variance: 0.27552488446235657
Train size: [784] hidden size: [14] batch size: [784] trial: 3, train_loss: 1.294388, train acc: 11.22448979591837, test loss: 1.281645, test acc: 11.4875, bias2: 0.989732027053833, variance: 0.2919129431247711
Train size: [784] hidden size: [14] batch size: [784] trial: 4, train_loss: 1.294336, train acc: 10.714285714285717, test loss: 

Train size: [784] hidden size: [14] batch size: [784] trial: 37, train_loss: 1.286976, train acc: 10.251074113856072, test loss: 1.286450, test acc: 10.363684210526312, bias2: 0.9297518730163574, variance: 0.3566981554031372
Train size: [784] hidden size: [14] batch size: [784] trial: 38, train_loss: 1.286190, train acc: 10.243328100470961, test loss: 1.285392, test acc: 10.313333333333329, bias2: 0.9300944805145264, variance: 0.35529711842536926
Train size: [784] hidden size: [14] batch size: [784] trial: 39, train_loss: 1.286495, train acc: 10.159438775510207, test loss: 1.285932, test acc: 10.214749999999997, bias2: 0.9312527775764465, variance: 0.3546794056892395
Train size: [784] hidden size: [14] batch size: [784] trial: 40, train_loss: 1.287049, train acc: 10.275634644101547, test loss: 1.285972, test acc: 10.32634146341463, bias2: 0.9309349060058594, variance: 0.35503697395324707
Train size: [784] hidden size: [14] batch size: [784] trial: 41, train_loss: 1.284044, train acc: 1

Train size: [784] hidden size: [16] batch size: [784] trial: 24, train_loss: 1.267263, train acc: 10.096938775510205, test loss: 1.270718, test acc: 10.353599999999998, bias2: 0.9422011375427246, variance: 0.3285169303417206
Train size: [784] hidden size: [16] batch size: [784] trial: 25, train_loss: 1.267945, train acc: 9.973508634222922, test loss: 1.271388, test acc: 10.226923076923075, bias2: 0.9426454305648804, variance: 0.3287425935268402
Train size: [784] hidden size: [16] batch size: [784] trial: 26, train_loss: 1.270207, train acc: 10.052910052910054, test loss: 1.274571, test acc: 10.244444444444444, bias2: 0.9420575499534607, variance: 0.33251386880874634
Train size: [784] hidden size: [16] batch size: [784] trial: 27, train_loss: 1.267777, train acc: 10.103862973760934, test loss: 1.271619, test acc: 10.331785714285713, bias2: 0.9408954381942749, variance: 0.33072349429130554
Train size: [784] hidden size: [16] batch size: [784] trial: 28, train_loss: 1.267380, train acc: 1

Train size: [784] hidden size: [18] batch size: [784] trial: 11, train_loss: 1.237045, train acc: 12.404336734693876, test loss: 1.240413, test acc: 11.828333333333333, bias2: 0.9296060800552368, variance: 0.31080710887908936
Train size: [784] hidden size: [18] batch size: [784] trial: 12, train_loss: 1.237649, train acc: 12.352825745682887, test loss: 1.241906, test acc: 11.80153846153846, bias2: 0.9268279075622559, variance: 0.31507793068885803
Train size: [784] hidden size: [18] batch size: [784] trial: 13, train_loss: 1.239158, train acc: 12.181122448979592, test loss: 1.243449, test acc: 11.754285714285714, bias2: 0.9295817613601685, variance: 0.31386685371398926
Train size: [784] hidden size: [18] batch size: [784] trial: 14, train_loss: 1.233754, train acc: 12.287414965986393, test loss: 1.238207, test acc: 11.800666666666666, bias2: 0.9265131950378418, variance: 0.3116935193538666
Train size: [784] hidden size: [18] batch size: [784] trial: 15, train_loss: 1.232692, train acc: 

Train size: [784] hidden size: [18] batch size: [784] trial: 48, train_loss: 1.229175, train acc: 11.349437734277382, test loss: 1.232843, test acc: 11.098367346938774, bias2: 0.9096347689628601, variance: 0.323208749294281
Train size: [784] hidden size: [18] batch size: [784] trial: 49, train_loss: 1.228667, train acc: 11.349489795918364, test loss: 1.232479, test acc: 11.081799999999998, bias2: 0.9092367887496948, variance: 0.3232421875
##################################################
Train size: [784] hidden size: [21] batch size: [784] trial: 0, train_loss: 1.170552, train acc: 10.841836734693878, test loss: 1.180500, test acc: 9.91, bias2: 1.180499792098999, variance: 2.1409014561157846e-09
Train size: [784] hidden size: [21] batch size: [784] trial: 1, train_loss: 1.145427, train acc: 10.459183673469388, test loss: 1.159008, test acc: 9.780000000000001, bias2: 1.040785551071167, variance: 0.11822295188903809
Train size: [784] hidden size: [21] batch size: [784] trial: 2, train_

Train size: [784] hidden size: [21] batch size: [784] trial: 35, train_loss: 1.217986, train acc: 11.777210884353742, test loss: 1.221604, test acc: 11.472777777777777, bias2: 0.9217973947525024, variance: 0.2998064458370209
Train size: [784] hidden size: [21] batch size: [784] trial: 36, train_loss: 1.216667, train acc: 11.900165471594045, test loss: 1.219680, test acc: 11.608918918918919, bias2: 0.9227085113525391, variance: 0.2969716787338257
Train size: [784] hidden size: [21] batch size: [784] trial: 37, train_loss: 1.216320, train acc: 11.828678839957037, test loss: 1.218672, test acc: 11.566052631578946, bias2: 0.9212039113044739, variance: 0.2974686026573181
Train size: [784] hidden size: [21] batch size: [784] trial: 38, train_loss: 1.217267, train acc: 11.715070643642074, test loss: 1.219494, test acc: 11.50923076923077, bias2: 0.9217666387557983, variance: 0.2977276146411896
Train size: [784] hidden size: [21] batch size: [784] trial: 39, train_loss: 1.217045, train acc: 11.

Train size: [784] hidden size: [24] batch size: [784] trial: 22, train_loss: 1.194435, train acc: 11.224489795918366, test loss: 1.199256, test acc: 11.333913043478262, bias2: 0.9031686782836914, variance: 0.29608777165412903
Train size: [784] hidden size: [24] batch size: [784] trial: 23, train_loss: 1.195701, train acc: 11.139455782312924, test loss: 1.200090, test acc: 11.215000000000002, bias2: 0.9015243053436279, variance: 0.29856589436531067
Train size: [784] hidden size: [24] batch size: [784] trial: 24, train_loss: 1.196893, train acc: 11.147959183673468, test loss: 1.202346, test acc: 11.098000000000003, bias2: 0.9002562761306763, variance: 0.3020900785923004
Train size: [784] hidden size: [24] batch size: [784] trial: 25, train_loss: 1.199454, train acc: 11.092032967032965, test loss: 1.204022, test acc: 11.033076923076926, bias2: 0.8996076583862305, variance: 0.30441465973854065
Train size: [784] hidden size: [24] batch size: [784] trial: 26, train_loss: 1.198763, train acc:

Train size: [784] hidden size: [28] batch size: [784] trial: 9, train_loss: 1.174119, train acc: 11.096938775510203, test loss: 1.185757, test acc: 10.796999999999999, bias2: 0.916561484336853, variance: 0.2691955268383026
Train size: [784] hidden size: [28] batch size: [784] trial: 10, train_loss: 1.170850, train acc: 11.212894248608533, test loss: 1.181127, test acc: 10.904545454545454, bias2: 0.9100538492202759, variance: 0.27107271552085876
Train size: [784] hidden size: [28] batch size: [784] trial: 11, train_loss: 1.171117, train acc: 11.479591836734693, test loss: 1.181382, test acc: 11.0, bias2: 0.9087451100349426, variance: 0.2726368308067322
Train size: [784] hidden size: [28] batch size: [784] trial: 12, train_loss: 1.179793, train acc: 11.459968602825747, test loss: 1.188367, test acc: 11.046923076923077, bias2: 0.9086224436759949, variance: 0.27974408864974976
Train size: [784] hidden size: [28] batch size: [784] trial: 13, train_loss: 1.179116, train acc: 11.2244897959183

Train size: [784] hidden size: [28] batch size: [784] trial: 46, train_loss: 1.167915, train acc: 11.810681719496309, test loss: 1.171598, test acc: 11.80468085106383, bias2: 0.8847322463989258, variance: 0.2868654429912567
Train size: [784] hidden size: [28] batch size: [784] trial: 47, train_loss: 1.167873, train acc: 11.795812074829932, test loss: 1.171277, test acc: 11.830208333333333, bias2: 0.8848377466201782, variance: 0.28643956780433655
Train size: [784] hidden size: [28] batch size: [784] trial: 48, train_loss: 1.167837, train acc: 11.875260308204915, test loss: 1.171399, test acc: 11.892040816326531, bias2: 0.8837909698486328, variance: 0.2876081168651581
Train size: [784] hidden size: [28] batch size: [784] trial: 49, train_loss: 1.167934, train acc: 11.795918367346939, test loss: 1.171705, test acc: 11.8444, bias2: 0.884913980960846, variance: 0.28679126501083374
##################################################
Train size: [784] hidden size: [32] batch size: [784] trial:

Train size: [784] hidden size: [32] batch size: [784] trial: 33, train_loss: 1.166395, train acc: 12.10984393757503, test loss: 1.173391, test acc: 11.838529411764704, bias2: 0.8904704451560974, variance: 0.28292006254196167
Train size: [784] hidden size: [32] batch size: [784] trial: 34, train_loss: 1.164798, train acc: 12.120991253644315, test loss: 1.171676, test acc: 11.837999999999997, bias2: 0.8896434307098389, variance: 0.2820322513580322
Train size: [784] hidden size: [32] batch size: [784] trial: 35, train_loss: 1.163795, train acc: 12.039399092970521, test loss: 1.170429, test acc: 11.775277777777776, bias2: 0.8899093866348267, variance: 0.2805195748806
Train size: [784] hidden size: [32] batch size: [784] trial: 36, train_loss: 1.164743, train acc: 11.879481522338665, test loss: 1.170823, test acc: 11.679459459459457, bias2: 0.8906814455986023, variance: 0.28014177083969116
Train size: [784] hidden size: [32] batch size: [784] trial: 37, train_loss: 1.164237, train acc: 11.8

Train size: [784] hidden size: [37] batch size: [784] trial: 20, train_loss: 1.135257, train acc: 12.858357628765791, test loss: 1.138710, test acc: 12.90952380952381, bias2: 0.8756235837936401, variance: 0.26308679580688477
Train size: [784] hidden size: [37] batch size: [784] trial: 21, train_loss: 1.134998, train acc: 12.88265306122449, test loss: 1.139533, test acc: 12.770454545454548, bias2: 0.8763744831085205, variance: 0.26315879821777344
Train size: [784] hidden size: [37] batch size: [784] trial: 22, train_loss: 1.132170, train acc: 13.171029281277729, test loss: 1.136425, test acc: 13.001739130434784, bias2: 0.871999979019165, variance: 0.264425128698349
Train size: [784] hidden size: [37] batch size: [784] trial: 23, train_loss: 1.134021, train acc: 13.042091836734693, test loss: 1.138037, test acc: 12.835416666666667, bias2: 0.8740577697753906, variance: 0.2639787197113037
Train size: [784] hidden size: [37] batch size: [784] trial: 24, train_loss: 1.134636, train acc: 12.8

Train size: [784] hidden size: [43] batch size: [784] trial: 7, train_loss: 1.124991, train acc: 11.639030612244898, test loss: 1.127265, test acc: 11.307500000000001, bias2: 0.9067783355712891, variance: 0.2204866111278534
Train size: [784] hidden size: [43] batch size: [784] trial: 8, train_loss: 1.127226, train acc: 11.93310657596372, test loss: 1.128537, test acc: 11.59888888888889, bias2: 0.9012295603752136, variance: 0.2273070216178894
Train size: [784] hidden size: [43] batch size: [784] trial: 9, train_loss: 1.128701, train acc: 11.823979591836736, test loss: 1.131719, test acc: 11.327000000000002, bias2: 0.9005181789398193, variance: 0.2312004715204239
Train size: [784] hidden size: [43] batch size: [784] trial: 10, train_loss: 1.128238, train acc: 11.76948051948052, test loss: 1.133008, test acc: 11.408181818181818, bias2: 0.8976601362228394, variance: 0.23534807562828064
Train size: [784] hidden size: [43] batch size: [784] trial: 11, train_loss: 1.127701, train acc: 12.1386

Train size: [784] hidden size: [43] batch size: [784] trial: 44, train_loss: 1.119611, train acc: 13.421201814058955, test loss: 1.125980, test acc: 13.058444444444444, bias2: 0.8655058145523071, variance: 0.26047369837760925
Train size: [784] hidden size: [43] batch size: [784] trial: 45, train_loss: 1.119686, train acc: 13.356810115350488, test loss: 1.125818, test acc: 12.99717391304348, bias2: 0.8655929565429688, variance: 0.26022493839263916
Train size: [784] hidden size: [43] batch size: [784] trial: 46, train_loss: 1.121260, train acc: 13.243595310464611, test loss: 1.127511, test acc: 12.865531914893616, bias2: 0.8680127859115601, variance: 0.2594982087612152
Train size: [784] hidden size: [43] batch size: [784] trial: 47, train_loss: 1.119926, train acc: 13.24139030612245, test loss: 1.126390, test acc: 12.866458333333332, bias2: 0.8677035570144653, variance: 0.25868651270866394
Train size: [784] hidden size: [43] batch size: [784] trial: 48, train_loss: 1.120836, train acc: 1

Train size: [784] hidden size: [49] batch size: [784] trial: 31, train_loss: 1.111154, train acc: 13.14174107142857, test loss: 1.117015, test acc: 13.196874999999999, bias2: 0.8618957996368408, variance: 0.2551194429397583
Train size: [784] hidden size: [49] batch size: [784] trial: 32, train_loss: 1.111269, train acc: 13.12229437229437, test loss: 1.117797, test acc: 13.105151515151514, bias2: 0.8635290861129761, variance: 0.2542683184146881
Train size: [784] hidden size: [49] batch size: [784] trial: 33, train_loss: 1.112503, train acc: 13.028961584633851, test loss: 1.118190, test acc: 13.02941176470588, bias2: 0.8644739389419556, variance: 0.2537163496017456
Train size: [784] hidden size: [49] batch size: [784] trial: 34, train_loss: 1.112254, train acc: 12.882653061224488, test loss: 1.117707, test acc: 12.91942857142857, bias2: 0.8654775619506836, variance: 0.2522289752960205
Train size: [784] hidden size: [49] batch size: [784] trial: 35, train_loss: 1.113601, train acc: 12.779

Train size: [784] hidden size: [56] batch size: [784] trial: 18, train_loss: 1.086133, train acc: 14.0171858216971, test loss: 1.094340, test acc: 13.518947368421053, bias2: 0.8553715944290161, variance: 0.2389688491821289
Train size: [784] hidden size: [56] batch size: [784] trial: 19, train_loss: 1.083120, train acc: 14.234693877551019, test loss: 1.091477, test acc: 13.653, bias2: 0.8527647852897644, variance: 0.2387121319770813
Train size: [784] hidden size: [56] batch size: [784] trial: 20, train_loss: 1.085980, train acc: 14.012390670553932, test loss: 1.094200, test acc: 13.415714285714287, bias2: 0.8552579879760742, variance: 0.23894235491752625
Train size: [784] hidden size: [56] batch size: [784] trial: 21, train_loss: 1.085194, train acc: 14.250927643784784, test loss: 1.093569, test acc: 13.635454545454547, bias2: 0.853567361831665, variance: 0.24000120162963867
Train size: [784] hidden size: [56] batch size: [784] trial: 22, train_loss: 1.088035, train acc: 14.080523513753

Train size: [784] hidden size: [65] batch size: [784] trial: 5, train_loss: 1.075791, train acc: 16.73044217687075, test loss: 1.092820, test acc: 15.636666666666668, bias2: 0.8833537101745605, variance: 0.20946668088436127
Train size: [784] hidden size: [65] batch size: [784] trial: 6, train_loss: 1.074339, train acc: 16.089650145772595, test loss: 1.089750, test acc: 15.314285714285715, bias2: 0.873406171798706, variance: 0.21634410321712494
Train size: [784] hidden size: [65] batch size: [784] trial: 7, train_loss: 1.082097, train acc: 15.417729591836734, test loss: 1.097105, test acc: 14.705, bias2: 0.8723940253257751, variance: 0.22471100091934204
Train size: [784] hidden size: [65] batch size: [784] trial: 8, train_loss: 1.079143, train acc: 15.88718820861678, test loss: 1.093415, test acc: 15.372222222222222, bias2: 0.8647229075431824, variance: 0.22869163751602173
Train size: [784] hidden size: [65] batch size: [784] trial: 9, train_loss: 1.085218, train acc: 15.344387755102042

Train size: [784] hidden size: [65] batch size: [784] trial: 42, train_loss: 1.076905, train acc: 15.220099667774088, test loss: 1.087785, test acc: 14.666976744186051, bias2: 0.8428376913070679, variance: 0.24494744837284088
Train size: [784] hidden size: [65] batch size: [784] trial: 43, train_loss: 1.076074, train acc: 15.251043599257885, test loss: 1.086899, test acc: 14.708409090909095, bias2: 0.841512143611908, variance: 0.24538643658161163
Train size: [784] hidden size: [65] batch size: [784] trial: 44, train_loss: 1.076963, train acc: 15.110544217687076, test loss: 1.087311, test acc: 14.590444444444447, bias2: 0.8427879810333252, variance: 0.24452295899391174
Train size: [784] hidden size: [65] batch size: [784] trial: 45, train_loss: 1.077159, train acc: 15.089840283939665, test loss: 1.087432, test acc: 14.585869565217395, bias2: 0.8427145481109619, variance: 0.24471747875213623
Train size: [784] hidden size: [65] batch size: [784] trial: 46, train_loss: 1.076554, train acc:

Train size: [784] hidden size: [75] batch size: [784] trial: 29, train_loss: 1.053484, train acc: 16.258503401360546, test loss: 1.059466, test acc: 15.981999999999998, bias2: 0.82350093126297, variance: 0.23596493899822235
Train size: [784] hidden size: [75] batch size: [784] trial: 30, train_loss: 1.056349, train acc: 16.10434496379197, test loss: 1.062235, test acc: 15.838709677419352, bias2: 0.8235536217689514, variance: 0.238681361079216
Train size: [784] hidden size: [75] batch size: [784] trial: 31, train_loss: 1.055434, train acc: 16.230867346938776, test loss: 1.061239, test acc: 16.007499999999997, bias2: 0.8226566910743713, variance: 0.23858241736888885
Train size: [784] hidden size: [75] batch size: [784] trial: 32, train_loss: 1.057970, train acc: 16.036641929499073, test loss: 1.063704, test acc: 15.826666666666663, bias2: 0.8243535757064819, variance: 0.239350825548172
Train size: [784] hidden size: [75] batch size: [784] trial: 33, train_loss: 1.057204, train acc: 16.09

Train size: [784] hidden size: [86] batch size: [784] trial: 16, train_loss: 1.035672, train acc: 17.414465786314526, test loss: 1.045347, test acc: 16.432352941176468, bias2: 0.8129476308822632, variance: 0.2323993295431137
Train size: [784] hidden size: [86] batch size: [784] trial: 17, train_loss: 1.037612, train acc: 17.113095238095237, test loss: 1.046595, test acc: 16.28611111111111, bias2: 0.8153400421142578, variance: 0.2312549352645874
Train size: [784] hidden size: [86] batch size: [784] trial: 18, train_loss: 1.037483, train acc: 17.145542427497315, test loss: 1.046456, test acc: 16.24894736842105, bias2: 0.8146361112594604, variance: 0.23182031512260437
Train size: [784] hidden size: [86] batch size: [784] trial: 19, train_loss: 1.032249, train acc: 17.60204081632653, test loss: 1.041725, test acc: 16.814, bias2: 0.809424102306366, variance: 0.23230046033859253
Train size: [784] hidden size: [86] batch size: [784] trial: 20, train_loss: 1.031555, train acc: 17.8449951409135

Train size: [784] hidden size: [99] batch size: [784] trial: 3, train_loss: 1.030597, train acc: 16.77295918367347, test loss: 1.042730, test acc: 16.7275, bias2: 0.860307514667511, variance: 0.18242247402668
Train size: [784] hidden size: [99] batch size: [784] trial: 4, train_loss: 1.036262, train acc: 16.50510204081633, test loss: 1.047895, test acc: 16.49, bias2: 0.8521215915679932, variance: 0.19577358663082123
Train size: [784] hidden size: [99] batch size: [784] trial: 5, train_loss: 1.023762, train acc: 17.389455782312925, test loss: 1.037313, test acc: 17.209999999999997, bias2: 0.8389222621917725, variance: 0.19839084148406982
Train size: [784] hidden size: [99] batch size: [784] trial: 6, train_loss: 1.030834, train acc: 17.52915451895044, test loss: 1.043387, test acc: 17.175714285714285, bias2: 0.838490903377533, variance: 0.20489604771137238
Train size: [784] hidden size: [99] batch size: [784] trial: 7, train_loss: 1.029729, train acc: 17.44260204081633, test loss: 1.041

Train size: [784] hidden size: [99] batch size: [784] trial: 40, train_loss: 1.027861, train acc: 18.591338974614235, test loss: 1.036424, test acc: 17.91512195121951, bias2: 0.8032259941101074, variance: 0.23319843411445618
Train size: [784] hidden size: [99] batch size: [784] trial: 41, train_loss: 1.027285, train acc: 18.61637512147716, test loss: 1.035979, test acc: 17.932142857142857, bias2: 0.8026992082595825, variance: 0.23328004777431488
Train size: [784] hidden size: [99] batch size: [784] trial: 42, train_loss: 1.026459, train acc: 18.747033697199807, test loss: 1.035200, test acc: 18.046279069767444, bias2: 0.801832914352417, variance: 0.23336756229400635
Train size: [784] hidden size: [99] batch size: [784] trial: 43, train_loss: 1.026981, train acc: 18.77608998144712, test loss: 1.036349, test acc: 17.998863636363637, bias2: 0.8031615018844604, variance: 0.23318743705749512
Train size: [784] hidden size: [99] batch size: [784] trial: 44, train_loss: 1.028484, train acc: 18

Train size: [784] hidden size: [114] batch size: [784] trial: 27, train_loss: 1.022989, train acc: 18.54500728862974, test loss: 1.031426, test acc: 18.017857142857146, bias2: 0.8046733140945435, variance: 0.22675274312496185
Train size: [784] hidden size: [114] batch size: [784] trial: 28, train_loss: 1.025564, train acc: 18.450914848698105, test loss: 1.033716, test acc: 17.92517241379311, bias2: 0.8052495718002319, variance: 0.22846628725528717
Train size: [784] hidden size: [114] batch size: [784] trial: 29, train_loss: 1.025632, train acc: 18.443877551020414, test loss: 1.034410, test acc: 17.864666666666672, bias2: 0.804965615272522, variance: 0.22944439947605133
Train size: [784] hidden size: [114] batch size: [784] trial: 30, train_loss: 1.026900, train acc: 18.453752468729434, test loss: 1.035170, test acc: 17.84806451612904, bias2: 0.804772138595581, variance: 0.23039792478084564
Train size: [784] hidden size: [114] batch size: [784] trial: 31, train_loss: 1.025166, train acc

Train size: [784] hidden size: [131] batch size: [784] trial: 14, train_loss: 0.989761, train acc: 22.27891156462585, test loss: 1.003229, test acc: 21.209999999999997, bias2: 0.7902370095252991, variance: 0.21299248933792114
Train size: [784] hidden size: [131] batch size: [784] trial: 15, train_loss: 0.993539, train acc: 21.90688775510204, test loss: 1.006488, test acc: 20.910625, bias2: 0.7919350266456604, variance: 0.21455340087413788
Train size: [784] hidden size: [131] batch size: [784] trial: 16, train_loss: 0.991270, train acc: 22.141356542617046, test loss: 1.004906, test acc: 21.063529411764705, bias2: 0.7900425791740417, variance: 0.21486324071884155
Train size: [784] hidden size: [131] batch size: [784] trial: 17, train_loss: 0.990130, train acc: 22.2718253968254, test loss: 1.004018, test acc: 21.200555555555557, bias2: 0.7875028252601624, variance: 0.21651510894298553
Train size: [784] hidden size: [131] batch size: [784] trial: 18, train_loss: 0.992716, train acc: 21.911

Train size: [784] hidden size: [151] batch size: [784] trial: 0, train_loss: 0.994455, train acc: 18.494897959183675, test loss: 0.995188, test acc: 19.65, bias2: 0.9951879382133484, variance: 1.3623918659888545e-09
Train size: [784] hidden size: [151] batch size: [784] trial: 1, train_loss: 0.969593, train acc: 22.512755102040817, test loss: 0.972966, test acc: 22.72, bias2: 0.8562074303627014, variance: 0.11675817519426346
Train size: [784] hidden size: [151] batch size: [784] trial: 2, train_loss: 0.958998, train acc: 24.40476190476191, test loss: 0.966442, test acc: 23.786666666666665, bias2: 0.8171530365943909, variance: 0.1492888480424881
Train size: [784] hidden size: [151] batch size: [784] trial: 3, train_loss: 0.966795, train acc: 23.947704081632654, test loss: 0.978429, test acc: 22.71, bias2: 0.8129919767379761, variance: 0.1654370278120041
Train size: [784] hidden size: [151] batch size: [784] trial: 4, train_loss: 0.968810, train acc: 24.668367346938776, test loss: 0.9822

Train size: [784] hidden size: [151] batch size: [784] trial: 37, train_loss: 0.977224, train acc: 23.257921589688507, test loss: 0.987556, test acc: 22.32710526315789, bias2: 0.768121063709259, variance: 0.21943455934524536
Train size: [784] hidden size: [151] batch size: [784] trial: 38, train_loss: 0.977466, train acc: 23.16195709052852, test loss: 0.987931, test acc: 22.223076923076917, bias2: 0.768701434135437, variance: 0.21922922134399414
Train size: [784] hidden size: [151] batch size: [784] trial: 39, train_loss: 0.978493, train acc: 23.035714285714285, test loss: 0.989067, test acc: 22.032749999999997, bias2: 0.7698429226875305, variance: 0.21922361850738525
Train size: [784] hidden size: [151] batch size: [784] trial: 40, train_loss: 0.979614, train acc: 22.900074664011946, test loss: 0.990759, test acc: 21.8680487804878, bias2: 0.7711833119392395, variance: 0.21957558393478394
Train size: [784] hidden size: [151] batch size: [784] trial: 41, train_loss: 0.979321, train acc:

Train size: [784] hidden size: [174] batch size: [784] trial: 24, train_loss: 0.952948, train acc: 26.178571428571423, test loss: 0.963585, test acc: 25.23399999999999, bias2: 0.7528235912322998, variance: 0.21076098084449768
Train size: [784] hidden size: [174] batch size: [784] trial: 25, train_loss: 0.950628, train acc: 26.48645996860282, test loss: 0.961525, test acc: 25.538846153846144, bias2: 0.750487208366394, variance: 0.21103784441947937
Train size: [784] hidden size: [174] batch size: [784] trial: 26, train_loss: 0.949458, train acc: 26.695956160241867, test loss: 0.960670, test acc: 25.777777777777768, bias2: 0.7483484745025635, variance: 0.21232198178768158
Train size: [784] hidden size: [174] batch size: [784] trial: 27, train_loss: 0.948943, train acc: 26.740160349854218, test loss: 0.960398, test acc: 25.79499999999999, bias2: 0.7482649683952332, variance: 0.21213334798812866
Train size: [784] hidden size: [174] batch size: [784] trial: 28, train_loss: 0.948092, train ac

Train size: [784] hidden size: [201] batch size: [784] trial: 11, train_loss: 0.915427, train acc: 33.01445578231293, test loss: 0.924515, test acc: 31.364166666666666, bias2: 0.7273628115653992, variance: 0.1971522718667984
Train size: [784] hidden size: [201] batch size: [784] trial: 12, train_loss: 0.916622, train acc: 32.55494505494506, test loss: 0.926337, test acc: 30.734615384615385, bias2: 0.7284048795700073, variance: 0.19793236255645752
Train size: [784] hidden size: [201] batch size: [784] trial: 13, train_loss: 0.919387, train acc: 32.02441690962099, test loss: 0.929222, test acc: 30.255, bias2: 0.7278817892074585, variance: 0.20134006440639496
Train size: [784] hidden size: [201] batch size: [784] trial: 14, train_loss: 0.920547, train acc: 31.75170068027211, test loss: 0.930923, test acc: 29.926, bias2: 0.7285487055778503, variance: 0.2023741453886032
Train size: [784] hidden size: [201] batch size: [784] trial: 15, train_loss: 0.923838, train acc: 31.27391581632653, test

Train size: [784] hidden size: [201] batch size: [784] trial: 48, train_loss: 0.923968, train acc: 30.06559766763848, test loss: 0.936877, test acc: 28.477755102040813, bias2: 0.7263702750205994, variance: 0.21050631999969482
Train size: [784] hidden size: [201] batch size: [784] trial: 49, train_loss: 0.923632, train acc: 30.12755102040816, test loss: 0.936322, test acc: 28.553199999999997, bias2: 0.7261331081390381, variance: 0.21018937230110168
##################################################
Train size: [784] hidden size: [231] batch size: [784] trial: 0, train_loss: 0.927915, train acc: 27.678571428571427, test loss: 0.936893, test acc: 26.33, bias2: 0.9368932843208313, variance: -1.9462739753173253e-10
Train size: [784] hidden size: [231] batch size: [784] trial: 1, train_loss: 0.897136, train acc: 31.18622448979592, test loss: 0.910996, test acc: 29.945, bias2: 0.7976492643356323, variance: 0.1133468821644783
Train size: [784] hidden size: [231] batch size: [784] trial: 2, tra

Train size: [784] hidden size: [231] batch size: [784] trial: 35, train_loss: 0.915240, train acc: 29.985119047619044, test loss: 0.929500, test acc: 28.880555555555556, bias2: 0.7236467003822327, variance: 0.20585320889949799
Train size: [784] hidden size: [231] batch size: [784] trial: 36, train_loss: 0.914935, train acc: 30.005515719801426, test loss: 0.929158, test acc: 28.931081081081082, bias2: 0.7230069637298584, variance: 0.206150621175766
Train size: [784] hidden size: [231] batch size: [784] trial: 37, train_loss: 0.914179, train acc: 29.98791621911922, test loss: 0.928956, test acc: 28.881315789473685, bias2: 0.722956657409668, variance: 0.20599952340126038
Train size: [784] hidden size: [231] batch size: [784] trial: 38, train_loss: 0.914951, train acc: 29.964678178963887, test loss: 0.929554, test acc: 28.891794871794872, bias2: 0.7233285903930664, variance: 0.20622509717941284
Train size: [784] hidden size: [231] batch size: [784] trial: 39, train_loss: 0.914089, train ac

Train size: [784] hidden size: [266] batch size: [784] trial: 22, train_loss: 0.875444, train acc: 35.34272404614019, test loss: 0.893506, test acc: 33.7791304347826, bias2: 0.6953492164611816, variance: 0.1981564462184906
Train size: [784] hidden size: [266] batch size: [784] trial: 23, train_loss: 0.874905, train acc: 35.554846938775505, test loss: 0.892105, test acc: 33.94666666666666, bias2: 0.6935221552848816, variance: 0.1985829621553421
Train size: [784] hidden size: [266] batch size: [784] trial: 24, train_loss: 0.875875, train acc: 35.38265306122448, test loss: 0.893469, test acc: 33.7268, bias2: 0.6947505474090576, variance: 0.1987181305885315
Train size: [784] hidden size: [266] batch size: [784] trial: 25, train_loss: 0.877455, train acc: 35.14030612244897, test loss: 0.894361, test acc: 33.553076923076915, bias2: 0.695972740650177, variance: 0.1983882337808609
Train size: [784] hidden size: [266] batch size: [784] trial: 26, train_loss: 0.878425, train acc: 34.944255479969

Train size: [784] hidden size: [306] batch size: [784] trial: 9, train_loss: 0.851908, train acc: 38.494897959183675, test loss: 0.867032, test acc: 36.871, bias2: 0.691493809223175, variance: 0.17553818225860596
Train size: [784] hidden size: [306] batch size: [784] trial: 10, train_loss: 0.852335, train acc: 38.6943413729128, test loss: 0.868374, test acc: 36.93, bias2: 0.6906546354293823, variance: 0.17771974205970764
Train size: [784] hidden size: [306] batch size: [784] trial: 11, train_loss: 0.852851, train acc: 38.67984693877551, test loss: 0.869924, test acc: 36.88666666666666, bias2: 0.6899807453155518, variance: 0.1799437254667282
Train size: [784] hidden size: [306] batch size: [784] trial: 12, train_loss: 0.850035, train acc: 39.08948194662481, test loss: 0.868608, test acc: 37.097692307692306, bias2: 0.6872049570083618, variance: 0.181402787566185
Train size: [784] hidden size: [306] batch size: [784] trial: 13, train_loss: 0.848591, train acc: 39.349489795918366, test los

Train size: [784] hidden size: [306] batch size: [784] trial: 46, train_loss: 0.850478, train acc: 38.95733825445072, test loss: 0.867940, test acc: 37.094468085106385, bias2: 0.6744024753570557, variance: 0.19353707134723663
Train size: [784] hidden size: [306] batch size: [784] trial: 47, train_loss: 0.850642, train acc: 38.93760629251701, test loss: 0.868281, test acc: 37.06395833333334, bias2: 0.6741244792938232, variance: 0.19415603578090668
Train size: [784] hidden size: [306] batch size: [784] trial: 48, train_loss: 0.849884, train acc: 39.048833819241985, test loss: 0.868012, test acc: 37.16040816326531, bias2: 0.6735185384750366, variance: 0.194493368268013
Train size: [784] hidden size: [306] batch size: [784] trial: 49, train_loss: 0.850053, train acc: 39.04336734693877, test loss: 0.867914, test acc: 37.17080000000001, bias2: 0.6735256314277649, variance: 0.19438816606998444
##################################################
Train size: [784] hidden size: [353] batch size: 

Train size: [784] hidden size: [353] batch size: [784] trial: 32, train_loss: 0.827926, train acc: 41.968150896722335, test loss: 0.846327, test acc: 40.279090909090904, bias2: 0.658052921295166, variance: 0.1882745921611786
Train size: [784] hidden size: [353] batch size: [784] trial: 33, train_loss: 0.827626, train acc: 42.01680672268908, test loss: 0.846190, test acc: 40.26735294117647, bias2: 0.6580500602722168, variance: 0.188139870762825
Train size: [784] hidden size: [353] batch size: [784] trial: 34, train_loss: 0.827008, train acc: 42.20481049562683, test loss: 0.845522, test acc: 40.39228571428571, bias2: 0.6570879220962524, variance: 0.18843385577201843
Train size: [784] hidden size: [353] batch size: [784] trial: 35, train_loss: 0.826878, train acc: 42.28316326530613, test loss: 0.845510, test acc: 40.44777777777778, bias2: 0.6567128896713257, variance: 0.18879711627960205
Train size: [784] hidden size: [353] batch size: [784] trial: 36, train_loss: 0.827857, train acc: 42.

Train size: [784] hidden size: [406] batch size: [784] trial: 19, train_loss: 0.798148, train acc: 46.964285714285715, test loss: 0.818676, test acc: 43.894000000000005, bias2: 0.6429012417793274, variance: 0.1757746934890747
Train size: [784] hidden size: [406] batch size: [784] trial: 20, train_loss: 0.796484, train acc: 47.054178814382894, test loss: 0.817108, test acc: 44.11380952380953, bias2: 0.640657901763916, variance: 0.1764502376317978
Train size: [784] hidden size: [406] batch size: [784] trial: 21, train_loss: 0.797528, train acc: 46.99095547309833, test loss: 0.817487, test acc: 44.052727272727275, bias2: 0.6406232118606567, variance: 0.17686417698860168
Train size: [784] hidden size: [406] batch size: [784] trial: 22, train_loss: 0.798303, train acc: 46.98314108251997, test loss: 0.818677, test acc: 44.00130434782609, bias2: 0.6408751606941223, variance: 0.17780141532421112
Train size: [784] hidden size: [406] batch size: [784] trial: 23, train_loss: 0.798763, train acc: 

Train size: [784] hidden size: [468] batch size: [784] trial: 6, train_loss: 0.763962, train acc: 50.255102040816325, test loss: 0.789620, test acc: 47.80428571428571, bias2: 0.6334856748580933, variance: 0.1561344563961029
Train size: [784] hidden size: [468] batch size: [784] trial: 7, train_loss: 0.763245, train acc: 50.255102040816325, test loss: 0.787696, test acc: 47.885, bias2: 0.6290033459663391, variance: 0.15869291126728058
Train size: [784] hidden size: [468] batch size: [784] trial: 8, train_loss: 0.764330, train acc: 50.42517006802721, test loss: 0.788573, test acc: 47.99888888888889, bias2: 0.6276154518127441, variance: 0.16095790266990662
Train size: [784] hidden size: [468] batch size: [784] trial: 9, train_loss: 0.766708, train acc: 50.33163265306122, test loss: 0.790277, test acc: 47.794, bias2: 0.6260114312171936, variance: 0.16426582634449005
Train size: [784] hidden size: [468] batch size: [784] trial: 10, train_loss: 0.768674, train acc: 50.1043599257885, test los

Train size: [784] hidden size: [468] batch size: [784] trial: 43, train_loss: 0.768424, train acc: 50.62326066790353, test loss: 0.793349, test acc: 47.3834090909091, bias2: 0.6157110929489136, variance: 0.17763814330101013
Train size: [784] hidden size: [468] batch size: [784] trial: 44, train_loss: 0.768864, train acc: 50.54988662131519, test loss: 0.793585, test acc: 47.33044444444445, bias2: 0.6158524751663208, variance: 0.17773281037807465
Train size: [784] hidden size: [468] batch size: [784] trial: 45, train_loss: 0.768834, train acc: 50.535159716060335, test loss: 0.793275, test acc: 47.33521739130436, bias2: 0.615807831287384, variance: 0.17746742069721222
Train size: [784] hidden size: [468] batch size: [784] trial: 46, train_loss: 0.768671, train acc: 50.62147199305254, test loss: 0.793568, test acc: 47.36638297872342, bias2: 0.6158456802368164, variance: 0.17772237956523895
Train size: [784] hidden size: [468] batch size: [784] trial: 47, train_loss: 0.768894, train acc: 50

Train size: [784] hidden size: [538] batch size: [784] trial: 30, train_loss: 0.740844, train acc: 54.16392363396973, test loss: 0.766209, test acc: 50.80870967741935, bias2: 0.5986077189445496, variance: 0.16760140657424927
Train size: [784] hidden size: [538] batch size: [784] trial: 31, train_loss: 0.740337, train acc: 54.21715561224491, test loss: 0.765627, test acc: 50.925625, bias2: 0.5978711843490601, variance: 0.1677558571100235
Train size: [784] hidden size: [538] batch size: [784] trial: 32, train_loss: 0.740283, train acc: 54.28648732220162, test loss: 0.765244, test acc: 50.97696969696969, bias2: 0.5975622534751892, variance: 0.16768169403076172
Train size: [784] hidden size: [538] batch size: [784] trial: 33, train_loss: 0.739869, train acc: 54.25795318127252, test loss: 0.765124, test acc: 50.95470588235293, bias2: 0.597181499004364, variance: 0.16794247925281525
Train size: [784] hidden size: [538] batch size: [784] trial: 34, train_loss: 0.739220, train acc: 54.35495626

Train size: [784] hidden size: [620] batch size: [784] trial: 17, train_loss: 0.708334, train acc: 57.97193877551021, test loss: 0.733623, test acc: 55.19388888888889, bias2: 0.5771973133087158, variance: 0.15642614662647247
Train size: [784] hidden size: [620] batch size: [784] trial: 18, train_loss: 0.708298, train acc: 58.00886143931258, test loss: 0.734014, test acc: 55.18473684210527, bias2: 0.5771933197975159, variance: 0.1568203568458557
Train size: [784] hidden size: [620] batch size: [784] trial: 19, train_loss: 0.708677, train acc: 57.8826530612245, test loss: 0.733872, test acc: 55.236500000000014, bias2: 0.576816201210022, variance: 0.15705610811710358
Train size: [784] hidden size: [620] batch size: [784] trial: 20, train_loss: 0.707608, train acc: 58.060009718173, test loss: 0.733064, test acc: 55.32952380952382, bias2: 0.5759900808334351, variance: 0.15707442164421082
Train size: [784] hidden size: [620] batch size: [784] trial: 21, train_loss: 0.707658, train acc: 58.11

Train size: [784] hidden size: [714] batch size: [784] trial: 4, train_loss: 0.676578, train acc: 61.45408163265306, test loss: 0.706152, test acc: 58.676, bias2: 0.5776907205581665, variance: 0.1284608542919159
Train size: [784] hidden size: [714] batch size: [784] trial: 5, train_loss: 0.675365, train acc: 61.3095238095238, test loss: 0.707079, test acc: 58.22666666666667, bias2: 0.574570894241333, variance: 0.1325078010559082
Train size: [784] hidden size: [714] batch size: [784] trial: 6, train_loss: 0.678878, train acc: 61.20626822157434, test loss: 0.708104, test acc: 58.40714285714286, bias2: 0.5706616640090942, variance: 0.137442484498024
Train size: [784] hidden size: [714] batch size: [784] trial: 7, train_loss: 0.676997, train acc: 61.62308673469387, test loss: 0.707324, test acc: 58.322500000000005, bias2: 0.5669209361076355, variance: 0.14040310680866241
Train size: [784] hidden size: [714] batch size: [784] trial: 8, train_loss: 0.678872, train acc: 61.39455782312925, tes

Train size: [784] hidden size: [714] batch size: [784] trial: 41, train_loss: 0.679415, train acc: 61.65269679300292, test loss: 0.709367, test acc: 58.271904761904764, bias2: 0.551508903503418, variance: 0.15785758197307587
Train size: [784] hidden size: [714] batch size: [784] trial: 42, train_loss: 0.680088, train acc: 61.604176554342665, test loss: 0.709671, test acc: 58.24511627906977, bias2: 0.5517675876617432, variance: 0.15790386497974396
Train size: [784] hidden size: [714] batch size: [784] trial: 43, train_loss: 0.680008, train acc: 61.572356215213354, test loss: 0.709626, test acc: 58.21409090909091, bias2: 0.5516095161437988, variance: 0.15801651775836945
Train size: [784] hidden size: [714] batch size: [784] trial: 44, train_loss: 0.680030, train acc: 61.58163265306121, test loss: 0.710028, test acc: 58.18666666666667, bias2: 0.5515776872634888, variance: 0.15845046937465668
Train size: [784] hidden size: [714] batch size: [784] trial: 45, train_loss: 0.680466, train acc:

Train size: [784] hidden size: [822] batch size: [784] trial: 28, train_loss: 0.659638, train acc: 63.991027445460944, test loss: 0.691709, test acc: 60.454827586206896, bias2: 0.540165901184082, variance: 0.15154320001602173
Train size: [784] hidden size: [822] batch size: [784] trial: 29, train_loss: 0.658561, train acc: 64.16666666666667, test loss: 0.691132, test acc: 60.519000000000005, bias2: 0.5393401384353638, variance: 0.15179221332073212
Train size: [784] hidden size: [822] batch size: [784] trial: 30, train_loss: 0.657752, train acc: 64.2198815009875, test loss: 0.690071, test acc: 60.60709677419355, bias2: 0.5384166240692139, variance: 0.15165458619594574
Train size: [784] hidden size: [822] batch size: [784] trial: 31, train_loss: 0.657467, train acc: 64.25382653061224, test loss: 0.690217, test acc: 60.572812500000005, bias2: 0.5380923748016357, variance: 0.1521243304014206
Train size: [784] hidden size: [822] batch size: [784] trial: 32, train_loss: 0.657635, train acc: 

Train size: [784] hidden size: [946] batch size: [784] trial: 15, train_loss: 0.628859, train acc: 66.91645408163265, test loss: 0.664949, test acc: 62.92999999999999, bias2: 0.5242922902107239, variance: 0.14065666496753693
Train size: [784] hidden size: [946] batch size: [784] trial: 16, train_loss: 0.627781, train acc: 67.0018007202881, test loss: 0.664167, test acc: 62.96529411764705, bias2: 0.523289680480957, variance: 0.14087745547294617
Train size: [784] hidden size: [946] batch size: [784] trial: 17, train_loss: 0.628986, train acc: 67.02806122448979, test loss: 0.664905, test acc: 62.97722222222222, bias2: 0.5237500667572021, variance: 0.141154482960701
Train size: [784] hidden size: [946] batch size: [784] trial: 18, train_loss: 0.629156, train acc: 67.0985499462943, test loss: 0.665472, test acc: 62.95315789473684, bias2: 0.5236033201217651, variance: 0.14186862111091614
Train size: [784] hidden size: [946] batch size: [784] trial: 19, train_loss: 0.629972, train acc: 67.110

Train size: [784] hidden size: [1089] batch size: [784] trial: 1, train_loss: 0.590611, train acc: 72.38520408163265, test loss: 0.636088, test acc: 66.995, bias2: 0.5687192678451538, variance: 0.06736912578344345
Train size: [784] hidden size: [1089] batch size: [784] trial: 2, train_loss: 0.590367, train acc: 71.42857142857143, test loss: 0.635818, test acc: 66.18666666666667, bias2: 0.5419928431510925, variance: 0.09382474422454834
Train size: [784] hidden size: [1089] batch size: [784] trial: 3, train_loss: 0.598792, train acc: 70.56760204081633, test loss: 0.636405, test acc: 66.4725, bias2: 0.5310817956924438, variance: 0.10532326251268387
Train size: [784] hidden size: [1089] batch size: [784] trial: 4, train_loss: 0.597306, train acc: 70.28061224489795, test loss: 0.635086, test acc: 66.526, bias2: 0.521275520324707, variance: 0.11381041258573532
Train size: [784] hidden size: [1089] batch size: [784] trial: 5, train_loss: 0.601262, train acc: 70.15306122448979, test loss: 0.63

Train size: [784] hidden size: [1089] batch size: [784] trial: 38, train_loss: 0.606441, train acc: 69.95355834641549, test loss: 0.643220, test acc: 65.8497435897436, bias2: 0.501069962978363, variance: 0.14215010404586792
Train size: [784] hidden size: [1089] batch size: [784] trial: 39, train_loss: 0.606052, train acc: 69.97448979591837, test loss: 0.643427, test acc: 65.80525000000002, bias2: 0.5010016560554504, variance: 0.14242547750473022
Train size: [784] hidden size: [1089] batch size: [784] trial: 40, train_loss: 0.606755, train acc: 69.90107018417123, test loss: 0.643942, test acc: 65.74731707317073, bias2: 0.5012346506118774, variance: 0.14270716905593872
Train size: [784] hidden size: [1089] batch size: [784] trial: 41, train_loss: 0.606795, train acc: 69.87366375121478, test loss: 0.644437, test acc: 65.68690476190477, bias2: 0.5016169548034668, variance: 0.14282003045082092
Train size: [784] hidden size: [1089] batch size: [784] trial: 42, train_loss: 0.606710, train acc

Train size: [784] hidden size: [1254] batch size: [784] trial: 25, train_loss: 0.579564, train acc: 72.8021978021978, test loss: 0.622987, test acc: 67.9403846153846, bias2: 0.48780202865600586, variance: 0.13518457114696503
Train size: [784] hidden size: [1254] batch size: [784] trial: 26, train_loss: 0.579380, train acc: 72.83163265306122, test loss: 0.622913, test acc: 67.96148148148147, bias2: 0.48734843730926514, variance: 0.13556499779224396
Train size: [784] hidden size: [1254] batch size: [784] trial: 27, train_loss: 0.579128, train acc: 72.85896501457725, test loss: 0.622687, test acc: 68.01321428571428, bias2: 0.48709607124328613, variance: 0.13559050858020782
Train size: [784] hidden size: [1254] batch size: [784] trial: 28, train_loss: 0.578939, train acc: 72.853624208304, test loss: 0.622455, test acc: 68.03344827586206, bias2: 0.4867627024650574, variance: 0.1356927901506424
Train size: [784] hidden size: [1254] batch size: [784] trial: 29, train_loss: 0.579688, train acc

Train size: [784] hidden size: [1444] batch size: [784] trial: 12, train_loss: 0.557868, train acc: 75.35321821036106, test loss: 0.602777, test acc: 70.33923076923077, bias2: 0.47768938541412354, variance: 0.12508775293827057
Train size: [784] hidden size: [1444] batch size: [784] trial: 13, train_loss: 0.558728, train acc: 75.30065597667638, test loss: 0.603317, test acc: 70.28714285714285, bias2: 0.47758758068084717, variance: 0.12572939693927765
Train size: [784] hidden size: [1444] batch size: [784] trial: 14, train_loss: 0.557441, train acc: 75.39115646258503, test loss: 0.603320, test acc: 70.26333333333334, bias2: 0.4770597219467163, variance: 0.12626047432422638
Train size: [784] hidden size: [1444] batch size: [784] trial: 15, train_loss: 0.557426, train acc: 75.41454081632652, test loss: 0.603241, test acc: 70.281875, bias2: 0.4764121174812317, variance: 0.12682920694351196
Train size: [784] hidden size: [1444] batch size: [784] trial: 16, train_loss: 0.557480, train acc: 75

Train size: [784] hidden size: [1444] batch size: [784] trial: 49, train_loss: 0.558032, train acc: 74.97704081632654, test loss: 0.603036, test acc: 70.1538, bias2: 0.4711015224456787, variance: 0.13193438947200775
##################################################
Train size: [784] hidden size: [1663] batch size: [784] trial: 0, train_loss: 0.539467, train acc: 75.25510204081633, test loss: 0.590620, test acc: 70.38, bias2: 0.5906200408935547, variance: 7.785096123313906e-09
Train size: [784] hidden size: [1663] batch size: [784] trial: 1, train_loss: 0.538284, train acc: 75.12755102040816, test loss: 0.585652, test acc: 71.245, bias2: 0.5206993818283081, variance: 0.06495301425457001
Train size: [784] hidden size: [1663] batch size: [784] trial: 2, train_loss: 0.530543, train acc: 76.27551020408163, test loss: 0.583090, test acc: 71.30333333333334, bias2: 0.49599432945251465, variance: 0.08709544688463211
Train size: [784] hidden size: [1663] batch size: [784] trial: 3, train_loss: 

Train size: [784] hidden size: [1663] batch size: [784] trial: 35, train_loss: 0.534861, train acc: 76.80697278911565, test loss: 0.584877, test acc: 71.8477777777778, bias2: 0.4587392210960388, variance: 0.12613819539546967
Train size: [784] hidden size: [1663] batch size: [784] trial: 36, train_loss: 0.534813, train acc: 76.84431880860453, test loss: 0.584789, test acc: 71.85189189189191, bias2: 0.45865172147750854, variance: 0.1261371374130249
Train size: [784] hidden size: [1663] batch size: [784] trial: 37, train_loss: 0.535071, train acc: 76.8360633727175, test loss: 0.584883, test acc: 71.84026315789474, bias2: 0.4586133360862732, variance: 0.12626944482326508
Train size: [784] hidden size: [1663] batch size: [784] trial: 38, train_loss: 0.534751, train acc: 76.87074829931971, test loss: 0.584859, test acc: 71.82128205128205, bias2: 0.45837682485580444, variance: 0.12648248672485352
Train size: [784] hidden size: [1663] batch size: [784] trial: 39, train_loss: 0.534867, train ac

Train size: [784] hidden size: [1915] batch size: [784] trial: 22, train_loss: 0.511249, train acc: 79.12045252883763, test loss: 0.566442, test acc: 73.76, bias2: 0.4475783705711365, variance: 0.11886338889598846
Train size: [784] hidden size: [1915] batch size: [784] trial: 23, train_loss: 0.511198, train acc: 79.07100340136056, test loss: 0.566702, test acc: 73.74291666666666, bias2: 0.4475683569908142, variance: 0.11913353949785233
Train size: [784] hidden size: [1915] batch size: [784] trial: 24, train_loss: 0.511858, train acc: 79.05102040816328, test loss: 0.566748, test acc: 73.7632, bias2: 0.4473896026611328, variance: 0.11935819685459137
Train size: [784] hidden size: [1915] batch size: [784] trial: 25, train_loss: 0.512054, train acc: 79.02276295133439, test loss: 0.566555, test acc: 73.77615384615385, bias2: 0.4470887780189514, variance: 0.11946631222963333
Train size: [784] hidden size: [1915] batch size: [784] trial: 26, train_loss: 0.511608, train acc: 79.05801209372639,

Train size: [784] hidden size: [2204] batch size: [784] trial: 9, train_loss: 0.495115, train acc: 80.9311224489796, test loss: 0.548251, test acc: 75.337, bias2: 0.4393719732761383, variance: 0.10887929797172546
Train size: [784] hidden size: [2204] batch size: [784] trial: 10, train_loss: 0.495798, train acc: 80.9369202226345, test loss: 0.548225, test acc: 75.41181818181818, bias2: 0.4382183253765106, variance: 0.11000631004571915
Train size: [784] hidden size: [2204] batch size: [784] trial: 11, train_loss: 0.494055, train acc: 81.15433673469387, test loss: 0.548069, test acc: 75.42666666666666, bias2: 0.4368681311607361, variance: 0.11120136082172394
Train size: [784] hidden size: [2204] batch size: [784] trial: 12, train_loss: 0.494548, train acc: 81.04395604395603, test loss: 0.548999, test acc: 75.38076923076923, bias2: 0.43697893619537354, variance: 0.11202042549848557
Train size: [784] hidden size: [2204] batch size: [784] trial: 13, train_loss: 0.495815, train acc: 81.149781

Train size: [784] hidden size: [2204] batch size: [784] trial: 46, train_loss: 0.494598, train acc: 80.95690403821101, test loss: 0.550557, test acc: 75.34127659574469, bias2: 0.43145090341567993, variance: 0.1191062405705452
Train size: [784] hidden size: [2204] batch size: [784] trial: 47, train_loss: 0.494540, train acc: 80.9736394557823, test loss: 0.550411, test acc: 75.35354166666667, bias2: 0.43140456080436707, variance: 0.11900629848241806
Train size: [784] hidden size: [2204] batch size: [784] trial: 48, train_loss: 0.494165, train acc: 80.98448563098708, test loss: 0.550214, test acc: 75.3661224489796, bias2: 0.43130964040756226, variance: 0.11890460550785065
Train size: [784] hidden size: [2204] batch size: [784] trial: 49, train_loss: 0.493908, train acc: 81.05357142857142, test loss: 0.550054, test acc: 75.40180000000001, bias2: 0.43110257387161255, variance: 0.1189514547586441
##################################################
Train size: [784] hidden size: [2538] batch s

Train size: [784] hidden size: [2538] batch size: [784] trial: 33, train_loss: 0.473099, train acc: 82.34168667466986, test loss: 0.536522, test acc: 76.21235294117646, bias2: 0.42212074995040894, variance: 0.11440151184797287
Train size: [784] hidden size: [2538] batch size: [784] trial: 34, train_loss: 0.473070, train acc: 82.33965014577258, test loss: 0.536610, test acc: 76.202, bias2: 0.4220893681049347, variance: 0.11452100425958633
Train size: [784] hidden size: [2538] batch size: [784] trial: 35, train_loss: 0.473090, train acc: 82.33772675736961, test loss: 0.536632, test acc: 76.21499999999999, bias2: 0.4220609664916992, variance: 0.11457119882106781
Train size: [784] hidden size: [2538] batch size: [784] trial: 36, train_loss: 0.473250, train acc: 82.35659128516271, test loss: 0.536806, test acc: 76.22, bias2: 0.4220864772796631, variance: 0.11471962183713913
Train size: [784] hidden size: [2538] batch size: [784] trial: 37, train_loss: 0.473563, train acc: 82.37446294307196,

Train size: [784] hidden size: [2922] batch size: [784] trial: 20, train_loss: 0.458103, train acc: 83.75850340136053, test loss: 0.520423, test acc: 77.7895238095238, bias2: 0.413061261177063, variance: 0.10736185312271118
Train size: [784] hidden size: [2922] batch size: [784] trial: 21, train_loss: 0.458161, train acc: 83.74304267161409, test loss: 0.520516, test acc: 77.79318181818181, bias2: 0.4127185344696045, variance: 0.10779750347137451
Train size: [784] hidden size: [2922] batch size: [784] trial: 22, train_loss: 0.457314, train acc: 83.77329192546583, test loss: 0.520503, test acc: 77.75999999999999, bias2: 0.4121461510658264, variance: 0.10835693776607513
Train size: [784] hidden size: [2922] batch size: [784] trial: 23, train_loss: 0.456803, train acc: 83.78507653061223, test loss: 0.520271, test acc: 77.78083333333332, bias2: 0.4118359088897705, variance: 0.10843537747859955
Train size: [784] hidden size: [2922] batch size: [784] trial: 24, train_loss: 0.456139, train acc

Train size: [784] hidden size: [3365] batch size: [784] trial: 7, train_loss: 0.438227, train acc: 85.02869897959184, test loss: 0.509390, test acc: 78.49875, bias2: 0.41177666187286377, variance: 0.09761296957731247
Train size: [784] hidden size: [3365] batch size: [784] trial: 8, train_loss: 0.437625, train acc: 84.94897959183673, test loss: 0.509689, test acc: 78.49, bias2: 0.4102779030799866, variance: 0.0994107574224472
Train size: [784] hidden size: [3365] batch size: [784] trial: 9, train_loss: 0.436375, train acc: 85.2295918367347, test loss: 0.509255, test acc: 78.627, bias2: 0.4078892469406128, variance: 0.10136569291353226
Train size: [784] hidden size: [3365] batch size: [784] trial: 10, train_loss: 0.436316, train acc: 85.23886827458256, test loss: 0.508516, test acc: 78.75272727272727, bias2: 0.40672802925109863, variance: 0.10178826749324799
Train size: [784] hidden size: [3365] batch size: [784] trial: 11, train_loss: 0.436462, train acc: 85.24659863945578, test loss: 0

Train size: [784] hidden size: [3365] batch size: [784] trial: 44, train_loss: 0.437277, train acc: 85.24943310657596, test loss: 0.508606, test acc: 78.88955555555555, bias2: 0.3998483419418335, variance: 0.10875765979290009
Train size: [784] hidden size: [3365] batch size: [784] trial: 45, train_loss: 0.437293, train acc: 85.26508429458741, test loss: 0.508649, test acc: 78.89347826086956, bias2: 0.39987438917160034, variance: 0.1087750717997551
Train size: [784] hidden size: [3365] batch size: [784] trial: 46, train_loss: 0.437106, train acc: 85.29092488059054, test loss: 0.508496, test acc: 78.89021276595743, bias2: 0.3997161388397217, variance: 0.10878001898527145
Train size: [784] hidden size: [3365] batch size: [784] trial: 47, train_loss: 0.436919, train acc: 85.3342899659864, test loss: 0.508359, test acc: 78.89270833333332, bias2: 0.3995395004749298, variance: 0.10881910473108292
Train size: [784] hidden size: [3365] batch size: [784] trial: 48, train_loss: 0.436508, train ac

Train size: [784] hidden size: [3874] batch size: [784] trial: 31, train_loss: 0.417505, train acc: 86.64700255102044, test loss: 0.493934, test acc: 80.0496875, bias2: 0.3898264765739441, variance: 0.10410770028829575
Train size: [784] hidden size: [3874] batch size: [784] trial: 32, train_loss: 0.417476, train acc: 86.70763760049476, test loss: 0.493934, test acc: 80.04484848484849, bias2: 0.3897087872028351, variance: 0.10422518849372864
Train size: [784] hidden size: [3874] batch size: [784] trial: 33, train_loss: 0.417267, train acc: 86.65966386554624, test loss: 0.493979, test acc: 80.01882352941176, bias2: 0.3897155523300171, variance: 0.10426302999258041
Train size: [784] hidden size: [3874] batch size: [784] trial: 34, train_loss: 0.417142, train acc: 86.65087463556854, test loss: 0.493664, test acc: 80.05857142857143, bias2: 0.38930463790893555, variance: 0.10435944050550461
Train size: [784] hidden size: [3874] batch size: [784] trial: 35, train_loss: 0.417213, train acc: 86

Train size: [784] hidden size: [4461] batch size: [784] trial: 18, train_loss: 0.401020, train acc: 87.8625134264232, test loss: 0.483405, test acc: 80.47052631578947, bias2: 0.38491490483283997, variance: 0.0984899178147316
Train size: [784] hidden size: [4461] batch size: [784] trial: 19, train_loss: 0.400503, train acc: 87.80612244897961, test loss: 0.483549, test acc: 80.435, bias2: 0.38454669713974, variance: 0.0990021750330925
Train size: [784] hidden size: [4461] batch size: [784] trial: 20, train_loss: 0.400439, train acc: 87.74902818270166, test loss: 0.483134, test acc: 80.4804761904762, bias2: 0.38398584723472595, variance: 0.09914818406105042
Train size: [784] hidden size: [4461] batch size: [784] trial: 21, train_loss: 0.399510, train acc: 87.83627087198516, test loss: 0.483032, test acc: 80.48363636363636, bias2: 0.38362157344818115, variance: 0.09941057115793228
Train size: [784] hidden size: [4461] batch size: [784] trial: 22, train_loss: 0.398861, train acc: 87.8715616

Train size: [784] hidden size: [5136] batch size: [784] trial: 5, train_loss: 0.381204, train acc: 89.54081632653062, test loss: 0.474175, test acc: 81.19166666666666, bias2: 0.3877447247505188, variance: 0.0864303857088089
Train size: [784] hidden size: [5136] batch size: [784] trial: 6, train_loss: 0.379357, train acc: 89.50437317784258, test loss: 0.474663, test acc: 81.09857142857142, bias2: 0.3854920566082001, variance: 0.08917054533958435
Train size: [784] hidden size: [5136] batch size: [784] trial: 7, train_loss: 0.380064, train acc: 89.23788265306123, test loss: 0.475114, test acc: 80.99749999999999, bias2: 0.384122371673584, variance: 0.09099194407463074
Train size: [784] hidden size: [5136] batch size: [784] trial: 8, train_loss: 0.380601, train acc: 89.24319727891158, test loss: 0.473486, test acc: 81.17444444444443, bias2: 0.3815823197364807, variance: 0.09190391004085541
Train size: [784] hidden size: [5136] batch size: [784] trial: 9, train_loss: 0.381204, train acc: 89.

Train size: [784] hidden size: [5136] batch size: [784] trial: 42, train_loss: 0.382345, train acc: 89.03654485049834, test loss: 0.471652, test acc: 81.45395348837211, bias2: 0.3704507052898407, variance: 0.10120154172182083
Train size: [784] hidden size: [5136] batch size: [784] trial: 43, train_loss: 0.382357, train acc: 89.01032003710576, test loss: 0.471595, test acc: 81.45568181818183, bias2: 0.3704254627227783, variance: 0.10116953402757645
Train size: [784] hidden size: [5136] batch size: [784] trial: 44, train_loss: 0.382367, train acc: 89.0249433106576, test loss: 0.471523, test acc: 81.46266666666668, bias2: 0.3704027533531189, variance: 0.10111977159976959
Train size: [784] hidden size: [5136] batch size: [784] trial: 45, train_loss: 0.382565, train acc: 88.97515527950311, test loss: 0.471703, test acc: 81.45108695652175, bias2: 0.37056946754455566, variance: 0.1011335551738739
Train size: [784] hidden size: [5136] batch size: [784] trial: 46, train_loss: 0.382427, train ac

Train size: [784] hidden size: [5914] batch size: [784] trial: 29, train_loss: 0.363774, train acc: 90.31037414965986, test loss: 0.458174, test acc: 82.36066666666667, bias2: 0.3612205386161804, variance: 0.0969533696770668
Train size: [784] hidden size: [5914] batch size: [784] trial: 30, train_loss: 0.363489, train acc: 90.3554970375247, test loss: 0.458033, test acc: 82.40322580645162, bias2: 0.3609127104282379, variance: 0.09711986780166626
Train size: [784] hidden size: [5914] batch size: [784] trial: 31, train_loss: 0.363398, train acc: 90.35395408163266, test loss: 0.457838, test acc: 82.4021875, bias2: 0.3605908453464508, variance: 0.09724709391593933
Train size: [784] hidden size: [5914] batch size: [784] trial: 32, train_loss: 0.362754, train acc: 90.4066171923315, test loss: 0.457818, test acc: 82.37424242424242, bias2: 0.3604274094104767, variance: 0.09739046543836594
Train size: [784] hidden size: [5914] batch size: [784] trial: 33, train_loss: 0.362626, train acc: 90.418

Train size: [784] hidden size: [6809] batch size: [784] trial: 16, train_loss: 0.346252, train acc: 90.89135654261705, test loss: 0.448292, test acc: 82.96352941176471, bias2: 0.35576507449150085, variance: 0.09252730756998062
Train size: [784] hidden size: [6809] batch size: [784] trial: 17, train_loss: 0.346222, train acc: 90.83049886621316, test loss: 0.448456, test acc: 82.9188888888889, bias2: 0.35547101497650146, variance: 0.09298461675643921
Train size: [784] hidden size: [6809] batch size: [784] trial: 18, train_loss: 0.346170, train acc: 90.87674543501612, test loss: 0.448332, test acc: 82.91789473684211, bias2: 0.354946494102478, variance: 0.09338564425706863
Train size: [784] hidden size: [6809] batch size: [784] trial: 19, train_loss: 0.345852, train acc: 90.95025510204081, test loss: 0.448146, test acc: 82.94450000000002, bias2: 0.3543147146701813, variance: 0.09383147209882736
Train size: [784] hidden size: [6809] batch size: [784] trial: 20, train_loss: 0.346732, train a

Train size: [784] hidden size: [7840] batch size: [784] trial: 2, train_loss: 0.325500, train acc: 92.3469387755102, test loss: 0.441573, test acc: 83.11, bias2: 0.3770270347595215, variance: 0.06454605609178543
Train size: [784] hidden size: [7840] batch size: [784] trial: 3, train_loss: 0.326101, train acc: 92.31505102040816, test loss: 0.440049, test acc: 83.48, bias2: 0.36804771423339844, variance: 0.07200085371732712
Train size: [784] hidden size: [7840] batch size: [784] trial: 4, train_loss: 0.327517, train acc: 92.44897959183673, test loss: 0.441240, test acc: 83.382, bias2: 0.3645302355289459, variance: 0.07670948654413223
Train size: [784] hidden size: [7840] batch size: [784] trial: 5, train_loss: 0.327472, train acc: 92.47448979591837, test loss: 0.440667, test acc: 83.30666666666667, bias2: 0.36088788509368896, variance: 0.07977902889251709
Train size: [784] hidden size: [7840] batch size: [784] trial: 6, train_loss: 0.328401, train acc: 92.38338192419826, test loss: 0.440

Train size: [784] hidden size: [7840] batch size: [784] trial: 39, train_loss: 0.331059, train acc: 92.18431122448979, test loss: 0.440014, test acc: 83.44124999999998, bias2: 0.3444461226463318, variance: 0.09556803852319717
Train size: [784] hidden size: [7840] batch size: [784] trial: 40, train_loss: 0.331424, train acc: 92.16027874564459, test loss: 0.439933, test acc: 83.44097560975608, bias2: 0.34428465366363525, variance: 0.09564819931983948
Train size: [784] hidden size: [7840] batch size: [784] trial: 41, train_loss: 0.331527, train acc: 92.12524295432458, test loss: 0.439903, test acc: 83.44023809523807, bias2: 0.34418410062789917, variance: 0.09571884572505951
Train size: [784] hidden size: [7840] batch size: [784] trial: 42, train_loss: 0.331386, train acc: 92.12446606549597, test loss: 0.440028, test acc: 83.4095348837209, bias2: 0.3442816138267517, variance: 0.09574627131223679
Train size: [784] hidden size: [7840] batch size: [784] trial: 43, train_loss: 0.331427, train 

Train size: [784] hidden size: [8] batch size: [10] trial: 27, train_loss: 1.182287, train acc: 16.071428571428573, test loss: 1.162504, test acc: 10.7075, bias2: 0.9707857966423035, variance: 0.19171768426895142
Train size: [784] hidden size: [8] batch size: [10] trial: 28, train_loss: 1.190872, train acc: 15.517241379310345, test loss: 1.162256, test acc: 10.798620689655174, bias2: 0.9711793065071106, variance: 0.1910765916109085
Train size: [784] hidden size: [8] batch size: [10] trial: 29, train_loss: 1.195374, train acc: 15.0, test loss: 1.161099, test acc: 10.784666666666668, bias2: 0.9711734652519226, variance: 0.18992573022842407
Train size: [784] hidden size: [8] batch size: [10] trial: 30, train_loss: 1.189755, train acc: 14.516129032258064, test loss: 1.161574, test acc: 10.877419354838711, bias2: 0.968899130821228, variance: 0.19267535209655762
Train size: [784] hidden size: [8] batch size: [10] trial: 31, train_loss: 1.186795, train acc: 14.0625, test loss: 1.160751, test 

Train size: [784] hidden size: [9] batch size: [10] trial: 15, train_loss: 1.211614, train acc: 4.6875, test loss: 1.196166, test acc: 9.61375, bias2: 0.9959270358085632, variance: 0.20023898780345917
Train size: [784] hidden size: [9] batch size: [10] trial: 16, train_loss: 1.212878, train acc: 4.411764705882353, test loss: 1.196256, test acc: 9.561176470588235, bias2: 0.9921689033508301, variance: 0.2040870040655136
Train size: [784] hidden size: [9] batch size: [10] trial: 17, train_loss: 1.201185, train acc: 4.166666666666667, test loss: 1.192328, test acc: 9.69, bias2: 0.992375373840332, variance: 0.19995270669460297
Train size: [784] hidden size: [9] batch size: [10] trial: 18, train_loss: 1.196802, train acc: 5.2631578947368425, test loss: 1.184178, test acc: 10.24578947368421, bias2: 0.987568736076355, variance: 0.1966097503900528
Train size: [784] hidden size: [9] batch size: [10] trial: 19, train_loss: 1.191523, train acc: 7.5, test loss: 1.182053, test acc: 10.22299999999999

Train size: [784] hidden size: [10] batch size: [10] trial: 3, train_loss: 1.171282, train acc: 12.5, test loss: 1.235080, test acc: 8.4, bias2: 1.0497426986694336, variance: 0.1853369027376175
Train size: [784] hidden size: [10] batch size: [10] trial: 4, train_loss: 1.161996, train acc: 10.0, test loss: 1.230217, test acc: 8.476, bias2: 1.0563528537750244, variance: 0.1738642305135727
Train size: [784] hidden size: [10] batch size: [10] trial: 5, train_loss: 1.156751, train acc: 8.333333333333334, test loss: 1.214967, test acc: 9.121666666666668, bias2: 1.0316364765167236, variance: 0.18333081901073456
Train size: [784] hidden size: [10] batch size: [10] trial: 6, train_loss: 1.179337, train acc: 7.142857142857143, test loss: 1.215900, test acc: 9.492857142857144, bias2: 1.0213806629180908, variance: 0.1945192962884903
Train size: [784] hidden size: [10] batch size: [10] trial: 7, train_loss: 1.181009, train acc: 9.375, test loss: 1.233848, test acc: 8.998750000000001, bias2: 1.03378

Train size: [784] hidden size: [10] batch size: [10] trial: 41, train_loss: 1.204347, train acc: 7.738095238095238, test loss: 1.181558, test acc: 10.06, bias2: 0.9651286602020264, variance: 0.21642884612083435
Train size: [784] hidden size: [10] batch size: [10] trial: 42, train_loss: 1.208875, train acc: 8.13953488372093, test loss: 1.184335, test acc: 10.096046511627907, bias2: 0.9661870002746582, variance: 0.21814784407615662
Train size: [784] hidden size: [10] batch size: [10] trial: 43, train_loss: 1.202731, train acc: 8.522727272727273, test loss: 1.181576, test acc: 10.086363636363638, bias2: 0.965122640132904, variance: 0.21645300090312958
Train size: [784] hidden size: [10] batch size: [10] trial: 44, train_loss: 1.197849, train acc: 8.333333333333334, test loss: 1.179498, test acc: 10.174666666666669, bias2: 0.964336097240448, variance: 0.21516209840774536
Train size: [784] hidden size: [10] batch size: [10] trial: 45, train_loss: 1.204612, train acc: 8.152173913043478, test

Train size: [784] hidden size: [12] batch size: [10] trial: 29, train_loss: 1.163909, train acc: 11.666666666666666, test loss: 1.152872, test acc: 10.703666666666667, bias2: 0.9604225754737854, variance: 0.19244928658008575
Train size: [784] hidden size: [12] batch size: [10] trial: 30, train_loss: 1.165011, train acc: 11.290322580645162, test loss: 1.155390, test acc: 10.713548387096774, bias2: 0.9608322978019714, variance: 0.19455750286579132
Train size: [784] hidden size: [12] batch size: [10] trial: 31, train_loss: 1.161906, train acc: 11.71875, test loss: 1.154707, test acc: 10.88875, bias2: 0.958954393863678, variance: 0.19575245678424835
Train size: [784] hidden size: [12] batch size: [10] trial: 32, train_loss: 1.168479, train acc: 11.363636363636363, test loss: 1.156384, test acc: 10.847272727272726, bias2: 0.9572193622589111, variance: 0.1991647630929947
Train size: [784] hidden size: [12] batch size: [10] trial: 33, train_loss: 1.165381, train acc: 11.029411764705882, test 

Train size: [784] hidden size: [14] batch size: [10] trial: 17, train_loss: 1.100401, train acc: 13.88888888888889, test loss: 1.167486, test acc: 10.149444444444445, bias2: 0.9698265790939331, variance: 0.19765926897525787
Train size: [784] hidden size: [14] batch size: [10] trial: 18, train_loss: 1.098103, train acc: 14.473684210526315, test loss: 1.162522, test acc: 10.177894736842106, bias2: 0.9657589197158813, variance: 0.19676312804222107
Train size: [784] hidden size: [14] batch size: [10] trial: 19, train_loss: 1.086511, train acc: 16.25, test loss: 1.161071, test acc: 10.1365, bias2: 0.965837836265564, variance: 0.19523367285728455
Train size: [784] hidden size: [14] batch size: [10] trial: 20, train_loss: 1.110310, train acc: 15.476190476190476, test loss: 1.162762, test acc: 10.17952380952381, bias2: 0.9661445617675781, variance: 0.19661763310432434
Train size: [784] hidden size: [14] batch size: [10] trial: 21, train_loss: 1.113143, train acc: 14.772727272727273, test loss:

Train size: [784] hidden size: [16] batch size: [10] trial: 5, train_loss: 1.100626, train acc: 12.5, test loss: 1.168297, test acc: 9.796666666666667, bias2: 1.000321865081787, variance: 0.1679748147726059
Train size: [784] hidden size: [16] batch size: [10] trial: 6, train_loss: 1.112544, train acc: 10.714285714285714, test loss: 1.179688, test acc: 10.017142857142858, bias2: 0.9967363476753235, variance: 0.18295125663280487
Train size: [784] hidden size: [16] batch size: [10] trial: 7, train_loss: 1.108572, train acc: 9.375, test loss: 1.165710, test acc: 10.658750000000001, bias2: 0.9892998933792114, variance: 0.17641019821166992
Train size: [784] hidden size: [16] batch size: [10] trial: 8, train_loss: 1.121260, train acc: 8.333333333333334, test loss: 1.159221, test acc: 10.442222222222224, bias2: 0.9803344011306763, variance: 0.17888697981834412
Train size: [784] hidden size: [16] batch size: [10] trial: 9, train_loss: 1.112962, train acc: 7.5, test loss: 1.161967, test acc: 10.

Train size: [784] hidden size: [16] batch size: [10] trial: 42, train_loss: 1.154352, train acc: 9.883720930232558, test loss: 1.157082, test acc: 10.535348837209305, bias2: 0.9604631662368774, variance: 0.1966191828250885
Train size: [784] hidden size: [16] batch size: [10] trial: 43, train_loss: 1.150906, train acc: 10.795454545454545, test loss: 1.158391, test acc: 10.493636363636366, bias2: 0.961100697517395, variance: 0.19729019701480865
Train size: [784] hidden size: [16] batch size: [10] trial: 44, train_loss: 1.153932, train acc: 10.555555555555555, test loss: 1.157606, test acc: 10.580444444444446, bias2: 0.9601747989654541, variance: 0.19743087887763977
Train size: [784] hidden size: [16] batch size: [10] trial: 45, train_loss: 1.156537, train acc: 10.869565217391305, test loss: 1.160281, test acc: 10.503260869565219, bias2: 0.9614197015762329, variance: 0.19886159896850586
Train size: [784] hidden size: [16] batch size: [10] trial: 46, train_loss: 1.156292, train acc: 10.638

Train size: [784] hidden size: [18] batch size: [10] trial: 30, train_loss: 1.125297, train acc: 12.096774193548388, test loss: 1.124188, test acc: 10.60483870967742, bias2: 0.9445348978042603, variance: 0.17965328693389893
Train size: [784] hidden size: [18] batch size: [10] trial: 31, train_loss: 1.119837, train acc: 12.5, test loss: 1.121560, test acc: 10.6571875, bias2: 0.9430209398269653, variance: 0.17853876948356628
Train size: [784] hidden size: [18] batch size: [10] trial: 32, train_loss: 1.125202, train acc: 12.121212121212121, test loss: 1.122414, test acc: 10.69060606060606, bias2: 0.9416466951370239, variance: 0.18076692521572113
Train size: [784] hidden size: [18] batch size: [10] trial: 33, train_loss: 1.125200, train acc: 11.764705882352942, test loss: 1.121210, test acc: 10.881470588235294, bias2: 0.9403578639030457, variance: 0.1808517724275589
Train size: [784] hidden size: [18] batch size: [10] trial: 34, train_loss: 1.134106, train acc: 11.428571428571429, test los

Train size: [784] hidden size: [21] batch size: [10] trial: 18, train_loss: 1.088916, train acc: 14.473684210526315, test loss: 1.124637, test acc: 10.75578947368421, bias2: 0.9436734318733215, variance: 0.18096347153186798
Train size: [784] hidden size: [21] batch size: [10] trial: 19, train_loss: 1.092872, train acc: 13.75, test loss: 1.124270, test acc: 10.9725, bias2: 0.941272497177124, variance: 0.18299731612205505
Train size: [784] hidden size: [21] batch size: [10] trial: 20, train_loss: 1.095166, train acc: 14.285714285714286, test loss: 1.122811, test acc: 11.222380952380952, bias2: 0.9393593072891235, variance: 0.18345215916633606
Train size: [784] hidden size: [21] batch size: [10] trial: 21, train_loss: 1.096680, train acc: 13.636363636363637, test loss: 1.120102, test acc: 11.298636363636364, bias2: 0.9390199184417725, variance: 0.18108250200748444
Train size: [784] hidden size: [21] batch size: [10] trial: 22, train_loss: 1.095222, train acc: 13.043478260869565, test loss

Train size: [784] hidden size: [24] batch size: [10] trial: 5, train_loss: 1.030531, train acc: 12.5, test loss: 1.111324, test acc: 9.100000000000001, bias2: 0.9764237403869629, variance: 0.13490043580532074
Train size: [784] hidden size: [24] batch size: [10] trial: 6, train_loss: 1.056552, train acc: 14.285714285714286, test loss: 1.121304, test acc: 9.757142857142858, bias2: 0.9825372695922852, variance: 0.1387663185596466
Train size: [784] hidden size: [24] batch size: [10] trial: 7, train_loss: 1.061935, train acc: 15.625, test loss: 1.118633, test acc: 10.342500000000001, bias2: 0.976349949836731, variance: 0.14228259027004242
Train size: [784] hidden size: [24] batch size: [10] trial: 8, train_loss: 1.049015, train acc: 16.666666666666668, test loss: 1.112873, test acc: 10.215555555555557, bias2: 0.9723424911499023, variance: 0.14053070545196533
Train size: [784] hidden size: [24] batch size: [10] trial: 9, train_loss: 1.048916, train acc: 15.0, test loss: 1.111213, test acc: 9

Train size: [784] hidden size: [24] batch size: [10] trial: 42, train_loss: 1.126591, train acc: 8.13953488372093, test loss: 1.107549, test acc: 10.404883720930233, bias2: 0.9379907846450806, variance: 0.16955864429473877
Train size: [784] hidden size: [24] batch size: [10] trial: 43, train_loss: 1.123854, train acc: 7.954545454545454, test loss: 1.107516, test acc: 10.408863636363636, bias2: 0.9386345148086548, variance: 0.16888093948364258
Train size: [784] hidden size: [24] batch size: [10] trial: 44, train_loss: 1.123072, train acc: 8.333333333333334, test loss: 1.107989, test acc: 10.388666666666667, bias2: 0.9380655288696289, variance: 0.1699235439300537
Train size: [784] hidden size: [24] batch size: [10] trial: 45, train_loss: 1.126624, train acc: 8.152173913043478, test loss: 1.108737, test acc: 10.427391304347827, bias2: 0.9380937814712524, variance: 0.1706436723470688
Train size: [784] hidden size: [24] batch size: [10] trial: 46, train_loss: 1.127445, train acc: 7.97872340

Train size: [784] hidden size: [28] batch size: [10] trial: 30, train_loss: 1.080983, train acc: 12.903225806451612, test loss: 1.108917, test acc: 11.04548387096774, bias2: 0.9415962100028992, variance: 0.16732068359851837
Train size: [784] hidden size: [28] batch size: [10] trial: 31, train_loss: 1.080712, train acc: 13.28125, test loss: 1.112707, test acc: 11.061874999999997, bias2: 0.9438319802284241, variance: 0.16887527704238892
Train size: [784] hidden size: [28] batch size: [10] trial: 32, train_loss: 1.086720, train acc: 13.636363636363637, test loss: 1.112447, test acc: 11.155757575757574, bias2: 0.9442552328109741, variance: 0.16819217801094055
Train size: [784] hidden size: [28] batch size: [10] trial: 33, train_loss: 1.087460, train acc: 13.970588235294118, test loss: 1.114151, test acc: 11.193529411764704, bias2: 0.9438468813896179, variance: 0.17030449211597443
Train size: [784] hidden size: [28] batch size: [10] trial: 34, train_loss: 1.091367, train acc: 13.57142857142

Train size: [784] hidden size: [32] batch size: [10] trial: 17, train_loss: 1.028946, train acc: 11.11111111111111, test loss: 1.078825, test acc: 10.562222222222221, bias2: 0.926550567150116, variance: 0.15227480232715607
Train size: [784] hidden size: [32] batch size: [10] trial: 18, train_loss: 1.038121, train acc: 11.842105263157896, test loss: 1.078800, test acc: 10.696315789473681, bias2: 0.9273580312728882, variance: 0.15144191682338715
Train size: [784] hidden size: [32] batch size: [10] trial: 19, train_loss: 1.045416, train acc: 11.25, test loss: 1.078752, test acc: 10.767499999999998, bias2: 0.926966667175293, variance: 0.15178515017032623
Train size: [784] hidden size: [32] batch size: [10] trial: 20, train_loss: 1.056832, train acc: 10.714285714285714, test loss: 1.085761, test acc: 10.54142857142857, bias2: 0.931355893611908, variance: 0.15440529584884644
Train size: [784] hidden size: [32] batch size: [10] trial: 21, train_loss: 1.069671, train acc: 10.227272727272727, t

Train size: [784] hidden size: [37] batch size: [10] trial: 4, train_loss: 1.157236, train acc: 10.0, test loss: 1.080820, test acc: 12.354000000000001, bias2: 0.9531957507133484, variance: 0.12762407958507538
Train size: [784] hidden size: [37] batch size: [10] trial: 5, train_loss: 1.165882, train acc: 8.333333333333334, test loss: 1.084050, test acc: 11.725000000000001, bias2: 0.9503253102302551, variance: 0.1337241679430008
Train size: [784] hidden size: [37] batch size: [10] trial: 6, train_loss: 1.161802, train acc: 14.285714285714286, test loss: 1.089117, test acc: 11.205714285714288, bias2: 0.9507348537445068, variance: 0.13838186860084534
Train size: [784] hidden size: [37] batch size: [10] trial: 7, train_loss: 1.133034, train acc: 15.625, test loss: 1.088971, test acc: 11.198750000000002, bias2: 0.9460024237632751, variance: 0.1429690569639206
Train size: [784] hidden size: [37] batch size: [10] trial: 8, train_loss: 1.143286, train acc: 19.444444444444443, test loss: 1.0856

Train size: [784] hidden size: [37] batch size: [10] trial: 41, train_loss: 1.102701, train acc: 12.5, test loss: 1.089228, test acc: 11.610238095238097, bias2: 0.9171097874641418, variance: 0.17211826145648956
Train size: [784] hidden size: [37] batch size: [10] trial: 42, train_loss: 1.106862, train acc: 12.209302325581396, test loss: 1.088563, test acc: 11.592093023255815, bias2: 0.9172136783599854, variance: 0.17134977877140045
Train size: [784] hidden size: [37] batch size: [10] trial: 43, train_loss: 1.107450, train acc: 12.5, test loss: 1.089632, test acc: 11.536818181818184, bias2: 0.9185912609100342, variance: 0.1710411161184311
Train size: [784] hidden size: [37] batch size: [10] trial: 44, train_loss: 1.105754, train acc: 12.777777777777779, test loss: 1.088782, test acc: 11.53177777777778, bias2: 0.9182707071304321, variance: 0.17051170766353607
Train size: [784] hidden size: [37] batch size: [10] trial: 45, train_loss: 1.108320, train acc: 12.5, test loss: 1.088183, test a

Train size: [784] hidden size: [43] batch size: [10] trial: 29, train_loss: 1.056131, train acc: 8.333333333333334, test loss: 1.070706, test acc: 11.16266666666667, bias2: 0.9200009107589722, variance: 0.15070508420467377
Train size: [784] hidden size: [43] batch size: [10] trial: 30, train_loss: 1.058055, train acc: 8.870967741935484, test loss: 1.068335, test acc: 11.367096774193552, bias2: 0.9180673956871033, variance: 0.15026776492595673
Train size: [784] hidden size: [43] batch size: [10] trial: 31, train_loss: 1.056887, train acc: 8.59375, test loss: 1.067755, test acc: 11.309062500000003, bias2: 0.9170830249786377, variance: 0.15067173540592194
Train size: [784] hidden size: [43] batch size: [10] trial: 32, train_loss: 1.059199, train acc: 8.333333333333334, test loss: 1.067368, test acc: 11.324848484848488, bias2: 0.9164856672286987, variance: 0.15088200569152832
Train size: [784] hidden size: [43] batch size: [10] trial: 33, train_loss: 1.057414, train acc: 8.823529411764707,

Train size: [784] hidden size: [49] batch size: [10] trial: 17, train_loss: 1.051432, train acc: 13.88888888888889, test loss: 1.052620, test acc: 12.182777777777776, bias2: 0.9116161465644836, variance: 0.1410040408372879
Train size: [784] hidden size: [49] batch size: [10] trial: 18, train_loss: 1.037586, train acc: 14.473684210526315, test loss: 1.050470, test acc: 12.01157894736842, bias2: 0.9083729982376099, variance: 0.1420973837375641
Train size: [784] hidden size: [49] batch size: [10] trial: 19, train_loss: 1.032151, train acc: 15.0, test loss: 1.054346, test acc: 11.924499999999998, bias2: 0.9088007211685181, variance: 0.14554545283317566
Train size: [784] hidden size: [49] batch size: [10] trial: 20, train_loss: 1.040022, train acc: 14.285714285714286, test loss: 1.056912, test acc: 11.747142857142856, bias2: 0.910205602645874, variance: 0.14670692384243011
Train size: [784] hidden size: [49] batch size: [10] trial: 21, train_loss: 1.037117, train acc: 13.636363636363637, te

Train size: [784] hidden size: [56] batch size: [10] trial: 4, train_loss: 0.922066, train acc: 35.0, test loss: 1.038281, test acc: 13.589999999999998, bias2: 0.9207987785339355, variance: 0.11748196929693222
Train size: [784] hidden size: [56] batch size: [10] trial: 5, train_loss: 0.935254, train acc: 33.333333333333336, test loss: 1.030643, test acc: 14.551666666666664, bias2: 0.9074101448059082, variance: 0.12323322892189026
Train size: [784] hidden size: [56] batch size: [10] trial: 6, train_loss: 0.944642, train acc: 28.571428571428573, test loss: 1.031350, test acc: 13.694285714285712, bias2: 0.9078472256660461, variance: 0.12350232154130936
Train size: [784] hidden size: [56] batch size: [10] trial: 7, train_loss: 0.931463, train acc: 31.25, test loss: 1.039327, test acc: 13.059999999999999, bias2: 0.9111688137054443, variance: 0.1281580924987793
Train size: [784] hidden size: [56] batch size: [10] trial: 8, train_loss: 0.936031, train acc: 33.333333333333336, test loss: 1.045

Train size: [784] hidden size: [56] batch size: [10] trial: 41, train_loss: 1.043312, train acc: 12.5, test loss: 1.043443, test acc: 11.872857142857141, bias2: 0.8947798609733582, variance: 0.14866262674331665
Train size: [784] hidden size: [56] batch size: [10] trial: 42, train_loss: 1.041463, train acc: 12.209302325581396, test loss: 1.042474, test acc: 11.87627906976744, bias2: 0.8944223523139954, variance: 0.14805203676223755
Train size: [784] hidden size: [56] batch size: [10] trial: 43, train_loss: 1.037753, train acc: 13.068181818181818, test loss: 1.043525, test acc: 11.845681818181816, bias2: 0.8957145810127258, variance: 0.1478106528520584
Train size: [784] hidden size: [56] batch size: [10] trial: 44, train_loss: 1.040808, train acc: 12.777777777777779, test loss: 1.042740, test acc: 11.96311111111111, bias2: 0.8946419358253479, variance: 0.14809854328632355
Train size: [784] hidden size: [56] batch size: [10] trial: 45, train_loss: 1.039873, train acc: 13.043478260869565, 

Train size: [784] hidden size: [65] batch size: [10] trial: 29, train_loss: 1.035772, train acc: 9.166666666666666, test loss: 1.027509, test acc: 12.713333333333335, bias2: 0.8881455659866333, variance: 0.13936375081539154
Train size: [784] hidden size: [65] batch size: [10] trial: 30, train_loss: 1.028387, train acc: 10.483870967741936, test loss: 1.026991, test acc: 12.915483870967744, bias2: 0.8876867890357971, variance: 0.13930422067642212
Train size: [784] hidden size: [65] batch size: [10] trial: 31, train_loss: 1.023929, train acc: 10.9375, test loss: 1.026167, test acc: 12.965000000000002, bias2: 0.8875069618225098, variance: 0.13866020739078522
Train size: [784] hidden size: [65] batch size: [10] trial: 32, train_loss: 1.023011, train acc: 10.606060606060606, test loss: 1.026387, test acc: 12.835454545454548, bias2: 0.8875635266304016, variance: 0.13882307708263397
Train size: [784] hidden size: [65] batch size: [10] trial: 33, train_loss: 1.019074, train acc: 11.029411764705

Train size: [784] hidden size: [75] batch size: [10] trial: 17, train_loss: 1.065470, train acc: 6.944444444444445, test loss: 1.019997, test acc: 12.893333333333333, bias2: 0.8874497413635254, variance: 0.13254764676094055
Train size: [784] hidden size: [75] batch size: [10] trial: 18, train_loss: 1.058277, train acc: 6.578947368421052, test loss: 1.018057, test acc: 13.04, bias2: 0.8853878378868103, variance: 0.13266925513744354
Train size: [784] hidden size: [75] batch size: [10] trial: 19, train_loss: 1.064299, train acc: 6.25, test loss: 1.020455, test acc: 13.008500000000002, bias2: 0.8861847519874573, variance: 0.13427023589611053
Train size: [784] hidden size: [75] batch size: [10] trial: 20, train_loss: 1.051538, train acc: 7.142857142857143, test loss: 1.018740, test acc: 13.052857142857144, bias2: 0.8838472366333008, variance: 0.13489244878292084
Train size: [784] hidden size: [75] batch size: [10] trial: 21, train_loss: 1.044916, train acc: 7.954545454545454, test loss: 1.0

Train size: [784] hidden size: [86] batch size: [10] trial: 4, train_loss: 0.935165, train acc: 30.0, test loss: 1.012623, test acc: 11.626000000000001, bias2: 0.9039450287818909, variance: 0.10867805033922195
Train size: [784] hidden size: [86] batch size: [10] trial: 5, train_loss: 0.954132, train acc: 29.166666666666668, test loss: 1.010989, test acc: 11.931666666666667, bias2: 0.8972040414810181, variance: 0.11378540843725204
Train size: [784] hidden size: [86] batch size: [10] trial: 6, train_loss: 0.943148, train acc: 25.0, test loss: 1.007263, test acc: 12.365714285714287, bias2: 0.8892489075660706, variance: 0.11801368743181229
Train size: [784] hidden size: [86] batch size: [10] trial: 7, train_loss: 0.984498, train acc: 21.875, test loss: 1.011490, test acc: 12.02, bias2: 0.8936416506767273, variance: 0.1178480014204979
Train size: [784] hidden size: [86] batch size: [10] trial: 8, train_loss: 0.985370, train acc: 19.444444444444443, test loss: 1.010309, test acc: 11.94777777

Train size: [784] hidden size: [86] batch size: [10] trial: 41, train_loss: 1.007499, train acc: 14.285714285714286, test loss: 0.999298, test acc: 13.88619047619048, bias2: 0.8694229125976562, variance: 0.12987469136714935
Train size: [784] hidden size: [86] batch size: [10] trial: 42, train_loss: 1.006137, train acc: 13.953488372093023, test loss: 0.998322, test acc: 13.96558139534884, bias2: 0.8685553073883057, variance: 0.12976707518100739
Train size: [784] hidden size: [86] batch size: [10] trial: 43, train_loss: 1.010161, train acc: 13.636363636363637, test loss: 0.998654, test acc: 13.941363636363638, bias2: 0.8684811592102051, variance: 0.13017304241657257
Train size: [784] hidden size: [86] batch size: [10] trial: 44, train_loss: 1.012238, train acc: 13.333333333333334, test loss: 0.999678, test acc: 13.888000000000002, bias2: 0.8690239191055298, variance: 0.13065440952777863
Train size: [784] hidden size: [86] batch size: [10] trial: 45, train_loss: 1.010648, train acc: 13.58

Train size: [784] hidden size: [99] batch size: [10] trial: 29, train_loss: 0.955723, train acc: 20.833333333333332, test loss: 0.980692, test acc: 16.04866666666667, bias2: 0.8575069904327393, variance: 0.12318512797355652
Train size: [784] hidden size: [99] batch size: [10] trial: 30, train_loss: 0.954070, train acc: 20.967741935483872, test loss: 0.981001, test acc: 16.115161290322582, bias2: 0.8570232391357422, variance: 0.12397770583629608
Train size: [784] hidden size: [99] batch size: [10] trial: 31, train_loss: 0.949172, train acc: 21.875, test loss: 0.980867, test acc: 16.045312500000005, bias2: 0.8569023609161377, variance: 0.1239645928144455
Train size: [784] hidden size: [99] batch size: [10] trial: 32, train_loss: 0.945636, train acc: 21.21212121212121, test loss: 0.980583, test acc: 16.060000000000002, bias2: 0.8565486669540405, variance: 0.12403406947851181
Train size: [784] hidden size: [99] batch size: [10] trial: 33, train_loss: 0.944779, train acc: 20.58823529411765,

Train size: [784] hidden size: [114] batch size: [10] trial: 16, train_loss: 0.988960, train acc: 20.58823529411765, test loss: 0.979413, test acc: 15.249411764705886, bias2: 0.8584861755371094, variance: 0.12092719227075577
Train size: [784] hidden size: [114] batch size: [10] trial: 17, train_loss: 0.984626, train acc: 20.833333333333332, test loss: 0.979441, test acc: 15.263333333333337, bias2: 0.8581271171569824, variance: 0.12131354212760925
Train size: [784] hidden size: [114] batch size: [10] trial: 18, train_loss: 0.977708, train acc: 19.736842105263158, test loss: 0.976695, test acc: 15.582631578947371, bias2: 0.855407178401947, variance: 0.12128806114196777
Train size: [784] hidden size: [114] batch size: [10] trial: 19, train_loss: 0.971709, train acc: 21.25, test loss: 0.976117, test acc: 15.619500000000002, bias2: 0.8544714450836182, variance: 0.1216459721326828
Train size: [784] hidden size: [114] batch size: [10] trial: 20, train_loss: 0.983558, train acc: 20.23809523809

Train size: [784] hidden size: [131] batch size: [10] trial: 3, train_loss: 1.014083, train acc: 6.25, test loss: 0.950518, test acc: 17.424999999999997, bias2: 0.8614621162414551, variance: 0.08905577659606934
Train size: [784] hidden size: [131] batch size: [10] trial: 4, train_loss: 0.994879, train acc: 10.0, test loss: 0.958331, test acc: 16.541999999999998, bias2: 0.8600862622261047, variance: 0.09824477881193161
Train size: [784] hidden size: [131] batch size: [10] trial: 5, train_loss: 0.988154, train acc: 8.333333333333334, test loss: 0.964294, test acc: 15.853333333333332, bias2: 0.8617683053016663, variance: 0.102525994181633
Train size: [784] hidden size: [131] batch size: [10] trial: 6, train_loss: 0.968789, train acc: 10.714285714285714, test loss: 0.959025, test acc: 16.27857142857143, bias2: 0.854052722454071, variance: 0.10497242957353592
Train size: [784] hidden size: [131] batch size: [10] trial: 7, train_loss: 0.964562, train acc: 15.625, test loss: 0.963072, test ac

Train size: [784] hidden size: [131] batch size: [10] trial: 40, train_loss: 0.965173, train acc: 12.195121951219512, test loss: 0.962329, test acc: 16.70317073170732, bias2: 0.8400999307632446, variance: 0.12222933024168015
Train size: [784] hidden size: [131] batch size: [10] trial: 41, train_loss: 0.963097, train acc: 13.095238095238095, test loss: 0.962498, test acc: 16.68309523809524, bias2: 0.8399980068206787, variance: 0.12249995768070221
Train size: [784] hidden size: [131] batch size: [10] trial: 42, train_loss: 0.960125, train acc: 13.953488372093023, test loss: 0.962040, test acc: 16.71186046511628, bias2: 0.8396088480949402, variance: 0.12243115156888962
Train size: [784] hidden size: [131] batch size: [10] trial: 43, train_loss: 0.962946, train acc: 14.204545454545455, test loss: 0.962663, test acc: 16.707954545454548, bias2: 0.8403308391571045, variance: 0.12233206629753113
Train size: [784] hidden size: [131] batch size: [10] trial: 44, train_loss: 0.964804, train acc: 1

Train size: [784] hidden size: [151] batch size: [10] trial: 28, train_loss: 0.966061, train acc: 12.068965517241379, test loss: 0.947524, test acc: 18.04275862068966, bias2: 0.8286702036857605, variance: 0.11885379999876022
Train size: [784] hidden size: [151] batch size: [10] trial: 29, train_loss: 0.961326, train acc: 13.333333333333334, test loss: 0.947242, test acc: 18.016000000000005, bias2: 0.8282546401023865, variance: 0.11898694187402725
Train size: [784] hidden size: [151] batch size: [10] trial: 30, train_loss: 0.965548, train acc: 13.709677419354838, test loss: 0.947755, test acc: 18.040967741935486, bias2: 0.8284076452255249, variance: 0.11934750527143478
Train size: [784] hidden size: [151] batch size: [10] trial: 31, train_loss: 0.958058, train acc: 14.84375, test loss: 0.948197, test acc: 18.049375, bias2: 0.8281508088111877, variance: 0.1200464591383934
Train size: [784] hidden size: [151] batch size: [10] trial: 32, train_loss: 0.955568, train acc: 15.151515151515152,

Train size: [784] hidden size: [174] batch size: [10] trial: 15, train_loss: 0.941602, train acc: 12.5, test loss: 0.932394, test acc: 20.820624999999996, bias2: 0.8187997937202454, variance: 0.1135944053530693
Train size: [784] hidden size: [174] batch size: [10] trial: 16, train_loss: 0.936530, train acc: 14.705882352941176, test loss: 0.932772, test acc: 20.86588235294117, bias2: 0.817932665348053, variance: 0.11483971774578094
Train size: [784] hidden size: [174] batch size: [10] trial: 17, train_loss: 0.928365, train acc: 16.666666666666668, test loss: 0.931680, test acc: 20.926666666666662, bias2: 0.816239595413208, variance: 0.11544043570756912
Train size: [784] hidden size: [174] batch size: [10] trial: 18, train_loss: 0.932452, train acc: 17.105263157894736, test loss: 0.930703, test acc: 21.055263157894732, bias2: 0.8150256872177124, variance: 0.11567697674036026
Train size: [784] hidden size: [174] batch size: [10] trial: 19, train_loss: 0.926704, train acc: 18.75, test loss

Train size: [784] hidden size: [201] batch size: [10] trial: 2, train_loss: 0.915442, train acc: 8.333333333333334, test loss: 0.905288, test acc: 25.723333333333333, bias2: 0.8216217756271362, variance: 0.08366600424051285
Train size: [784] hidden size: [201] batch size: [10] trial: 3, train_loss: 0.897370, train acc: 12.5, test loss: 0.913490, test acc: 24.7225, bias2: 0.8206735849380493, variance: 0.09281684458255768
Train size: [784] hidden size: [201] batch size: [10] trial: 4, train_loss: 0.916071, train acc: 15.0, test loss: 0.912482, test acc: 24.398000000000003, bias2: 0.8143129348754883, variance: 0.09816887974739075
Train size: [784] hidden size: [201] batch size: [10] trial: 5, train_loss: 0.927322, train acc: 12.5, test loss: 0.921827, test acc: 22.768333333333334, bias2: 0.819754421710968, variance: 0.10207244008779526
Train size: [784] hidden size: [201] batch size: [10] trial: 6, train_loss: 0.908432, train acc: 17.857142857142858, test loss: 0.926034, test acc: 21.8071

Train size: [784] hidden size: [201] batch size: [10] trial: 39, train_loss: 0.937168, train acc: 20.0, test loss: 0.930172, test acc: 20.420499999999997, bias2: 0.8130687475204468, variance: 0.11710307747125626
Train size: [784] hidden size: [201] batch size: [10] trial: 40, train_loss: 0.937219, train acc: 19.51219512195122, test loss: 0.929192, test acc: 20.52780487804878, bias2: 0.8119834065437317, variance: 0.11720818281173706
Train size: [784] hidden size: [201] batch size: [10] trial: 41, train_loss: 0.935937, train acc: 19.642857142857142, test loss: 0.929310, test acc: 20.46666666666667, bias2: 0.8120797872543335, variance: 0.11723044514656067
Train size: [784] hidden size: [201] batch size: [10] trial: 42, train_loss: 0.939771, train acc: 19.186046511627907, test loss: 0.929491, test acc: 20.426511627906976, bias2: 0.8122365474700928, variance: 0.1172543615102768
Train size: [784] hidden size: [201] batch size: [10] trial: 43, train_loss: 0.937974, train acc: 18.75, test loss

Train size: [784] hidden size: [231] batch size: [10] trial: 27, train_loss: 0.915601, train acc: 19.642857142857142, test loss: 0.914033, test acc: 22.80321428571429, bias2: 0.8012487888336182, variance: 0.11278470605611801
Train size: [784] hidden size: [231] batch size: [10] trial: 28, train_loss: 0.913244, train acc: 19.82758620689655, test loss: 0.913251, test acc: 22.8748275862069, bias2: 0.800658643245697, variance: 0.11259245872497559
Train size: [784] hidden size: [231] batch size: [10] trial: 29, train_loss: 0.902516, train acc: 21.666666666666668, test loss: 0.912923, test acc: 22.989333333333335, bias2: 0.7999694347381592, variance: 0.11295399814844131
Train size: [784] hidden size: [231] batch size: [10] trial: 30, train_loss: 0.904000, train acc: 21.774193548387096, test loss: 0.912785, test acc: 23.028709677419357, bias2: 0.7995986342430115, variance: 0.11318601667881012
Train size: [784] hidden size: [231] batch size: [10] trial: 31, train_loss: 0.893937, train acc: 24.

Train size: [784] hidden size: [266] batch size: [10] trial: 14, train_loss: 0.868132, train acc: 33.333333333333336, test loss: 0.890386, test acc: 26.45266666666667, bias2: 0.7810887098312378, variance: 0.10929737985134125
Train size: [784] hidden size: [266] batch size: [10] trial: 15, train_loss: 0.878608, train acc: 31.25, test loss: 0.891072, test acc: 26.201875, bias2: 0.7813573479652405, variance: 0.10971461236476898
Train size: [784] hidden size: [266] batch size: [10] trial: 16, train_loss: 0.888151, train acc: 29.41176470588235, test loss: 0.892343, test acc: 26.006470588235295, bias2: 0.7821998000144958, variance: 0.11014324426651001
Train size: [784] hidden size: [266] batch size: [10] trial: 17, train_loss: 0.894537, train acc: 27.77777777777778, test loss: 0.890486, test acc: 26.42777777777778, bias2: 0.7798985838890076, variance: 0.11058748513460159
Train size: [784] hidden size: [266] batch size: [10] trial: 18, train_loss: 0.894154, train acc: 27.63157894736842, test 

Train size: [784] hidden size: [306] batch size: [10] trial: 1, train_loss: 0.845871, train acc: 37.5, test loss: 0.873178, test acc: 30.975, bias2: 0.8238208293914795, variance: 0.049356862902641296
Train size: [784] hidden size: [306] batch size: [10] trial: 2, train_loss: 0.883890, train acc: 33.333333333333336, test loss: 0.870903, test acc: 30.543333333333333, bias2: 0.7993026375770569, variance: 0.07159987837076187
Train size: [784] hidden size: [306] batch size: [10] trial: 3, train_loss: 0.888307, train acc: 31.25, test loss: 0.869974, test acc: 31.107499999999998, bias2: 0.787171483039856, variance: 0.08280275017023087
Train size: [784] hidden size: [306] batch size: [10] trial: 4, train_loss: 0.874020, train acc: 35.0, test loss: 0.863581, test acc: 31.998, bias2: 0.7758605480194092, variance: 0.08772053569555283
Train size: [784] hidden size: [306] batch size: [10] trial: 5, train_loss: 0.898712, train acc: 33.333333333333336, test loss: 0.868309, test acc: 31.19666666666667

Train size: [784] hidden size: [306] batch size: [10] trial: 38, train_loss: 0.895493, train acc: 27.564102564102566, test loss: 0.881467, test acc: 28.093846153846155, bias2: 0.7712989449501038, variance: 0.11016838997602463
Train size: [784] hidden size: [306] batch size: [10] trial: 39, train_loss: 0.893202, train acc: 27.5, test loss: 0.881879, test acc: 28.069, bias2: 0.77165687084198, variance: 0.11022171378135681
Train size: [784] hidden size: [306] batch size: [10] trial: 40, train_loss: 0.896883, train acc: 26.829268292682926, test loss: 0.882609, test acc: 27.992682926829268, bias2: 0.7721553444862366, variance: 0.11045374721288681
Train size: [784] hidden size: [306] batch size: [10] trial: 41, train_loss: 0.897061, train acc: 26.785714285714285, test loss: 0.882728, test acc: 28.049523809523812, bias2: 0.7719911932945251, variance: 0.11073700338602066
Train size: [784] hidden size: [306] batch size: [10] trial: 42, train_loss: 0.893920, train acc: 27.906976744186046, test l

Train size: [784] hidden size: [353] batch size: [10] trial: 26, train_loss: 0.835731, train acc: 33.333333333333336, test loss: 0.862718, test acc: 30.76222222222222, bias2: 0.75733882188797, variance: 0.10537882894277573
Train size: [784] hidden size: [353] batch size: [10] trial: 27, train_loss: 0.832249, train acc: 33.92857142857143, test loss: 0.862328, test acc: 30.838571428571424, bias2: 0.7567043304443359, variance: 0.10562323033809662
Train size: [784] hidden size: [353] batch size: [10] trial: 28, train_loss: 0.828560, train acc: 35.3448275862069, test loss: 0.862346, test acc: 30.876206896551718, bias2: 0.7563436627388, variance: 0.1060018390417099
Train size: [784] hidden size: [353] batch size: [10] trial: 29, train_loss: 0.830407, train acc: 35.0, test loss: 0.862658, test acc: 30.902333333333328, bias2: 0.7564463019371033, variance: 0.106211818754673
Train size: [784] hidden size: [353] batch size: [10] trial: 30, train_loss: 0.829521, train acc: 35.483870967741936, test

Train size: [784] hidden size: [406] batch size: [10] trial: 13, train_loss: 0.861561, train acc: 26.785714285714285, test loss: 0.840502, test acc: 35.50857142857142, bias2: 0.7406666874885559, variance: 0.09983506053686142
Train size: [784] hidden size: [406] batch size: [10] trial: 14, train_loss: 0.859982, train acc: 26.666666666666668, test loss: 0.841577, test acc: 35.26, bias2: 0.7417020797729492, variance: 0.09987449645996094
Train size: [784] hidden size: [406] batch size: [10] trial: 15, train_loss: 0.868198, train acc: 28.125, test loss: 0.842711, test acc: 35.012499999999996, bias2: 0.7422829866409302, variance: 0.10042772442102432
Train size: [784] hidden size: [406] batch size: [10] trial: 16, train_loss: 0.861577, train acc: 29.41176470588235, test loss: 0.842038, test acc: 35.156470588235294, bias2: 0.7417311072349548, variance: 0.1003071740269661
Train size: [784] hidden size: [406] batch size: [10] trial: 17, train_loss: 0.863482, train acc: 30.555555555555557, test l

Train size: [784] hidden size: [468] batch size: [10] trial: 0, train_loss: 0.771121, train acc: 75.0, test loss: 0.799979, test acc: 42.06, bias2: 0.7999786138534546, variance: -9.731370154142382e-10
Train size: [784] hidden size: [468] batch size: [10] trial: 1, train_loss: 0.798476, train acc: 62.5, test loss: 0.817524, test acc: 39.86, bias2: 0.7595955729484558, variance: 0.05792885646224022
Train size: [784] hidden size: [468] batch size: [10] trial: 2, train_loss: 0.772780, train acc: 66.66666666666667, test loss: 0.826713, test acc: 36.98, bias2: 0.750930905342102, variance: 0.07578244805335999
Train size: [784] hidden size: [468] batch size: [10] trial: 3, train_loss: 0.752787, train acc: 68.75, test loss: 0.819671, test acc: 38.215, bias2: 0.7362914085388184, variance: 0.08337972313165665
Train size: [784] hidden size: [468] batch size: [10] trial: 4, train_loss: 0.746670, train acc: 70.0, test loss: 0.823331, test acc: 37.142, bias2: 0.7357349991798401, variance: 0.0875961333

Train size: [784] hidden size: [468] batch size: [10] trial: 38, train_loss: 0.797683, train acc: 42.30769230769231, test loss: 0.819980, test acc: 38.767692307692315, bias2: 0.7164753079414368, variance: 0.10350441932678223
Train size: [784] hidden size: [468] batch size: [10] trial: 39, train_loss: 0.795086, train acc: 43.125, test loss: 0.819813, test acc: 38.84250000000001, bias2: 0.7162322998046875, variance: 0.10358057171106339
Train size: [784] hidden size: [468] batch size: [10] trial: 40, train_loss: 0.796947, train acc: 42.68292682926829, test loss: 0.820297, test acc: 38.70487804878049, bias2: 0.7167869210243225, variance: 0.10350994765758514
Train size: [784] hidden size: [468] batch size: [10] trial: 41, train_loss: 0.795819, train acc: 43.45238095238095, test loss: 0.820182, test acc: 38.68309523809525, bias2: 0.7165820598602295, variance: 0.10360021144151688
Train size: [784] hidden size: [468] batch size: [10] trial: 42, train_loss: 0.800208, train acc: 43.6046511627906

Train size: [784] hidden size: [538] batch size: [10] trial: 26, train_loss: 0.786990, train acc: 49.074074074074076, test loss: 0.798005, test acc: 43.065185185185186, bias2: 0.6972885131835938, variance: 0.10071608424186707
Train size: [784] hidden size: [538] batch size: [10] trial: 27, train_loss: 0.786272, train acc: 49.107142857142854, test loss: 0.798122, test acc: 43.07285714285714, bias2: 0.6974200010299683, variance: 0.10070187598466873
Train size: [784] hidden size: [538] batch size: [10] trial: 28, train_loss: 0.790266, train acc: 48.275862068965516, test loss: 0.797700, test acc: 43.12275862068965, bias2: 0.6967522501945496, variance: 0.10094783455133438
Train size: [784] hidden size: [538] batch size: [10] trial: 29, train_loss: 0.794466, train acc: 47.5, test loss: 0.796978, test acc: 43.19866666666667, bias2: 0.695874035358429, variance: 0.10110416263341904
Train size: [784] hidden size: [538] batch size: [10] trial: 30, train_loss: 0.794920, train acc: 48.3870967741935

Train size: [784] hidden size: [620] batch size: [10] trial: 13, train_loss: 0.735398, train acc: 55.357142857142854, test loss: 0.772164, test acc: 46.899285714285725, bias2: 0.6765202879905701, variance: 0.09564322978258133
Train size: [784] hidden size: [620] batch size: [10] trial: 14, train_loss: 0.721628, train acc: 56.666666666666664, test loss: 0.770229, test acc: 47.22400000000001, bias2: 0.6746964454650879, variance: 0.09553281217813492
Train size: [784] hidden size: [620] batch size: [10] trial: 15, train_loss: 0.728148, train acc: 54.6875, test loss: 0.768487, test acc: 47.59750000000001, bias2: 0.6728902459144592, variance: 0.09559695422649384
Train size: [784] hidden size: [620] batch size: [10] trial: 16, train_loss: 0.726148, train acc: 55.88235294117647, test loss: 0.768187, test acc: 47.74529411764707, bias2: 0.6725611686706543, variance: 0.09562595188617706
Train size: [784] hidden size: [620] batch size: [10] trial: 17, train_loss: 0.722867, train acc: 56.9444444444

Train size: [784] hidden size: [714] batch size: [10] trial: 0, train_loss: 0.625621, train acc: 50.0, test loss: 0.749642, test acc: 50.78, bias2: 0.7496424317359924, variance: 1.9462739753173253e-10
Train size: [784] hidden size: [714] batch size: [10] trial: 1, train_loss: 0.706077, train acc: 50.0, test loss: 0.752675, test acc: 49.465, bias2: 0.7039028406143188, variance: 0.04877181351184845
Train size: [784] hidden size: [714] batch size: [10] trial: 2, train_loss: 0.774073, train acc: 33.333333333333336, test loss: 0.751035, test acc: 49.92666666666667, bias2: 0.6864446997642517, variance: 0.0645902156829834
Train size: [784] hidden size: [714] batch size: [10] trial: 3, train_loss: 0.769880, train acc: 31.25, test loss: 0.750697, test acc: 50.365, bias2: 0.6775305271148682, variance: 0.07316620647907257
Train size: [784] hidden size: [714] batch size: [10] trial: 4, train_loss: 0.778168, train acc: 35.0, test loss: 0.749941, test acc: 50.168, bias2: 0.6726243495941162, variance

Train size: [784] hidden size: [714] batch size: [10] trial: 38, train_loss: 0.751342, train acc: 52.56410256410256, test loss: 0.754821, test acc: 49.28820512820513, bias2: 0.659045934677124, variance: 0.095774807035923
Train size: [784] hidden size: [714] batch size: [10] trial: 39, train_loss: 0.756569, train acc: 51.25, test loss: 0.754507, test acc: 49.3445, bias2: 0.6585610508918762, variance: 0.09594600647687912
Train size: [784] hidden size: [714] batch size: [10] trial: 40, train_loss: 0.757627, train acc: 50.609756097560975, test loss: 0.755346, test acc: 49.175609756097565, bias2: 0.6592504978179932, variance: 0.09609544277191162
Train size: [784] hidden size: [714] batch size: [10] trial: 41, train_loss: 0.759599, train acc: 50.0, test loss: 0.755208, test acc: 49.151428571428575, bias2: 0.6590896844863892, variance: 0.09611798077821732
Train size: [784] hidden size: [714] batch size: [10] trial: 42, train_loss: 0.756820, train acc: 50.58139534883721, test loss: 0.754462, t

Train size: [784] hidden size: [822] batch size: [10] trial: 26, train_loss: 0.679074, train acc: 62.03703703703704, test loss: 0.732360, test acc: 52.93925925925925, bias2: 0.6407224535942078, variance: 0.09163777530193329
Train size: [784] hidden size: [822] batch size: [10] trial: 27, train_loss: 0.689040, train acc: 61.607142857142854, test loss: 0.732094, test acc: 52.99142857142857, bias2: 0.6401552557945251, variance: 0.09193911403417587
Train size: [784] hidden size: [822] batch size: [10] trial: 28, train_loss: 0.693932, train acc: 61.206896551724135, test loss: 0.732145, test acc: 52.98068965517241, bias2: 0.6404227614402771, variance: 0.09172258526086807
Train size: [784] hidden size: [822] batch size: [10] trial: 29, train_loss: 0.694905, train acc: 60.833333333333336, test loss: 0.731417, test acc: 53.13700000000001, bias2: 0.6393401622772217, variance: 0.09207669645547867
Train size: [784] hidden size: [822] batch size: [10] trial: 30, train_loss: 0.694052, train acc: 60.

Train size: [784] hidden size: [946] batch size: [10] trial: 14, train_loss: 0.658822, train acc: 63.333333333333336, test loss: 0.705701, test acc: 56.88133333333334, bias2: 0.6214942932128906, variance: 0.08420699089765549
Train size: [784] hidden size: [946] batch size: [10] trial: 15, train_loss: 0.655505, train acc: 62.5, test loss: 0.705256, test acc: 56.917500000000004, bias2: 0.6203081607818604, variance: 0.08494759351015091
Train size: [784] hidden size: [946] batch size: [10] trial: 16, train_loss: 0.652146, train acc: 61.76470588235294, test loss: 0.705319, test acc: 56.91235294117648, bias2: 0.6201502084732056, variance: 0.08516833931207657
Train size: [784] hidden size: [946] batch size: [10] trial: 17, train_loss: 0.648492, train acc: 62.5, test loss: 0.705970, test acc: 56.82944444444445, bias2: 0.6203916668891907, variance: 0.08557809144258499
Train size: [784] hidden size: [946] batch size: [10] trial: 18, train_loss: 0.657718, train acc: 61.8421052631579, test loss: 0

Train size: [784] hidden size: [1089] batch size: [10] trial: 1, train_loss: 0.502216, train acc: 100.0, test loss: 0.689083, test acc: 58.17, bias2: 0.6420285701751709, variance: 0.047053948044776917
Train size: [784] hidden size: [1089] batch size: [10] trial: 2, train_loss: 0.555020, train acc: 83.33333333333333, test loss: 0.685561, test acc: 58.42333333333334, bias2: 0.6257922053337097, variance: 0.05976906046271324
Train size: [784] hidden size: [1089] batch size: [10] trial: 3, train_loss: 0.588480, train acc: 75.0, test loss: 0.684831, test acc: 58.352500000000006, bias2: 0.618257462978363, variance: 0.0665733590722084
Train size: [784] hidden size: [1089] batch size: [10] trial: 4, train_loss: 0.588621, train acc: 75.0, test loss: 0.678522, test acc: 59.25200000000001, bias2: 0.6072031259536743, variance: 0.07131916284561157
Train size: [784] hidden size: [1089] batch size: [10] trial: 5, train_loss: 0.603564, train acc: 70.83333333333333, test loss: 0.681257, test acc: 59.015

Train size: [784] hidden size: [1089] batch size: [10] trial: 38, train_loss: 0.652518, train acc: 62.17948717948718, test loss: 0.679194, test acc: 60.17102564102562, bias2: 0.592875599861145, variance: 0.0863184705376625
Train size: [784] hidden size: [1089] batch size: [10] trial: 39, train_loss: 0.654297, train acc: 62.5, test loss: 0.679343, test acc: 60.12224999999997, bias2: 0.5929726362228394, variance: 0.08637078106403351
Train size: [784] hidden size: [1089] batch size: [10] trial: 40, train_loss: 0.646661, train acc: 63.41463414634146, test loss: 0.679081, test acc: 60.12804878048778, bias2: 0.5926486253738403, variance: 0.08643259108066559
Train size: [784] hidden size: [1089] batch size: [10] trial: 41, train_loss: 0.650618, train acc: 63.095238095238095, test loss: 0.679626, test acc: 60.06285714285712, bias2: 0.593120276927948, variance: 0.08650606125593185
Train size: [784] hidden size: [1089] batch size: [10] trial: 42, train_loss: 0.646299, train acc: 63.9534883720930

Train size: [784] hidden size: [1254] batch size: [10] trial: 25, train_loss: 0.648100, train acc: 61.53846153846154, test loss: 0.657390, test acc: 62.91653846153849, bias2: 0.57572340965271, variance: 0.08166663348674774
Train size: [784] hidden size: [1254] batch size: [10] trial: 26, train_loss: 0.654380, train acc: 60.18518518518518, test loss: 0.657625, test acc: 62.93481481481484, bias2: 0.5756587386131287, variance: 0.08196607977151871
Train size: [784] hidden size: [1254] batch size: [10] trial: 27, train_loss: 0.655270, train acc: 60.714285714285715, test loss: 0.657761, test acc: 62.93964285714288, bias2: 0.5754978060722351, variance: 0.08226322382688522
Train size: [784] hidden size: [1254] batch size: [10] trial: 28, train_loss: 0.651428, train acc: 61.206896551724135, test loss: 0.657710, test acc: 62.91482758620692, bias2: 0.5754001140594482, variance: 0.08230943232774734
Train size: [784] hidden size: [1254] batch size: [10] trial: 29, train_loss: 0.649667, train acc: 6

Train size: [784] hidden size: [1444] batch size: [10] trial: 12, train_loss: 0.618206, train acc: 69.23076923076923, test loss: 0.630757, test acc: 66.41461538461537, bias2: 0.5555335283279419, variance: 0.07522343099117279
Train size: [784] hidden size: [1444] batch size: [10] trial: 13, train_loss: 0.614766, train acc: 67.85714285714286, test loss: 0.631380, test acc: 66.31928571428571, bias2: 0.5555240511894226, variance: 0.07585550844669342
Train size: [784] hidden size: [1444] batch size: [10] trial: 14, train_loss: 0.608309, train acc: 70.0, test loss: 0.631499, test acc: 66.136, bias2: 0.5550808310508728, variance: 0.07641785591840744
Train size: [784] hidden size: [1444] batch size: [10] trial: 15, train_loss: 0.617716, train acc: 70.3125, test loss: 0.631889, test acc: 65.936875, bias2: 0.5548590421676636, variance: 0.07703009247779846
Train size: [784] hidden size: [1444] batch size: [10] trial: 16, train_loss: 0.612620, train acc: 72.05882352941177, test loss: 0.632585, tes

Train size: [784] hidden size: [1444] batch size: [10] trial: 49, train_loss: 0.633272, train acc: 68.5, test loss: 0.632582, test acc: 65.682, bias2: 0.5513876080513, variance: 0.08119484037160873
##################################################
Train size: [784] hidden size: [1663] batch size: [10] trial: 0, train_loss: 0.431119, train acc: 100.0, test loss: 0.599150, test acc: 68.71, bias2: 0.5991496443748474, variance: -4.4764303375188774e-09
Train size: [784] hidden size: [1663] batch size: [10] trial: 1, train_loss: 0.415100, train acc: 100.0, test loss: 0.613625, test acc: 66.225, bias2: 0.5709326267242432, variance: 0.042692605406045914
Train size: [784] hidden size: [1663] batch size: [10] trial: 2, train_loss: 0.473420, train acc: 83.33333333333333, test loss: 0.613968, test acc: 66.51333333333334, bias2: 0.5587524771690369, variance: 0.05521531403064728
Train size: [784] hidden size: [1663] batch size: [10] trial: 3, train_loss: 0.501787, train acc: 81.25, test loss: 0.610

Train size: [784] hidden size: [1663] batch size: [10] trial: 37, train_loss: 0.602519, train acc: 71.05263157894737, test loss: 0.612137, test acc: 68.10315789473684, bias2: 0.5338424444198608, variance: 0.07829441130161285
Train size: [784] hidden size: [1663] batch size: [10] trial: 38, train_loss: 0.606277, train acc: 71.15384615384616, test loss: 0.612028, test acc: 68.0874358974359, bias2: 0.5336858034133911, variance: 0.07834187150001526
Train size: [784] hidden size: [1663] batch size: [10] trial: 39, train_loss: 0.604711, train acc: 71.25, test loss: 0.612012, test acc: 68.06725, bias2: 0.5335797667503357, variance: 0.078432098031044
Train size: [784] hidden size: [1663] batch size: [10] trial: 40, train_loss: 0.604597, train acc: 70.73170731707317, test loss: 0.611865, test acc: 68.0909756097561, bias2: 0.5334452390670776, variance: 0.07841970026493073
Train size: [784] hidden size: [1663] batch size: [10] trial: 41, train_loss: 0.607687, train acc: 70.83333333333333, test lo

Train size: [784] hidden size: [1915] batch size: [10] trial: 24, train_loss: 0.563620, train acc: 73.0, test loss: 0.587796, test acc: 70.49319999999999, bias2: 0.513749897480011, variance: 0.0740463063120842
Train size: [784] hidden size: [1915] batch size: [10] trial: 25, train_loss: 0.576626, train acc: 71.15384615384616, test loss: 0.587934, test acc: 70.4853846153846, bias2: 0.513769268989563, variance: 0.07416455447673798
Train size: [784] hidden size: [1915] batch size: [10] trial: 26, train_loss: 0.572467, train acc: 72.22222222222223, test loss: 0.587918, test acc: 70.49333333333333, bias2: 0.5136857628822327, variance: 0.07423210889101028
Train size: [784] hidden size: [1915] batch size: [10] trial: 27, train_loss: 0.567829, train acc: 72.32142857142857, test loss: 0.588416, test acc: 70.50785714285713, bias2: 0.5139253735542297, variance: 0.0744902566075325
Train size: [784] hidden size: [1915] batch size: [10] trial: 28, train_loss: 0.567245, train acc: 72.41379310344827, 

Train size: [784] hidden size: [2204] batch size: [10] trial: 11, train_loss: 0.540239, train acc: 79.16666666666667, test loss: 0.574769, test acc: 71.65083333333334, bias2: 0.5051248669624329, variance: 0.06964389979839325
Train size: [784] hidden size: [2204] batch size: [10] trial: 12, train_loss: 0.546432, train acc: 76.92307692307692, test loss: 0.574745, test acc: 71.63769230769232, bias2: 0.5046477317810059, variance: 0.07009725272655487
Train size: [784] hidden size: [2204] batch size: [10] trial: 13, train_loss: 0.542609, train acc: 76.78571428571429, test loss: 0.575105, test acc: 71.76214285714286, bias2: 0.5045003294944763, variance: 0.07060455530881882
Train size: [784] hidden size: [2204] batch size: [10] trial: 14, train_loss: 0.536664, train acc: 78.33333333333333, test loss: 0.574621, test acc: 71.69000000000001, bias2: 0.5037961602210999, variance: 0.07082502543926239
Train size: [784] hidden size: [2204] batch size: [10] trial: 15, train_loss: 0.548169, train acc: 7

Train size: [784] hidden size: [2204] batch size: [10] trial: 49, train_loss: 0.549167, train acc: 76.0, test loss: 0.573762, test acc: 71.95500000000001, bias2: 0.5001378655433655, variance: 0.07362433522939682
##################################################
Train size: [784] hidden size: [2538] batch size: [10] trial: 0, train_loss: 0.370261, train acc: 100.0, test loss: 0.550142, test acc: 74.04, bias2: 0.5501424074172974, variance: 3.8925479506346505e-10
Train size: [784] hidden size: [2538] batch size: [10] trial: 1, train_loss: 0.553062, train acc: 75.0, test loss: 0.547645, test acc: 74.265, bias2: 0.5100464224815369, variance: 0.037598416209220886
Train size: [784] hidden size: [2538] batch size: [10] trial: 2, train_loss: 0.478278, train acc: 83.33333333333333, test loss: 0.550785, test acc: 73.89999999999999, bias2: 0.5021836161613464, variance: 0.04860164597630501
Train size: [784] hidden size: [2538] batch size: [10] trial: 3, train_loss: 0.441951, train acc: 87.5, test 

Train size: [784] hidden size: [2538] batch size: [10] trial: 37, train_loss: 0.500278, train acc: 81.57894736842105, test loss: 0.555230, test acc: 73.8257894736842, bias2: 0.48399290442466736, variance: 0.0712372288107872
Train size: [784] hidden size: [2538] batch size: [10] trial: 38, train_loss: 0.502317, train acc: 81.41025641025641, test loss: 0.555279, test acc: 73.82615384615384, bias2: 0.4840788245201111, variance: 0.07120008766651154
Train size: [784] hidden size: [2538] batch size: [10] trial: 39, train_loss: 0.503032, train acc: 81.25, test loss: 0.555097, test acc: 73.88275, bias2: 0.48386117815971375, variance: 0.07123555988073349
Train size: [784] hidden size: [2538] batch size: [10] trial: 40, train_loss: 0.507219, train acc: 81.09756097560975, test loss: 0.555166, test acc: 73.87243902439025, bias2: 0.48388972878456116, variance: 0.0712759867310524
Train size: [784] hidden size: [2538] batch size: [10] trial: 41, train_loss: 0.507321, train acc: 81.54761904761905, tes

Train size: [784] hidden size: [2922] batch size: [10] trial: 24, train_loss: 0.458617, train acc: 84.0, test loss: 0.537320, test acc: 75.17680000000001, bias2: 0.4686568081378937, variance: 0.06866278499364853
Train size: [784] hidden size: [2922] batch size: [10] trial: 25, train_loss: 0.459420, train acc: 84.61538461538461, test loss: 0.537445, test acc: 75.105, bias2: 0.4686526656150818, variance: 0.06879221647977829
Train size: [784] hidden size: [2922] batch size: [10] trial: 26, train_loss: 0.457002, train acc: 84.25925925925925, test loss: 0.537356, test acc: 75.09555555555556, bias2: 0.4684774577617645, variance: 0.06887802481651306
Train size: [784] hidden size: [2922] batch size: [10] trial: 27, train_loss: 0.461108, train acc: 83.92857142857143, test loss: 0.537660, test acc: 75.13428571428572, bias2: 0.46876806020736694, variance: 0.06889211386442184
Train size: [784] hidden size: [2922] batch size: [10] trial: 28, train_loss: 0.459799, train acc: 83.62068965517241, test 

Train size: [784] hidden size: [3365] batch size: [10] trial: 11, train_loss: 0.413069, train acc: 89.58333333333333, test loss: 0.523016, test acc: 77.165, bias2: 0.45954659581184387, variance: 0.0634695515036583
Train size: [784] hidden size: [3365] batch size: [10] trial: 12, train_loss: 0.408511, train acc: 90.38461538461539, test loss: 0.523193, test acc: 77.02307692307694, bias2: 0.459386944770813, variance: 0.06380564719438553
Train size: [784] hidden size: [3365] batch size: [10] trial: 13, train_loss: 0.410236, train acc: 91.07142857142857, test loss: 0.523060, test acc: 76.98214285714288, bias2: 0.45911580324172974, variance: 0.06394462287425995
Train size: [784] hidden size: [3365] batch size: [10] trial: 14, train_loss: 0.414644, train acc: 90.0, test loss: 0.522685, test acc: 76.96466666666669, bias2: 0.45849525928497314, variance: 0.06418998539447784
Train size: [784] hidden size: [3365] batch size: [10] trial: 15, train_loss: 0.411367, train acc: 90.625, test loss: 0.522

Train size: [784] hidden size: [3365] batch size: [10] trial: 48, train_loss: 0.443440, train acc: 84.18367346938776, test loss: 0.522116, test acc: 76.86857142857144, bias2: 0.45456868410110474, variance: 0.06754691153764725
Train size: [784] hidden size: [3365] batch size: [10] trial: 49, train_loss: 0.442061, train acc: 84.5, test loss: 0.522102, test acc: 76.86300000000001, bias2: 0.4545289874076843, variance: 0.06757300347089767
##################################################
Train size: [784] hidden size: [3874] batch size: [10] trial: 0, train_loss: 0.444831, train acc: 100.0, test loss: 0.499809, test acc: 78.85, bias2: 0.4998089075088501, variance: -3.5032932110823367e-09
Train size: [784] hidden size: [3874] batch size: [10] trial: 1, train_loss: 0.450504, train acc: 100.0, test loss: 0.506683, test acc: 78.125, bias2: 0.4728585481643677, variance: 0.03382466733455658
Train size: [784] hidden size: [3874] batch size: [10] trial: 2, train_loss: 0.478674, train acc: 91.66666

Train size: [784] hidden size: [3874] batch size: [10] trial: 35, train_loss: 0.457174, train acc: 81.94444444444444, test loss: 0.508228, test acc: 77.96055555555553, bias2: 0.44328761100769043, variance: 0.06494015455245972
Train size: [784] hidden size: [3874] batch size: [10] trial: 36, train_loss: 0.459482, train acc: 81.75675675675676, test loss: 0.508340, test acc: 77.97675675675673, bias2: 0.44340115785598755, variance: 0.06493836641311646
Train size: [784] hidden size: [3874] batch size: [10] trial: 37, train_loss: 0.460103, train acc: 80.92105263157895, test loss: 0.508399, test acc: 77.95289473684208, bias2: 0.44334474205970764, variance: 0.06505444645881653
Train size: [784] hidden size: [3874] batch size: [10] trial: 38, train_loss: 0.457636, train acc: 81.41025641025641, test loss: 0.508537, test acc: 77.895641025641, bias2: 0.4432927072048187, variance: 0.0652446523308754
Train size: [784] hidden size: [3874] batch size: [10] trial: 39, train_loss: 0.457431, train acc: 8

Train size: [784] hidden size: [4461] batch size: [10] trial: 22, train_loss: 0.391876, train acc: 89.1304347826087, test loss: 0.494235, test acc: 79.04391304347826, bias2: 0.4310750961303711, variance: 0.06315948069095612
Train size: [784] hidden size: [4461] batch size: [10] trial: 23, train_loss: 0.391603, train acc: 89.58333333333333, test loss: 0.494008, test acc: 79.09625, bias2: 0.4306310713291168, variance: 0.06337667256593704
Train size: [784] hidden size: [4461] batch size: [10] trial: 24, train_loss: 0.395892, train acc: 89.0, test loss: 0.493550, test acc: 79.17599999999999, bias2: 0.4302004277706146, variance: 0.0633496418595314
Train size: [784] hidden size: [4461] batch size: [10] trial: 25, train_loss: 0.397107, train acc: 88.46153846153847, test loss: 0.493247, test acc: 79.22230769230768, bias2: 0.4298144280910492, variance: 0.06343241780996323
Train size: [784] hidden size: [4461] batch size: [10] trial: 26, train_loss: 0.400394, train acc: 88.88888888888889, test l

Train size: [784] hidden size: [5136] batch size: [10] trial: 9, train_loss: 0.459809, train acc: 90.0, test loss: 0.485000, test acc: 79.66599999999998, bias2: 0.4268801212310791, variance: 0.058120131492614746
Train size: [784] hidden size: [5136] batch size: [10] trial: 10, train_loss: 0.473071, train acc: 88.63636363636364, test loss: 0.483809, test acc: 79.84727272727271, bias2: 0.42543092370033264, variance: 0.058378029614686966
Train size: [784] hidden size: [5136] batch size: [10] trial: 11, train_loss: 0.463550, train acc: 89.58333333333333, test loss: 0.482848, test acc: 79.98499999999999, bias2: 0.4242147207260132, variance: 0.058633413165807724
Train size: [784] hidden size: [5136] batch size: [10] trial: 12, train_loss: 0.458852, train acc: 90.38461538461539, test loss: 0.483016, test acc: 80.02153846153844, bias2: 0.42380762100219727, variance: 0.0592082217335701
Train size: [784] hidden size: [5136] batch size: [10] trial: 13, train_loss: 0.443623, train acc: 91.07142857

Train size: [784] hidden size: [5136] batch size: [10] trial: 46, train_loss: 0.439197, train acc: 86.17021276595744, test loss: 0.481816, test acc: 80.02702127659572, bias2: 0.4186793863773346, variance: 0.06313657015562057
Train size: [784] hidden size: [5136] batch size: [10] trial: 47, train_loss: 0.439354, train acc: 86.45833333333333, test loss: 0.481800, test acc: 80.04249999999998, bias2: 0.4186745882034302, variance: 0.06312543153762817
Train size: [784] hidden size: [5136] batch size: [10] trial: 48, train_loss: 0.440611, train acc: 86.22448979591837, test loss: 0.481863, test acc: 80.04918367346937, bias2: 0.4186948239803314, variance: 0.06316834688186646
Train size: [784] hidden size: [5136] batch size: [10] trial: 49, train_loss: 0.442406, train acc: 86.0, test loss: 0.481847, test acc: 80.06219999999998, bias2: 0.4186932444572449, variance: 0.06315372884273529
##################################################
Train size: [784] hidden size: [5914] batch size: [10] trial: 

Train size: [784] hidden size: [5914] batch size: [10] trial: 33, train_loss: 0.426153, train acc: 86.02941176470588, test loss: 0.468987, test acc: 80.92088235294116, bias2: 0.40804845094680786, variance: 0.060938913375139236
Train size: [784] hidden size: [5914] batch size: [10] trial: 34, train_loss: 0.421997, train acc: 86.42857142857143, test loss: 0.468863, test acc: 80.93542857142855, bias2: 0.4078705906867981, variance: 0.06099262833595276
Train size: [784] hidden size: [5914] batch size: [10] trial: 35, train_loss: 0.417020, train acc: 86.80555555555556, test loss: 0.468829, test acc: 80.92388888888887, bias2: 0.4077986776828766, variance: 0.061030417680740356
Train size: [784] hidden size: [5914] batch size: [10] trial: 36, train_loss: 0.419598, train acc: 86.48648648648648, test loss: 0.468932, test acc: 80.89810810810809, bias2: 0.407795786857605, variance: 0.06113646179437637
Train size: [784] hidden size: [5914] batch size: [10] trial: 37, train_loss: 0.415251, train acc:

Train size: [784] hidden size: [6809] batch size: [10] trial: 21, train_loss: 0.453528, train acc: 84.0909090909091, test loss: 0.457426, test acc: 81.74318181818184, bias2: 0.3975454866886139, variance: 0.05988029018044472
Train size: [784] hidden size: [6809] batch size: [10] trial: 22, train_loss: 0.447527, train acc: 84.78260869565217, test loss: 0.457239, test acc: 81.80391304347827, bias2: 0.39734888076782227, variance: 0.05989007279276848
Train size: [784] hidden size: [6809] batch size: [10] trial: 23, train_loss: 0.435673, train acc: 85.41666666666667, test loss: 0.457361, test acc: 81.78083333333335, bias2: 0.39726585149765015, variance: 0.06009502708911896
Train size: [784] hidden size: [6809] batch size: [10] trial: 24, train_loss: 0.433116, train acc: 86.0, test loss: 0.457429, test acc: 81.81800000000003, bias2: 0.3973245918750763, variance: 0.06010421738028526
Train size: [784] hidden size: [6809] batch size: [10] trial: 25, train_loss: 0.432064, train acc: 85.5769230769

Train size: [784] hidden size: [7840] batch size: [10] trial: 8, train_loss: 0.345390, train acc: 94.44444444444444, test loss: 0.450046, test acc: 81.74555555555555, bias2: 0.3943619132041931, variance: 0.055684544146060944
Train size: [784] hidden size: [7840] batch size: [10] trial: 9, train_loss: 0.360622, train acc: 92.5, test loss: 0.449995, test acc: 81.79400000000001, bias2: 0.39392995834350586, variance: 0.05606529861688614
Train size: [784] hidden size: [7840] batch size: [10] trial: 10, train_loss: 0.355053, train acc: 93.18181818181819, test loss: 0.450455, test acc: 81.87727272727274, bias2: 0.39337360858917236, variance: 0.0570812001824379
Train size: [784] hidden size: [7840] batch size: [10] trial: 11, train_loss: 0.349454, train acc: 93.75, test loss: 0.449709, test acc: 81.99416666666667, bias2: 0.39224928617477417, variance: 0.05745977535843849
Train size: [784] hidden size: [7840] batch size: [10] trial: 12, train_loss: 0.347277, train acc: 94.23076923076923, test l

Train size: [784] hidden size: [7840] batch size: [10] trial: 45, train_loss: 0.365569, train acc: 91.30434782608695, test loss: 0.447581, test acc: 82.45086956521739, bias2: 0.38759279251098633, variance: 0.059988316148519516
Train size: [784] hidden size: [7840] batch size: [10] trial: 46, train_loss: 0.366813, train acc: 91.48936170212765, test loss: 0.447682, test acc: 82.43404255319147, bias2: 0.3876280188560486, variance: 0.0600535124540329
Train size: [784] hidden size: [7840] batch size: [10] trial: 47, train_loss: 0.368064, train acc: 91.14583333333333, test loss: 0.447603, test acc: 82.41270833333333, bias2: 0.3874644637107849, variance: 0.06013841927051544
Train size: [784] hidden size: [7840] batch size: [10] trial: 48, train_loss: 0.367686, train acc: 91.3265306122449, test loss: 0.447551, test acc: 82.4308163265306, bias2: 0.3874455690383911, variance: 0.06010521948337555
Train size: [784] hidden size: [7840] batch size: [10] trial: 49, train_loss: 0.372070, train acc: 90

Train size: [784] hidden size: [8] batch size: [784] trial: 32, train_loss: 1.196624, train acc: 9.790507111935682, test loss: 1.197332, test acc: 9.649393939393939, bias2: 0.9872801303863525, variance: 0.2100515514612198
Train size: [784] hidden size: [8] batch size: [784] trial: 33, train_loss: 1.201075, train acc: 9.73889555822329, test loss: 1.201678, test acc: 9.581470588235291, bias2: 0.9889092445373535, variance: 0.21276867389678955
Train size: [784] hidden size: [8] batch size: [784] trial: 34, train_loss: 1.202144, train acc: 9.602769679300291, test loss: 1.202305, test acc: 9.460857142857142, bias2: 0.9882786870002747, variance: 0.21402603387832642
Train size: [784] hidden size: [8] batch size: [784] trial: 35, train_loss: 1.202162, train acc: 9.615929705215418, test loss: 1.201639, test acc: 9.469444444444441, bias2: 0.9885290265083313, variance: 0.21311013400554657
Train size: [784] hidden size: [8] batch size: [784] trial: 36, train_loss: 1.200203, train acc: 9.54219525648

Train size: [784] hidden size: [9] batch size: [784] trial: 19, train_loss: 1.198717, train acc: 10.676020408163264, test loss: 1.205340, test acc: 10.495, bias2: 0.9866984486579895, variance: 0.2186414748430252
Train size: [784] hidden size: [9] batch size: [784] trial: 20, train_loss: 1.195453, train acc: 10.720359572400389, test loss: 1.201198, test acc: 10.57142857142857, bias2: 0.9863172173500061, variance: 0.21488110721111298
Train size: [784] hidden size: [9] batch size: [784] trial: 21, train_loss: 1.196885, train acc: 10.580936920222634, test loss: 1.202282, test acc: 10.45681818181818, bias2: 0.986412763595581, variance: 0.21586927771568298
Train size: [784] hidden size: [9] batch size: [784] trial: 22, train_loss: 1.191841, train acc: 10.570097604259095, test loss: 1.197180, test acc: 10.455652173913043, bias2: 0.9831213355064392, variance: 0.2140590399503708
Train size: [784] hidden size: [9] batch size: [784] trial: 23, train_loss: 1.192237, train acc: 10.607993197278912, 

Train size: [784] hidden size: [10] batch size: [784] trial: 6, train_loss: 1.178552, train acc: 9.821428571428571, test loss: 1.188938, test acc: 8.942857142857141, bias2: 1.032557487487793, variance: 0.1563807725906372
Train size: [784] hidden size: [10] batch size: [784] trial: 7, train_loss: 1.166479, train acc: 9.885204081632653, test loss: 1.173740, test acc: 9.405, bias2: 1.026110291481018, variance: 0.14762969315052032
Train size: [784] hidden size: [10] batch size: [784] trial: 8, train_loss: 1.172218, train acc: 10.416666666666666, test loss: 1.178428, test acc: 9.926666666666666, bias2: 1.0267398357391357, variance: 0.1516883373260498
Train size: [784] hidden size: [10] batch size: [784] trial: 9, train_loss: 1.161717, train acc: 10.599489795918368, test loss: 1.167688, test acc: 10.293999999999999, bias2: 1.0096341371536255, variance: 0.15805435180664062
Train size: [784] hidden size: [10] batch size: [784] trial: 10, train_loss: 1.167111, train acc: 10.250463821892394, tes

Train size: [784] hidden size: [10] batch size: [784] trial: 43, train_loss: 1.181580, train acc: 10.357722634508347, test loss: 1.187584, test acc: 10.385227272727274, bias2: 0.99515700340271, variance: 0.19242753088474274
Train size: [784] hidden size: [10] batch size: [784] trial: 44, train_loss: 1.182249, train acc: 10.280612244897958, test loss: 1.188702, test acc: 10.348222222222224, bias2: 0.9952821135520935, variance: 0.19342009723186493
Train size: [784] hidden size: [10] batch size: [784] trial: 45, train_loss: 1.181764, train acc: 10.370452528837621, test loss: 1.187988, test acc: 10.40608695652174, bias2: 0.9944033622741699, variance: 0.1935845911502838
Train size: [784] hidden size: [10] batch size: [784] trial: 46, train_loss: 1.181598, train acc: 10.339774207555362, test loss: 1.187926, test acc: 10.401063829787235, bias2: 0.9951759576797485, variance: 0.19274994730949402
Train size: [784] hidden size: [10] batch size: [784] trial: 47, train_loss: 1.181484, train acc: 10

Train size: [784] hidden size: [12] batch size: [784] trial: 30, train_loss: 1.170908, train acc: 11.06813693219223, test loss: 1.172270, test acc: 10.801935483870968, bias2: 0.9717816114425659, variance: 0.20048831403255463
Train size: [784] hidden size: [12] batch size: [784] trial: 31, train_loss: 1.170954, train acc: 11.017219387755102, test loss: 1.172531, test acc: 10.754687500000001, bias2: 0.972886323928833, variance: 0.19964423775672913
Train size: [784] hidden size: [12] batch size: [784] trial: 32, train_loss: 1.169923, train acc: 10.957792207792208, test loss: 1.171433, test acc: 10.706363636363639, bias2: 0.9720715284347534, variance: 0.19936130940914154
Train size: [784] hidden size: [12] batch size: [784] trial: 33, train_loss: 1.170093, train acc: 10.898109243697478, test loss: 1.171133, test acc: 10.660588235294119, bias2: 0.9716185927391052, variance: 0.19951419532299042
Train size: [784] hidden size: [12] batch size: [784] trial: 34, train_loss: 1.169107, train acc: 

Train size: [784] hidden size: [14] batch size: [784] trial: 17, train_loss: 1.172989, train acc: 9.729308390022677, test loss: 1.174126, test acc: 9.508888888888889, bias2: 0.988681435585022, variance: 0.18544457852840424
Train size: [784] hidden size: [14] batch size: [784] trial: 18, train_loss: 1.175223, train acc: 9.834854994629433, test loss: 1.178446, test acc: 9.52, bias2: 0.9918925762176514, variance: 0.18655326962471008
Train size: [784] hidden size: [14] batch size: [784] trial: 19, train_loss: 1.171678, train acc: 9.923469387755103, test loss: 1.174306, test acc: 9.754999999999999, bias2: 0.9868857264518738, variance: 0.1874205321073532
Train size: [784] hidden size: [14] batch size: [784] trial: 20, train_loss: 1.171238, train acc: 9.84572400388727, test loss: 1.173432, test acc: 9.558571428571428, bias2: 0.9858673214912415, variance: 0.1875642091035843
Train size: [784] hidden size: [14] batch size: [784] trial: 21, train_loss: 1.176649, train acc: 9.79243970315399, test 

Train size: [784] hidden size: [16] batch size: [784] trial: 4, train_loss: 1.120524, train acc: 9.209183673469386, test loss: 1.122331, test acc: 9.892, bias2: 0.973439633846283, variance: 0.1488916426897049
Train size: [784] hidden size: [16] batch size: [784] trial: 5, train_loss: 1.154700, train acc: 8.97108843537415, test loss: 1.159488, test acc: 9.391666666666667, bias2: 0.9817613363265991, variance: 0.1777263879776001
Train size: [784] hidden size: [16] batch size: [784] trial: 6, train_loss: 1.149211, train acc: 9.420553935860058, test loss: 1.155166, test acc: 9.525714285714287, bias2: 0.967720627784729, variance: 0.18744561076164246
Train size: [784] hidden size: [16] batch size: [784] trial: 7, train_loss: 1.155087, train acc: 9.40688775510204, test loss: 1.159547, test acc: 9.495000000000001, bias2: 0.9694435596466064, variance: 0.1901036500930786
Train size: [784] hidden size: [16] batch size: [784] trial: 8, train_loss: 1.146992, train acc: 9.608843537414966, test loss: 

Train size: [784] hidden size: [16] batch size: [784] trial: 41, train_loss: 1.139138, train acc: 10.747691933916425, test loss: 1.141465, test acc: 10.48142857142857, bias2: 0.958594560623169, variance: 0.18287041783332825
Train size: [784] hidden size: [16] batch size: [784] trial: 42, train_loss: 1.138698, train acc: 10.797342192691032, test loss: 1.140314, test acc: 10.600465116279068, bias2: 0.9577232599258423, variance: 0.18259099125862122
Train size: [784] hidden size: [16] batch size: [784] trial: 43, train_loss: 1.138544, train acc: 10.751971243042673, test loss: 1.139880, test acc: 10.526136363636361, bias2: 0.9577082395553589, variance: 0.18217149376869202
Train size: [784] hidden size: [16] batch size: [784] trial: 44, train_loss: 1.140381, train acc: 10.770975056689343, test loss: 1.141694, test acc: 10.540888888888887, bias2: 0.9598280191421509, variance: 0.18186555802822113
Train size: [784] hidden size: [16] batch size: [784] trial: 45, train_loss: 1.138382, train acc: 

Train size: [784] hidden size: [18] batch size: [784] trial: 28, train_loss: 1.144884, train acc: 9.869809992962702, test loss: 1.147692, test acc: 9.868965517241381, bias2: 0.9652850031852722, variance: 0.18240709602832794
Train size: [784] hidden size: [18] batch size: [784] trial: 29, train_loss: 1.146894, train acc: 9.812925170068029, test loss: 1.149806, test acc: 9.806333333333335, bias2: 0.9672070145606995, variance: 0.18259865045547485
Train size: [784] hidden size: [18] batch size: [784] trial: 30, train_loss: 1.146650, train acc: 9.817314022383147, test loss: 1.150046, test acc: 9.756774193548388, bias2: 0.9684639573097229, variance: 0.18158240616321564
Train size: [784] hidden size: [18] batch size: [784] trial: 31, train_loss: 1.145541, train acc: 9.861288265306124, test loss: 1.148532, test acc: 9.826875000000001, bias2: 0.9676109552383423, variance: 0.18092110753059387
Train size: [784] hidden size: [18] batch size: [784] trial: 32, train_loss: 1.147669, train acc: 9.7827

Train size: [784] hidden size: [21] batch size: [784] trial: 15, train_loss: 1.115419, train acc: 10.411352040816327, test loss: 1.127036, test acc: 10.044374999999999, bias2: 0.9405400156974792, variance: 0.186495840549469
Train size: [784] hidden size: [21] batch size: [784] trial: 16, train_loss: 1.115937, train acc: 10.414165666266507, test loss: 1.128700, test acc: 9.981176470588235, bias2: 0.9417335391044617, variance: 0.18696625530719757
Train size: [784] hidden size: [21] batch size: [784] trial: 17, train_loss: 1.112832, train acc: 10.445011337868483, test loss: 1.125440, test acc: 10.042777777777777, bias2: 0.9395542144775391, variance: 0.18588557839393616
Train size: [784] hidden size: [21] batch size: [784] trial: 18, train_loss: 1.107708, train acc: 10.855263157894738, test loss: 1.119551, test acc: 10.470526315789474, bias2: 0.9362354278564453, variance: 0.18331573903560638
Train size: [784] hidden size: [21] batch size: [784] trial: 19, train_loss: 1.112595, train acc: 1

Train size: [784] hidden size: [24] batch size: [784] trial: 1, train_loss: 1.081180, train acc: 12.11734693877551, test loss: 1.097352, test acc: 11.46, bias2: 1.003462553024292, variance: 0.09388947486877441
Train size: [784] hidden size: [24] batch size: [784] trial: 2, train_loss: 1.084432, train acc: 11.607142857142856, test loss: 1.099274, test acc: 10.74, bias2: 0.9688873291015625, variance: 0.13038691878318787
Train size: [784] hidden size: [24] batch size: [784] trial: 3, train_loss: 1.108772, train acc: 10.650510204081632, test loss: 1.117072, test acc: 10.3425, bias2: 0.9587351679801941, variance: 0.1583365947008133
Train size: [784] hidden size: [24] batch size: [784] trial: 4, train_loss: 1.116441, train acc: 10.10204081632653, test loss: 1.123609, test acc: 9.738, bias2: 0.9593964219093323, variance: 0.16421262919902802
Train size: [784] hidden size: [24] batch size: [784] trial: 5, train_loss: 1.120285, train acc: 9.226190476190476, test loss: 1.125956, test acc: 8.77666

Train size: [784] hidden size: [24] batch size: [784] trial: 38, train_loss: 1.110045, train acc: 10.72409733124019, test loss: 1.114421, test acc: 10.42461538461538, bias2: 0.9441378712654114, variance: 0.1702832728624344
Train size: [784] hidden size: [24] batch size: [784] trial: 39, train_loss: 1.110349, train acc: 10.880102040816329, test loss: 1.115341, test acc: 10.530749999999996, bias2: 0.9440760016441345, variance: 0.17126531898975372
Train size: [784] hidden size: [24] batch size: [784] trial: 40, train_loss: 1.108114, train acc: 10.8636137381782, test loss: 1.112722, test acc: 10.526097560975606, bias2: 0.942026674747467, variance: 0.1706950068473816
Train size: [784] hidden size: [24] batch size: [784] trial: 41, train_loss: 1.106815, train acc: 10.85094752186589, test loss: 1.111443, test acc: 10.535714285714281, bias2: 0.9409343004226685, variance: 0.17050838470458984
Train size: [784] hidden size: [24] batch size: [784] trial: 42, train_loss: 1.106618, train acc: 10.726

Train size: [784] hidden size: [28] batch size: [784] trial: 25, train_loss: 1.098679, train acc: 11.734693877551024, test loss: 1.099222, test acc: 11.496153846153845, bias2: 0.934532642364502, variance: 0.16468903422355652
Train size: [784] hidden size: [28] batch size: [784] trial: 26, train_loss: 1.097114, train acc: 11.70162509448224, test loss: 1.098404, test acc: 11.478148148148147, bias2: 0.9347779750823975, variance: 0.16362617909908295
Train size: [784] hidden size: [28] batch size: [784] trial: 27, train_loss: 1.097275, train acc: 11.689139941690964, test loss: 1.098156, test acc: 11.491071428571427, bias2: 0.9327374696731567, variance: 0.16541849076747894
Train size: [784] hidden size: [28] batch size: [784] trial: 28, train_loss: 1.097975, train acc: 11.695109078114005, test loss: 1.099189, test acc: 11.431379310344825, bias2: 0.9335601329803467, variance: 0.1656285673379898
Train size: [784] hidden size: [28] batch size: [784] trial: 29, train_loss: 1.098123, train acc: 1

Train size: [784] hidden size: [32] batch size: [784] trial: 11, train_loss: 1.077762, train acc: 10.235969387755103, test loss: 1.080084, test acc: 10.740833333333333, bias2: 0.9294560551643372, variance: 0.15062756836414337
Train size: [784] hidden size: [32] batch size: [784] trial: 12, train_loss: 1.078975, train acc: 10.42974882260597, test loss: 1.080687, test acc: 10.787692307692307, bias2: 0.929277777671814, variance: 0.15140952169895172
Train size: [784] hidden size: [32] batch size: [784] trial: 13, train_loss: 1.077645, train acc: 10.595845481049565, test loss: 1.079759, test acc: 10.809999999999999, bias2: 0.9286352396011353, variance: 0.1511240154504776
Train size: [784] hidden size: [32] batch size: [784] trial: 14, train_loss: 1.085711, train acc: 10.212585034013609, test loss: 1.087366, test acc: 10.443333333333332, bias2: 0.9322594404220581, variance: 0.15510645508766174
Train size: [784] hidden size: [32] batch size: [784] trial: 15, train_loss: 1.087531, train acc: 1

Train size: [784] hidden size: [32] batch size: [784] trial: 48, train_loss: 1.076689, train acc: 11.523844231570177, test loss: 1.078600, test acc: 11.465918367346939, bias2: 0.9181502461433411, variance: 0.1604495644569397
Train size: [784] hidden size: [32] batch size: [784] trial: 49, train_loss: 1.078423, train acc: 11.51020408163265, test loss: 1.080168, test acc: 11.451400000000001, bias2: 0.9187471866607666, variance: 0.16142061352729797
##################################################
Train size: [784] hidden size: [37] batch size: [784] trial: 0, train_loss: 1.030857, train acc: 12.11734693877551, test loss: 1.036066, test acc: 11.28, bias2: 1.036065936088562, variance: -7.785095901269301e-10
Train size: [784] hidden size: [37] batch size: [784] trial: 1, train_loss: 1.031600, train acc: 12.88265306122449, test loss: 1.044256, test acc: 12.219999999999999, bias2: 0.9645112156867981, variance: 0.07974525541067123
Train size: [784] hidden size: [37] batch size: [784] trial: 2

Train size: [784] hidden size: [37] batch size: [784] trial: 35, train_loss: 1.077684, train acc: 11.305980725623584, test loss: 1.081411, test acc: 11.202500000000002, bias2: 0.910322904586792, variance: 0.1710880994796753
Train size: [784] hidden size: [37] batch size: [784] trial: 36, train_loss: 1.076447, train acc: 11.283094318808606, test loss: 1.079867, test acc: 11.220810810810812, bias2: 0.9085885286331177, variance: 0.1712784618139267
Train size: [784] hidden size: [37] batch size: [784] trial: 37, train_loss: 1.077026, train acc: 11.251342642320088, test loss: 1.079902, test acc: 11.227894736842108, bias2: 0.9077600240707397, variance: 0.1721421480178833
Train size: [784] hidden size: [37] batch size: [784] trial: 38, train_loss: 1.077198, train acc: 11.26046572475144, test loss: 1.079766, test acc: 11.241538461538463, bias2: 0.9072769284248352, variance: 0.17248885333538055
Train size: [784] hidden size: [37] batch size: [784] trial: 39, train_loss: 1.076350, train acc: 11.

Train size: [784] hidden size: [43] batch size: [784] trial: 22, train_loss: 1.056561, train acc: 11.873336291038152, test loss: 1.059821, test acc: 12.062173913043479, bias2: 0.91846764087677, variance: 0.14135372638702393
Train size: [784] hidden size: [43] batch size: [784] trial: 23, train_loss: 1.056307, train acc: 11.81441326530612, test loss: 1.060001, test acc: 11.979583333333332, bias2: 0.9177044630050659, variance: 0.1422964185476303
Train size: [784] hidden size: [43] batch size: [784] trial: 24, train_loss: 1.055041, train acc: 11.948979591836732, test loss: 1.059023, test acc: 12.0808, bias2: 0.9165000319480896, variance: 0.1425226479768753
Train size: [784] hidden size: [43] batch size: [784] trial: 25, train_loss: 1.056098, train acc: 11.98489010989011, test loss: 1.059699, test acc: 12.126923076923076, bias2: 0.91762375831604, variance: 0.1420753300189972
Train size: [784] hidden size: [43] batch size: [784] trial: 26, train_loss: 1.054909, train acc: 12.051209372637944

Train size: [784] hidden size: [49] batch size: [784] trial: 9, train_loss: 1.034114, train acc: 13.163265306122451, test loss: 1.037707, test acc: 13.025, bias2: 0.905888557434082, variance: 0.13181838393211365
Train size: [784] hidden size: [49] batch size: [784] trial: 10, train_loss: 1.037588, train acc: 12.697124304267163, test loss: 1.041125, test acc: 12.522727272727273, bias2: 0.9049180150032043, variance: 0.13620740175247192
Train size: [784] hidden size: [49] batch size: [784] trial: 11, train_loss: 1.038430, train acc: 12.383078231292517, test loss: 1.041706, test acc: 12.29, bias2: 0.9033154249191284, variance: 0.13839104771614075
Train size: [784] hidden size: [49] batch size: [784] trial: 12, train_loss: 1.044642, train acc: 12.284144427001571, test loss: 1.047057, test acc: 12.214615384615383, bias2: 0.9047027826309204, variance: 0.14235401153564453
Train size: [784] hidden size: [49] batch size: [784] trial: 13, train_loss: 1.043174, train acc: 11.989795918367347, test 

Train size: [784] hidden size: [49] batch size: [784] trial: 46, train_loss: 1.047877, train acc: 12.326313504125057, test loss: 1.049285, test acc: 12.376808510638297, bias2: 0.8976351022720337, variance: 0.1516500562429428
Train size: [784] hidden size: [49] batch size: [784] trial: 47, train_loss: 1.047897, train acc: 12.39902210884354, test loss: 1.049608, test acc: 12.421666666666665, bias2: 0.897497296333313, variance: 0.15211030840873718
Train size: [784] hidden size: [49] batch size: [784] trial: 48, train_loss: 1.047194, train acc: 12.385464389837571, test loss: 1.049013, test acc: 12.379183673469386, bias2: 0.8973394632339478, variance: 0.15167340636253357
Train size: [784] hidden size: [49] batch size: [784] trial: 49, train_loss: 1.047361, train acc: 12.321428571428573, test loss: 1.049153, test acc: 12.364399999999998, bias2: 0.897835373878479, variance: 0.15131793916225433
##################################################
Train size: [784] hidden size: [56] batch size: [

Train size: [784] hidden size: [56] batch size: [784] trial: 33, train_loss: 1.039943, train acc: 11.993547418967589, test loss: 1.043845, test acc: 11.706176470588233, bias2: 0.901371955871582, variance: 0.1424727737903595
Train size: [784] hidden size: [56] batch size: [784] trial: 34, train_loss: 1.039265, train acc: 11.935131195335279, test loss: 1.042682, test acc: 11.737142857142857, bias2: 0.900676429271698, variance: 0.14200584590435028
Train size: [784] hidden size: [56] batch size: [784] trial: 35, train_loss: 1.038028, train acc: 11.90121882086168, test loss: 1.041349, test acc: 11.747499999999999, bias2: 0.8989838361740112, variance: 0.14236512780189514
Train size: [784] hidden size: [56] batch size: [784] trial: 36, train_loss: 1.037505, train acc: 11.796745725317155, test loss: 1.040639, test acc: 11.701621621621621, bias2: 0.8983574509620667, variance: 0.14228196442127228
Train size: [784] hidden size: [56] batch size: [784] trial: 37, train_loss: 1.037509, train acc: 11

Train size: [784] hidden size: [65] batch size: [784] trial: 20, train_loss: 1.017622, train acc: 13.708697764820212, test loss: 1.023779, test acc: 12.88, bias2: 0.8937038779258728, variance: 0.13007527589797974
Train size: [784] hidden size: [65] batch size: [784] trial: 21, train_loss: 1.018062, train acc: 13.6247680890538, test loss: 1.023700, test acc: 12.81409090909091, bias2: 0.8926634788513184, variance: 0.13103628158569336
Train size: [784] hidden size: [65] batch size: [784] trial: 22, train_loss: 1.018550, train acc: 13.531499556344274, test loss: 1.024563, test acc: 12.686521739130436, bias2: 0.8932397961616516, variance: 0.13132278621196747
Train size: [784] hidden size: [65] batch size: [784] trial: 23, train_loss: 1.017824, train acc: 13.77019557823129, test loss: 1.023638, test acc: 12.951250000000002, bias2: 0.8915018439292908, variance: 0.1321360468864441
Train size: [784] hidden size: [65] batch size: [784] trial: 24, train_loss: 1.018293, train acc: 13.7193877551020

Train size: [784] hidden size: [75] batch size: [784] trial: 7, train_loss: 1.006129, train acc: 13.807397959183675, test loss: 1.015848, test acc: 12.968750000000002, bias2: 0.9012213945388794, variance: 0.11462676525115967
Train size: [784] hidden size: [75] batch size: [784] trial: 8, train_loss: 1.004117, train acc: 13.846371882086167, test loss: 1.012550, test acc: 13.06888888888889, bias2: 0.897679328918457, variance: 0.11487056314945221
Train size: [784] hidden size: [75] batch size: [784] trial: 9, train_loss: 1.001589, train acc: 13.698979591836736, test loss: 1.008763, test acc: 13.032000000000002, bias2: 0.8931669592857361, variance: 0.115596242249012
Train size: [784] hidden size: [75] batch size: [784] trial: 10, train_loss: 1.001441, train acc: 13.439239332096475, test loss: 1.008912, test acc: 12.92636363636364, bias2: 0.8903317451477051, variance: 0.11858034878969193
Train size: [784] hidden size: [75] batch size: [784] trial: 11, train_loss: 1.003483, train acc: 13.105

Train size: [784] hidden size: [75] batch size: [784] trial: 44, train_loss: 1.006276, train acc: 13.075396825396828, test loss: 1.012604, test acc: 12.939777777777778, bias2: 0.8787342309951782, variance: 0.13387015461921692
Train size: [784] hidden size: [75] batch size: [784] trial: 45, train_loss: 1.005282, train acc: 13.146073646850047, test loss: 1.011459, test acc: 13.040652173913044, bias2: 0.8777005076408386, variance: 0.13375802338123322
Train size: [784] hidden size: [75] batch size: [784] trial: 46, train_loss: 1.005231, train acc: 13.132327399044726, test loss: 1.011421, test acc: 13.009574468085107, bias2: 0.8773802518844604, variance: 0.1340406984090805
Train size: [784] hidden size: [75] batch size: [784] trial: 47, train_loss: 1.004297, train acc: 13.230761054421771, test loss: 1.010452, test acc: 13.086666666666668, bias2: 0.8762259483337402, variance: 0.1342257410287857
Train size: [784] hidden size: [75] batch size: [784] trial: 48, train_loss: 1.003212, train acc: 

Train size: [784] hidden size: [86] batch size: [784] trial: 31, train_loss: 0.990162, train acc: 14.963329081632656, test loss: 0.996992, test acc: 14.214062499999999, bias2: 0.8674598932266235, variance: 0.12953227758407593
Train size: [784] hidden size: [86] batch size: [784] trial: 32, train_loss: 0.990910, train acc: 14.892547928262216, test loss: 0.998020, test acc: 14.10121212121212, bias2: 0.8678715825080872, variance: 0.13014839589595795
Train size: [784] hidden size: [86] batch size: [784] trial: 33, train_loss: 0.989473, train acc: 15.039765906362547, test loss: 0.996437, test acc: 14.308235294117646, bias2: 0.8662769794464111, variance: 0.13015970587730408
Train size: [784] hidden size: [86] batch size: [784] trial: 34, train_loss: 0.988378, train acc: 15.160349854227407, test loss: 0.995085, test acc: 14.474285714285713, bias2: 0.8643815517425537, variance: 0.1307038962841034
Train size: [784] hidden size: [86] batch size: [784] trial: 35, train_loss: 0.988978, train acc: 

Train size: [784] hidden size: [99] batch size: [784] trial: 18, train_loss: 0.981781, train acc: 15.238990332975295, test loss: 0.984998, test acc: 14.901052631578947, bias2: 0.8617306351661682, variance: 0.12326780706644058
Train size: [784] hidden size: [99] batch size: [784] trial: 19, train_loss: 0.980133, train acc: 15.293367346938775, test loss: 0.983475, test acc: 14.91, bias2: 0.8604713678359985, variance: 0.123003289103508
Train size: [784] hidden size: [99] batch size: [784] trial: 20, train_loss: 0.980689, train acc: 15.184645286686102, test loss: 0.984734, test acc: 14.762857142857142, bias2: 0.8614701628684998, variance: 0.12326370924711227
Train size: [784] hidden size: [99] batch size: [784] trial: 21, train_loss: 0.981784, train acc: 15.103200371057513, test loss: 0.985915, test acc: 14.736818181818181, bias2: 0.8625028133392334, variance: 0.12341216206550598
Train size: [784] hidden size: [99] batch size: [784] trial: 22, train_loss: 0.982000, train acc: 15.2395740905

Train size: [784] hidden size: [114] batch size: [784] trial: 5, train_loss: 0.939430, train acc: 18.792517006802722, test loss: 0.946086, test acc: 18.976666666666667, bias2: 0.8393636345863342, variance: 0.10672269016504288
Train size: [784] hidden size: [114] batch size: [784] trial: 6, train_loss: 0.943023, train acc: 18.476676384839653, test loss: 0.951100, test acc: 18.157142857142855, bias2: 0.8414565920829773, variance: 0.10964374244213104
Train size: [784] hidden size: [114] batch size: [784] trial: 7, train_loss: 0.945823, train acc: 18.03252551020408, test loss: 0.954305, test acc: 17.64875, bias2: 0.8421844244003296, variance: 0.11212043464183807
Train size: [784] hidden size: [114] batch size: [784] trial: 8, train_loss: 0.944057, train acc: 18.494897959183675, test loss: 0.952363, test acc: 17.962222222222223, bias2: 0.8387810587882996, variance: 0.113582082092762
Train size: [784] hidden size: [114] batch size: [784] trial: 9, train_loss: 0.944670, train acc: 18.55867346

Train size: [784] hidden size: [114] batch size: [784] trial: 42, train_loss: 0.963497, train acc: 16.709183673469386, test loss: 0.968283, test acc: 16.308837209302332, bias2: 0.8412360548973083, variance: 0.12704670429229736
Train size: [784] hidden size: [114] batch size: [784] trial: 43, train_loss: 0.963337, train acc: 16.712082560296846, test loss: 0.968220, test acc: 16.300000000000008, bias2: 0.8410045504570007, variance: 0.1272156983613968
Train size: [784] hidden size: [114] batch size: [784] trial: 44, train_loss: 0.964119, train acc: 16.621315192743765, test loss: 0.969005, test acc: 16.215777777777785, bias2: 0.8415286540985107, variance: 0.12747672200202942
Train size: [784] hidden size: [114] batch size: [784] trial: 45, train_loss: 0.964226, train acc: 16.570541259982257, test loss: 0.969272, test acc: 16.13456521739131, bias2: 0.8417400121688843, variance: 0.12753239274024963
Train size: [784] hidden size: [114] batch size: [784] trial: 46, train_loss: 0.964065, train 

Train size: [784] hidden size: [131] batch size: [784] trial: 29, train_loss: 0.962860, train acc: 17.389455782312925, test loss: 0.968310, test acc: 16.431000000000004, bias2: 0.8430306315422058, variance: 0.12527979910373688
Train size: [784] hidden size: [131] batch size: [784] trial: 30, train_loss: 0.961211, train acc: 17.416886109282423, test loss: 0.966596, test acc: 16.552903225806453, bias2: 0.8414320945739746, variance: 0.12516441941261292
Train size: [784] hidden size: [131] batch size: [784] trial: 31, train_loss: 0.961049, train acc: 17.44658801020408, test loss: 0.966390, test acc: 16.585625000000004, bias2: 0.8412340879440308, variance: 0.12515562772750854
Train size: [784] hidden size: [131] batch size: [784] trial: 32, train_loss: 0.960951, train acc: 17.350803957946816, test loss: 0.966017, test acc: 16.54484848484849, bias2: 0.8410612344741821, variance: 0.1249561682343483
Train size: [784] hidden size: [131] batch size: [784] trial: 33, train_loss: 0.960392, train a

In [None]:
import matplotlib
import matplotlib.pyplot as plt
font = {
        'size'   : 18}
matplotlib.rc('font', **font)
figsize = (16, 5)
import seaborn as sns
sns.set_style('darkgrid')
import pandas as pd

def plot_bias_var(df, N_D, ymin=0, ymax=1.0):
    fig1, axes1 = plt.subplots(1, 3, figsize=figsize)
    axes1[0].set_xscale('log')
    axes1[1].set_xscale('log')
    axes1[2].set_xscale('log')
    cur_df = df[df['train_size']/feature_dim==N_D]
    test_loss = cur_df['test_loss']
    bias2 = cur_df['bias2']
    var = cur_df['variance']
    P_N = cur_df['hidden_size']/cur_df['train_size']
    axes1[0].plot(P_N, test_loss)
    axes1[0].set_xlabel("P/N")
    axes1[0].set_ylabel("Test Loss")
    axes1[0].set_ylim(ymin, ymax)
    axes1[1].plot(P_N, bias2)
    axes1[1].set_xlabel("P/N")
    axes1[1].set_ylabel("Bias Square")
    axes1[1].set_ylim(ymin, ymax)
    axes1[2].plot(P_N, var)
    axes1[2].set_xlabel("P/N")
    axes1[2].set_ylabel("Variance")
    axes1[2].set_ylim(ymin, ymax)
    fig1.suptitle("Bias-Variance Decomposition (N/D={:.2f})".format(N_D))
    plt.show()
def plot_single_vs_ensemble(dfs_list, Ks_list, N_D, feature_dim, ymin=0, ymax=1.0):
    assert len(dfs_list) == len(Ks_list)
    fig1, axes1 = plt.subplots(1, 3, figsize=figsize)
    for i in range(3):
        axes1[i].set_xscale('log')
    dfs_list = [df[df['train_size']/feature_dim==N_D] for df in dfs_list]
    for cur_df, K in zip(dfs_list, Ks_list):
        test_loss = cur_df['test_loss']
        bias2 = cur_df['bias2']
        var = cur_df['variance']
        P_N = cur_df['hidden_size']/cur_df['train_size']
        axes1[0].plot(P_N, test_loss, label='K={}'.format(K))
        axes1[1].plot(P_N, bias2, label='K={}'.format(K))
        axes1[2].plot(P_N, var, label='K={}'.format(K))
    
    axes1[0].set_xlabel("P/N")
    axes1[0].set_ylabel("Test Loss")
    axes1[0].set_ylim(ymin, ymax)
    
    axes1[1].set_xlabel("P/N")
    axes1[1].set_ylabel("Bias Square")
    axes1[1].set_ylim(ymin, ymax)
    
    axes1[2].set_xlabel("P/N")
    axes1[2].set_ylabel("Variance")
    axes1[2].set_ylim(ymin, ymax)
    fig1.suptitle("Bias-Variance Decomposition (N/D={:.2f})".format(N_D))
    plt.legend()
    plt.show()


In [None]:
# K2_df = pd.read_csv(os.path.join(outdir, 'ensembleNNK=2_output.csv'))
# K1_df = pd.read_csv(os.path.join(outdir, 'singleNN_output.csv'))
# plot_single_vs_ensemble([K1_df, K2_df], [1, 2], N_Ds[0], 784,)