In [1]:
from functools import partial # for trials
import numpy as np # for accuracy math
import os # for paths
import torch # for nn instantiation
import torch.nn as nn # for nn objects
import torch.nn.functional as F # for forward method
import torch.optim as optim # for optimization
from torch.utils.data import random_split # for train/test split
import torchvision # for data transforms
import torchvision.transforms as transforms # for transform methods
from ray import tune # for trialing
from ray.tune import CLIReporter # for trial reporting
from ray.tune import JupyterNotebookReporter # for trial reporting
from ray.tune.schedulers import ASHAScheduler # for trial scheduling
from ray.tune.schedulers import HyperBandForBOHB # for trial scheduling
from ray.tune.suggest.bohb import TuneBOHB # for trial selection/pruning
import ConfigSpace as CS # for configuration bounds
from collections import OrderedDict # for dynamic configuration definition
from pathlib import Path # for OS agnostic path definition

# import itertools package 
import itertools 
from itertools import combinations, combinations_with_replacement
from itertools import product

import math
import pandas as pd

# from sklearn import preprocessing

# allow configuration copying
from copy import deepcopy

In [2]:
# set data and checkpoint locations
p = Path('.')
d = p / 'data'
r = p / 'ray_results'
l = p / 'checkpoints' / 'layers'
n = p / 'checkpoints' / 'layers'

## set number or fraction of GPUs (per training loop) you'd like to utilize, if any at all
cpu_use = 1
gpu_use = 0.25

Since the neuron configuration we want is dependent upon the number of layers we have, we need to work flatten the feature space a bit. We can reduce the high-dminesional setups to a slightly less high-dminesional string of base-n nodes.

In [3]:
# define feature space for hashing

c_min = 3**2
c_max = 3**5
f_min = 2**2
f_max = 2**6

c = c_max - c_min
f = f_max - f_min

# conv = set(range(c_max)) - set(range(c_min))
# full = set(range(f_max)) - set(range(f_min))
conv = range(c_max)[c_min:]
full = range(f_max)[f_min:]

c_comb = list(combinations_with_replacement(conv,2))
f_comb = []
for layers in range(1,4):
    f_comb += list(combinations_with_replacement(full,layers))

# for conversion from dec to whatever we end up using
# most to least significant digit
def numberToBase(n, b):
    if n == 0:
        return [0]
    digits = []
    while n:
        digits.append(int(n % b))
        n //= b
    rev = digits[::-1]
    return rev

def feature_spacing():
    
    # create empty list to store the 
    # combinations 
    unique_combinations = list(combinations([c_comb,f_comb],2))
    total_uniques = len(unique_combinations)
    total_points = total_uniques**2
    total_cvs = len(c_comb)
    total_fcs = len(f_comb)
    
#     print(total_cvs)
#     print(total_fcs)
    
#     print(c_comb[np.random.randint(0,len(c_comb))])
#     print(f_comb[np.random.randint(0,len(f_comb))])

#     for ls in range(0,4):
#         unique_combinations.append((c**2)*(f*(f+1)**ls))
#         total_uniques += (c**2)*f*((f+1)**ls)
#         total_fcs += f*((f+1)**ls)
    
#     total_uniques -= ((c**2)*f)
#     total_points = total_uniques**2
    
#     print("number of combos: %s" % ["%s-fc model: %s" % (l,v) for l,v in enumerate(unique_combinations, 1)])
#     print("total uniques:",total_uniques)
#     print("number of points/indices (with sparicities/noise): %s" % total_points)
#     print("\n")
    
    columns = ["base","nodes_req","sparcity","sparcity_pcnt","denoise_pcnt"]
    values = [1,total_uniques,total_points - total_uniques,(total_points - total_uniques) / total_points,0]
#     results = {
#         "base": [1],
#         "nodes_req": [total_uniques],
#         "sparcity": [total_points - total_uniques],
#         "max_necc_base_value":[0],
#         "nodes+_req": [0],
#         "subsparcity": [0],
#         "unexplained":[0],
#         "sparcity_pcnt": [(total_points - total_uniques) / total_points * 100],
#         "subsparcity_pcnt": [0],
#         "denoise_pcnt":[0],
#         "complexity":[0]
#     }
    
    cf = []
#     print(report.to_string())
    for layer in [total_cvs,total_fcs]:#,total_uniques]:
        results = {
            "base": [1],
            "nodes_req": [total_uniques],
            "sparcity": [total_points - total_uniques],
            "max_necc_base_value":[0],
            "nodes+_req": [0],
            "subsparcity": [0],
            "unexplained":[0],
            "sparcity_pcnt": [(total_points - total_uniques) / total_points * 100],
            "subsparcity_pcnt": [0],
            "denoise_pcnt":[0],
            "complexity":[0]
        }

        report = pd.DataFrame(results)
    
        for base in range(2,17):
            results["base"] = [base]
            results["nodes_req"] = [math.ceil(math.log(layer,(base)))]
            results["nodes+_req"] = [math.floor(math.log(layer,(base)))]
            
            results["sparcity"] = [base**math.ceil(math.log(layer,base)) - layer]
            results["subsparcity"] = [-(base**math.floor(math.log(layer,base)) - layer)]
            
            results["sparcity_pcnt"] = [(base**math.ceil(math.log(layer,(base))) - base**math.log(layer,(base)))/(base**math.ceil(math.log(layer,(base))))*100]
            results["subsparcity_pcnt"] = [-((base**math.floor(math.log(layer,(base))) - base**math.log(layer,(base)))/(base**math.floor(math.log(layer,(base))))*100)]
            
#             results["max_necc_base_value"] = [numberToBase((results["base"][0]**results["nodes+_req"][0]+results["subsparcity"][0]),results["base"][0])]
            results["max_necc_base_value"] = [numberToBase(layer,base)]
            results["unexplained"] = [(-(base**math.floor(math.log(layer,base)) - layer))*(math.floor(math.log(layer,(base))))]
            
            results["denoise_pcnt"] = [math.floor(((total_points-(math.ceil(math.log(layer,base)))**2)/total_points)*100)]
        
            results["complexity"] = [results["nodes_req"][0]*(results["sparcity"][0]+1)]

            report = report.append(pd.DataFrame(results))
            
            
        report.index = [x for x in range(1, len(report.values)+1)]
#         report.set_index(range(len(report)),inplace=True)
        report.drop([1],axis=0,inplace=True)
#         print("value: %s \n" % layer)
        report.sort_values(["sparcity","unexplained","nodes+_req","subsparcity","sparcity_pcnt","base"],inplace=True)
#         print(report)
#         print(report.to_string(),"\n")
        
#         print(report.max())
#         print(report.min())
        
#         report_norm = (report + -1 * report.mean()) / (report.max() + -1 * report.min())
#         print(report_norm.to_string(),"\n")
        
        cf.append(report.iloc[0])
    
    return cf

[print(r,"\n") for r in feature_spacing()]

base                              13
nodes_req                          4
sparcity                        1066
max_necc_base_value    [12, 6, 9, 0]
nodes+_req                         3
subsparcity                    25298
unexplained                    75894
sparcity_pcnt                3.73236
subsparcity_pcnt             1151.48
denoise_pcnt                   -1500
complexity                      4268
Name: 13, dtype: object 

base                                    6
nodes_req                               6
sparcity                             6946
max_necc_base_value    [5, 0, 3, 5, 0, 2]
nodes+_req                              5
subsparcity                         31934
unexplained                        159670
sparcity_pcnt                     14.8877
subsparcity_pcnt                  410.674
denoise_pcnt                        -3500
complexity                          41682
Name: 6, dtype: object 



[None, None]

For the convolutional layers, base 9 seems to allow us to use the fewest nodes with the lowest number of invalid configuration indices (sparcity).
For the linear layers, base 16 seems to allow us to use the fewest nodes with the lowest number of invalid configuration indices (sparcity).

We can use the 

In [4]:
bases = feature_spacing()

base_c = bases[0]["base"]
base_f = bases[1]["base"]

def base_to_dec(num_list, base):
    num_list = num_list[::-1]
    num = 0
    for k in range(len(num_list)):
        dig = num_list[k]
#         if dig.isdigit():
#             dig = int(dig)
        dig = int(dig)
#         else:    #Assuming its either number or alphabet only
#             dig = ord(dig.upper())-ord('A')+10
        num += dig*(base**k)
    return num

def encode(config=[(24, 64),(13, 18, 41)]):
    iconv = c_comb.index(config[0])
    ifull = f_comb.index(config[1])
    
    conv_hash = numberToBase(iconv,base_c)
    full_hash = numberToBase(ifull,base_f)
    
    return [conv_hash,full_hash]

# print([(24, 64),(13, 18, 41)])
# print("to")
# print(encode([(24, 64),(13, 18, 41)]))

def decode(hash=([1, 7, 5, 0], [2, 0, 4, 3, 4, 4])):
    conv = base_to_dec(hash[0], base_c)
    full = base_to_dec(hash[1], base_f)

    
    return [c_comb[conv],f_comb[full]]


# print([[1, 7, 5, 0], [2, 0, 4, 3, 4, 4]])
# print("to")
# print(decode())

In [5]:
# move data into sets for loading
def load_data(data_dir=d.absolute()):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    trainset,testset = [torchvision.datasets.CIFAR10(root=data_dir, train=is_train, download=True, transform=transform) for is_train in [True,False]]

    return trainset, testset

In [6]:
# dynamically-generated nn that takes a 3-channel image and outputs a label
class Net(nn.Module):
    def __init__(self, hidden_layers=[[6, 16],[120,84]]):
        super(Net, self).__init__()
        hidden_convs,hidden_fcs = hidden_layers
        print(hidden_convs)
        print(hidden_fcs)
        uf_input = 0
        layer_list = OrderedDict()
        
        layer_list['conv1'] = nn.Conv2d(3, hidden_convs[0], 5)
        layer_list['pool1'] = nn.MaxPool2d(2, 2)

        layer_input = layer_list['conv1'].out_channels
        
        for layer_num, channels in enumerate(hidden_convs[1:], 2):
            layer_list["conv%s" % layer_num]  = nn.Conv2d(layer_input, channels, 5)
            layer_list["pool%s" % layer_num] = nn.MaxPool2d(2, 2)
            layer_input = layer_list["conv%s" % layer_num].out_channels
        
        
        layer_list["flat"] = nn.Flatten()
        
        layer_list['fc1'] = nn.Linear(layer_input*5*5, hidden_fcs[0])
        layer_list["relu1"]  = nn.ReLU()
        
        layer_input = layer_list['fc1'].out_features
        for (layer_num, features) in enumerate(hidden_fcs[1:], 2):
            layer_list["fc%s" % layer_num]  = nn.Linear(layer_input, features)
            layer_list["relu%s" % layer_num]  = nn.ReLU()
            layer_input = layer_list["fc%s" % layer_num].out_features
            
        
        layer_list['fco'] = nn.Linear(hidden_fcs[-1], 10)
    
        self.layers = nn.Sequential(layer_list)

    def forward(self, x):
        x = self.layers(x)
        return x

In [7]:
# train nn on data
def train_cifar(neuron_config, checkpoint_dir=None):
    
    data_dir=d.absolute()
    
    def cv_discrim(s): return 'conv_subindex_' in s
    def fc_discrim(s): return 'full_subindex_' in s
    cvs = [neuron_config[hp] for hp in list(filter(cv_discrim, neuron_config.keys()))]
    fcs = [neuron_config[hp] for hp in list(filter(fc_discrim, neuron_config.keys()))]
#     cvs = neuron_config["cvs"]
#     fcs = neuron_config["fcs"]
    
    cfg = decode([cvs, fcs])
    
    net = Net(cfg)

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=neuron_config["lr"], momentum=0.9)

    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    trainset, testset = load_data()

    test_abs = int(len(trainset) * 0.8)
    train_subset, val_subset = random_split(
        trainset, [test_abs, len(trainset) - test_abs])

    trainloader,valloader = [torch.utils.data.DataLoader(
        train_subset,
        batch_size=int(neuron_config["batch_size"]),
        shuffle=True,
        num_workers=1) for subset in [train_subset,val_subset]]

    for epoch in range(neuron_config["epochs"]):  # loop over the dataset multiple times
        running_loss = 0.0
        epoch_steps = 0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            epoch_steps += 1
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
                                                running_loss / epoch_steps))
                running_loss = 0.0

        # Validation loss
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        for i, data in enumerate(valloader, 0):
            with torch.no_grad():
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                loss = criterion(outputs, labels)
                val_loss += loss.cpu().numpy()
                val_steps += 1

        with tune.checkpoint_dir(epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save((net.state_dict(), optimizer.state_dict()), path)

        tune.report(loss=(val_loss / val_steps), accuracy=(correct / total))
    print("Finished Training")

In [8]:
# get accuracy score
def test_accuracy(net, device="cpu"):
    trainset, testset = load_data()

    testloader = torch.utils.data.DataLoader(
        testset, batch_size=4, shuffle=False, num_workers=1)

    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total

In [9]:
#determine configuration boundary for nn based on number of layers
nodes_c = bases[0]["nodes_req"]
nodes_f = bases[1]["nodes_req"]
max_c = bases[0]["max_necc_base_value"]
max_f = bases[1]["max_necc_base_value"]

# def configure_neurons(num_convs,num_fcs):
def configure_neurons():
    config_space = CS.ConfigurationSpace()
    
    config_space.add_hyperparameter(
        CS.UniformFloatHyperparameter(name="lr", lower=1e-4, upper=1e-1, log=True))
    config_space.add_hyperparameter(
        CS.CategoricalHyperparameter(name="batch_size", choices=[4, 8, 16, 32]))
    config_space.add_hyperparameter(
        CS.CategoricalHyperparameter(name="epochs", choices=[20, 30, 40]))
    
#     conv_rules,full_rules = [],[]
    conv_lims,full_lims = [],[]
    
    for subindex in range(nodes_c):
        # define hyperparameter reference attributes
        rule_name = "conv_subindex_%s" % subindex
        conv_rule = CS.UniformIntegerHyperparameter(rule_name, lower=0, upper=base_c-1)
        
        # add hyperparameter to collections
#         config_space.add_hyperparameter(
#             CS.UniformIntegerHyperparameter("conv_subindex_%s" % subindex, lower=0, upper=base_c-1))
        config_space.add_hyperparameter(conv_rule)
    
        conv_rules = list(filter(lambda hp: "conv_subindex_" in hp.name, config_space.get_hyperparameters()))
    
        # build banlist from collections
        rl = deepcopy(config_space)
        rd = {}
        for ri,rule in enumerate(conv_rules,1):
#             print(len(conv_rules))
#             print(max_c[ri-1])
#             print(config_space.get_hyperparameter(rule_name).upper)
            
#             print(rule)
    
            if (len(conv_rules) == 1) & (max_c[ri-1] == config_space.get_hyperparameter(rule_name).upper):
#                 print("breaking")
                break
            elif ri != len(conv_rules):
#                 rl.add_forbidden_clause(
#                     CS.ForbiddenEqualsClause(
# #                         config_space.get_hyperparameter(rule), 
#                         rule,
#                         max_c[ri-1]
#                     )
#                 )
                rd[rule.name] = CS.ForbiddenEqualsClause(
#                         config_space.get_hyperparameter(rule), 
                        rule,
                        max_c[ri-1]
                    )
            else:
#                 rl.add_forbidden_clause(
#                     CS.ForbiddenInClause(
# #                         config_space.get_hyperparameter(rule), 
#                         rule,
#                         range(
#                             max_c[ri-1] + 1, 
# #                             config_space.get_hyperparameter(rule).upper + 1
#                             rule.upper + 1
#                         )
#                     )
#                 )
                rd[rule.name] = CS.ForbiddenInClause(
#                         config_space.get_hyperparameter(rule), 
                        rule,
                        range(
                            max_c[ri-1] + 1, 
#                             config_space.get_hyperparameter(rule).upper + 1
                            rule.upper + 1
                        )
                    )
        
        # package banlist for addition to config space
#         print(rl.get_forbiddens())
        
        # add banlist to collection
#         if rl.get_forbiddens():
#             config_space.add_forbidden_clause(
#                 CS.ForbiddenAndConjunction(
#                     *rl.get_forbiddens()
#                 )
#             )
        if rd.values():
            config_space.add_forbidden_clause(
                CS.ForbiddenAndConjunction(
                    *rd.values()
                )
            )           
                
#     print(config_space.get_forbiddens())
    
#     for subindex in range(nodes_f):
#         full_rules += CS.UniformIntegerHyperparameter("full_subindex_%s" % subindex, lower=0, upper=base_f-1)
# #         config_space.add_hyperparameter(
# #             CS.UniformIntegerHyperparameter("full_subindex_%s" % subindex, lower=0, upper=base_f-1))
#         config_space.add_hyperparameter(full_rules[subindex])
    
    for subindex in range(nodes_f):
        # define hyperparameter reference attributes
        rule_name = "full_subindex_%s" % subindex
        full_rule = CS.UniformIntegerHyperparameter(rule_name, lower=0, upper=base_f-1)
        
        # add hyperparameter to collections
#         config_space.add_hyperparameter(
#             CS.UniformIntegerHyperparameter("conv_subindex_%s" % subindex, lower=0, upper=base_c-1))
        config_space.add_hyperparameter(full_rule)
    
        full_rules = list(filter(lambda hp: "full_subindex_" in hp.name, config_space.get_hyperparameters()))
    
        # build banlist from collections
        rl = deepcopy(config_space)
        rd = {}
        for ri,rule in enumerate(full_rules,1):
#             print(len(conv_rules))
#             print(max_c[ri-1])
#             print(config_space.get_hyperparameter(rule_name).upper)
            
#             print(rule)
    
#             if (len(full_rules) == 1) & (max_f[ri-1] == config_space.get_hyperparameter(rule_name).upper):
            if (len(full_rules) == 1) & (max_f[ri-1] == config_space.get_hyperparameter(rule_name).upper):
#                 print("breaking")
                break
            elif ri != len(full_rules):
#                 rl.add_forbidden_clause(
#                     CS.ForbiddenEqualsClause(
# #                         config_space.get_hyperparameter(rule), 
#                         rule,
#                         max_c[ri-1]
#                     )
#                 )
                rd[rule.name] = CS.ForbiddenEqualsClause(
#                         config_space.get_hyperparameter(rule), 
                        rule,
                        max_f[ri-1]
                    )
            else:
#                 rl.add_forbidden_clause(
#                     CS.ForbiddenInClause(
# #                         config_space.get_hyperparameter(rule), 
#                         rule,
#                         range(
#                             max_c[ri-1] + 1, 
# #                             config_space.get_hyperparameter(rule).upper + 1
#                             rule.upper + 1
#                         )
#                     )
#                 )
                rd[rule.name] = CS.ForbiddenInClause(
#                         config_space.get_hyperparameter(rule), 
                        rule,
                        range(
                            max_f[ri-1] + 1, 
#                             config_space.get_hyperparameter(rule).upper + 1
                            rule.upper + 1
                        )
                    )
        
        # package banlist for addition to config space
#         print(rl.get_forbiddens())
        
        # add banlist to collection
#         if rl.get_forbiddens():
#             config_space.add_forbidden_clause(
#                 CS.ForbiddenAndConjunction(
#                     *rl.get_forbiddens()
#                 )
#             )
        if rd.values():
            config_space.add_forbidden_clause(
                CS.ForbiddenAndConjunction(
                    *rd.values()
                )
            )           
                
#     print(config_space.get_forbiddens())
        
#     #Define max subindices
    
#     max_conv_index = [[CS.ForbiddenEqualsClause(conv_rules[0],max_c[0] + 1)]]
#     max_full_index = [[CS.ForbiddenEqualsClause(full_rules[0],max_f[0] + 1)]]
    
#     for subindex,max_necc in enumerate(1,max_c):
#         banlist = [CS.ForbiddenEqualsClause(max_conv_index[0][0].hyperparameter,max_conv_index[0][0].value - 1)]
#         for fbd in max_conv_index:
# #             banlist += CS.ForbiddenEqualsClause(fbd.hyperparameter,fbd.value - 1)
#             banlist += CS.ForbiddenEqualsClause(conv_rules[subindex],max_necc + 1)
#         max_conv_index += CS.ForbiddenEqualsClause(conv_rules[subindex],max_necc + 1)
    
#     for hidden in range(2):
#         config_space.add_hyperparameter(
#             CS.UniformIntegerHyperparameter("cv%s" % hidden, lower=3, upper=3**4))
    
#     for hidden in range(num_fcs):
#         config_space.add_hyperparameter(
#             CS.UniformIntegerHyperparameter("fc%s" % hidden, lower=2**2, upper=2**4))
        
    return config_space

def build_forbidden(max_digits=[12, 6, 9, 0],conf=None):
    # list that holds all clauses for "And"-ing
    full_forbidden = []
    
    # add each max possible index combination
    for digit in range(1,len(max_digits)):
        forbidden = []        
        
        max_slice = max_digits[:digit]
        conf_slice = conf[:digit]
        forbidden.append(build_subforbidden(max_slice,conf_slice))
        
    full_forbidden.append(forbidden)
        
    return CS.ForbiddenAndConjunction(full_forbidden)
    
    
def build_subforbidden(ceiling=None,articles=None):
    # list that holds index combination ceiling
    sub_forbidden = {}
    sf = CS.ConfigurationSpace()
#     sub_forbidden = CS.ForbiddenAndConjunction()
    
    # collect absolute max digit(s) and soft max digit
    reqs,cap = ceiling[:-1],ceiling[-1]
    
    # create rule disallowing any numbers higher than respective max
    for sub,req in enumerate(reqs,1):
        sub_forbidden[str(sub-1)] = CS.ForbiddenEqualsClause(articles[sub-1],reqs[sub-1])
        sf.add_forbidden_clause(CS.ForbiddenEqualsClause(articles[sub-1],reqs[sub-1]))
    sub_forbidden[str(len(reqs))] = CS.ForbiddenInClause(articles[-1],range(cap+1, articles[-1].upper+1))
    
#     print(sf)
    
    return CS.ForbiddenAndConjunction(sf)
#     return sub_forbidden

In [10]:
neuron_config_space = configure_neurons()
print(neuron_config_space)

Configuration space object:
  Hyperparameters:
    batch_size, Type: Categorical, Choices: {4, 8, 16, 32}, Default: 4
    conv_subindex_0, Type: UniformInteger, Range: [0, 12], Default: 6
    conv_subindex_1, Type: UniformInteger, Range: [0, 12], Default: 6
    conv_subindex_2, Type: UniformInteger, Range: [0, 12], Default: 6
    conv_subindex_3, Type: UniformInteger, Range: [0, 12], Default: 6
    epochs, Type: Categorical, Choices: {20, 30, 40}, Default: 20
    full_subindex_0, Type: UniformInteger, Range: [0, 5], Default: 2
    full_subindex_1, Type: UniformInteger, Range: [0, 5], Default: 2
    full_subindex_2, Type: UniformInteger, Range: [0, 5], Default: 2
    full_subindex_3, Type: UniformInteger, Range: [0, 5], Default: 2
    full_subindex_4, Type: UniformInteger, Range: [0, 5], Default: 2
    full_subindex_5, Type: UniformInteger, Range: [0, 5], Default: 2
    lr, Type: UniformFloat, Range: [0.0001, 0.1], Default: 0.0031622777, on log-scale
  Forbidden Clauses:
    (Forbidden:

sf = {
    "c": [12,6,9,0],
    "a": [
        CS.UniformIntegerHyperparameter(name="conv_subindex_1", lower=0, upper=12),
        CS.UniformIntegerHyperparameter(name="conv_subindex_2", lower=0, upper=12),
        CS.UniformIntegerHyperparameter(name="conv_subindex_3", lower=0, upper=12),
        CS.UniformIntegerHyperparameter(name="conv_subindex_4", lower=0, upper=12)
    ]
}
print(build_subforbidden(sf["c"],sf["a"]))

In [11]:
# perform neuron configuration trials
# def search_neurons(layer_config, checkpoint_dir=None):
def search_neurons(checkpoint_dir=None):
    num_samples=40
    max_num_epochs=40
    gpus_per_trial=1
    
#     print(layer_config)
    
#     neuron_config_space = configure_neurons(layer_config["num_convs"], layer_config["num_fcs"])
    neuron_config_space = configure_neurons()
    
    experiment_metrics = dict(metric="accuracy", mode="max")
    
    #pre-load data to avoid races
    load_data()
    
    scheduler = HyperBandForBOHB(
#         metric="loss",
#         mode="min",
        max_t=20,
        reduction_factor=2,
        **experiment_metrics)
    search = TuneBOHB(
        neuron_config_space,
        max_concurrent=8,
#         metric="loss",
#         mode="min",
        **experiment_metrics)
    reporter = JupyterNotebookReporter(
        overwrite=True,
#         parameter_columns=["l1", "l2", "lr", "batch_size", "epochs"],
        parameter_columns=neuron_config_space.get_hyperparameter_names(),
        metric_columns=["loss", "accuracy", "training_iteration"])
    result = tune.run(
        partial(train_cifar),
        verbose=2,
        name="neurons",
        local_dir=r.absolute(),
        resources_per_trial={"cpu": cpu_use, "gpu": gpu_use},
        max_failures=3,
#         config=neuron_config_space,
        num_samples=num_samples,
        scheduler=scheduler,
        search_alg=search,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("accuracy", "max", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))

    
    def cv_discrim(s): return 'conv_subindex_' in s
    def fc_discrim(s): return 'full_subindex_' in s
    best_cvs = [best_trial.config[hp] for hp in list(filter(cv_discrim, best_trial.config.keys()))]
    best_fcs = [best_trial.config[hp] for hp in list(filter(fc_discrim, best_trial.config.keys()))]
# #     best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])

    cfg = decode([best_cvs, best_fcs])
    best_trained_model = Net(cfg)
    
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    best_checkpoint_dir = best_trial.checkpoint.value
    model_state, optimizer_state = torch.load(os.path.join(
        best_checkpoint_dir, "checkpoint"))
    best_trained_model.load_state_dict(model_state)

    test_acc = test_accuracy(best_trained_model, device)
    
    if checkpoint_dir != None:
        tune.report(accuracy=test_acc)
    
#     with tune.checkpoint_dir("nodes") as checkpoint_dir:
#         path = os.path.join(checkpoint_dir, "checkpoint")
#         torch.save(best_trained_model.state_dict(), path)
    
    print("Best trial test set accuracy: {}".format(test_acc))
    
    return best_trained_model.state_dict()


In [12]:
# perform layer count trials
def search_layers(num_samples=10, max_num_epochs=10, gpus_per_trial=0):
    data_dir=d.absolute()
    load_data(data_dir)
    layer_config_space = CS.ConfigurationSpace()

    layer_config_space.add_hyperparameter(
        CS.Constant("num_convs", value=2))
    layer_config_space.add_hyperparameter(
        CS.UniformIntegerHyperparameter("num_fcs", lower=2, upper=2**2))
    
    experiment_metrics = dict(metric="accuracy", mode="max")
    

    scheduler = HyperBandForBOHB(
        max_t=max_num_epochs,
        reduction_factor=2,
        **experiment_metrics)
    search = TuneBOHB(
        layer_config_space,
        max_concurrent=4,
        **experiment_metrics)
    reporter = CLIReporter(
#         overwrite=True,
        parameter_columns=layer_config_space.get_hyperparameter_names(),
        metric_columns=["loss", "accuracy", "training_iteration"])
    result = tune.run(
        partial(search_neurons),
        verbose=2,
        name="layers",
        local_dir=r.absolute(),
#         config=layer_config_space,
        resources_per_trial={"gpu": gpus_per_trial},
        max_failures=3,
        num_samples=num_samples,
        scheduler=scheduler,
        search_alg=search,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("accuracy", "max", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))

    best_trained_model = Net([best_trial.config["num_convs"], best_trial.config["num_fcs"]])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    best_checkpoint_dir = best_trial.checkpoint.value
    model_state, optimizer_state = torch.load(os.path.join(
        best_checkpoint_dir, "checkpoint"),map_location=torch.device('cpu'))
    best_trained_model.load_state_dict(model_state)

    test_acc = test_accuracy(best_trained_model, device)
    print("Best trial test set accuracy: {}".format(test_acc))
    
    return best_trained_model.state_dict()


# perform test
model = Net()
if __name__ == "__main__":
    # You can change the number of GPUs per trial here:
    model = search_layers(num_samples=10, max_num_epochs=10, gpus_per_trial=1)


In [13]:
print("Resource usage can be viewed at port 127.0.0.1:8265 or higher")

Resource usage can be viewed at port 127.0.0.1:8265 or higher


In [14]:
# layer_config_space = {}

# # for hp in ["num_convs","num_fcs"]:
# #     layer_config_space[hp] = np.random.randint(2,2**3)
# # layer_config_space["num_convs"] = np.random.randint(2,3)
# layer_config_space["num_convs"] = 2
# layer_config_space["num_fcs"] = np.random.randint(2,2**2)

# # cpu_use = 1
# # gpu_use = 0
# # data_dir = os.path.abspath("/home/grottesco/Source/RayTuneTut/data/")
# # checkpoint_dir = os.path.abspath("/home/grottesco/Source/RayTuneTut/checkpoints")
# print("Resource usage can be viewed at 127.0.0.1:8265")
    
model = search_neurons()

Trial name,status,loc,batch_size,conv_subindex_0,conv_subindex_1,conv_subindex_2,conv_subindex_3,epochs,full_subindex_0,full_subindex_1,full_subindex_2,full_subindex_3,full_subindex_4,full_subindex_5,lr,loss,accuracy,training_iteration
DEFAULT_8684b754,TERMINATED,,4,8,0,11,9,20,0,1,4,5,2,1,0.00534174,2.30667,0.101075,1
DEFAULT_868de63a,TERMINATED,,8,0,3,10,11,30,2,2,2,4,4,0,0.00978275,2.30563,0.10035,2
DEFAULT_8692a53a,TERMINATED,,4,5,10,7,7,20,3,2,4,5,0,1,0.000863218,0.731154,0.73925,8
DEFAULT_86978b04,TERMINATED,,32,11,3,4,6,40,4,4,4,5,3,3,0.0252347,1.58772,0.435325,2
DEFAULT_869ca652,TERMINATED,,16,0,8,7,10,30,3,4,4,5,0,3,0.0656635,,0.10095,1
DEFAULT_86ab91ee,TERMINATED,,8,1,7,7,11,40,1,2,1,0,4,4,0.0159174,2.30479,0.101325,1
DEFAULT_86b0beda,TERMINATED,,16,5,7,12,7,30,3,3,5,2,4,0,0.000139136,2.29669,0.105175,1
DEFAULT_86b63e78,TERMINATED,,8,2,3,11,12,20,4,2,0,4,4,3,0.0616487,2.32009,0.10065,1
DEFAULT_9c0ece16,TERMINATED,,16,9,9,6,6,30,3,5,2,1,4,1,0.00629427,1.10687,0.63645,4
DEFAULT_9e09e872,TERMINATED,,4,10,9,9,6,20,0,5,0,2,3,0,0.0112515,2.31555,0.09915,1


2020-12-28 00:18:32,184	INFO tune.py:439 -- Total run time: 1977.78 seconds (1976.37 seconds for the tuning loop).


Best trial config: {'batch_size': 32, 'conv_subindex_0': 10, 'conv_subindex_1': 0, 'conv_subindex_2': 4, 'conv_subindex_3': 1, 'epochs': 20, 'full_subindex_0': 5, 'full_subindex_1': 0, 'full_subindex_2': 0, 'full_subindex_3': 3, 'full_subindex_4': 1, 'full_subindex_5': 5, 'lr': 0.001965405713503036}
Best trial final validation loss: 0.504899407839775
Best trial final validation accuracy: 0.82515
(138, 231)
(48, 56, 61)
Files already downloaded and verified
Files already downloaded and verified
Best trial test set accuracy: 0.6765


In [38]:
[print(k,"\n",model[k].shape[0]) for k in model.keys()]

layers.conv1.weight 
 138
layers.conv1.bias 
 138
layers.conv2.weight 
 231
layers.conv2.bias 
 231
layers.fc1.weight 
 48
layers.fc1.bias 
 48
layers.fc2.weight 
 56
layers.fc2.bias 
 56
layers.fc3.weight 
 61
layers.fc3.bias 
 61
layers.fco.weight 
 10
layers.fco.bias 
 10


[None, None, None, None, None, None, None, None, None, None, None, None]

In [39]:
print(model)

OrderedDict([('layers.conv1.weight', tensor([[[[-0.0005, -0.1587, -0.0468, -0.0604,  0.0033],
          [ 0.1313, -0.1065, -0.1142,  0.0591, -0.0319],
          [ 0.0850,  0.1069, -0.1066,  0.0757,  0.1074],
          [-0.0508,  0.1567, -0.0740,  0.0991, -0.0686],
          [ 0.0005,  0.0140,  0.1365, -0.0556,  0.1351]],

         [[ 0.0934, -0.1193,  0.0927,  0.0950, -0.0800],
          [-0.0430, -0.0455,  0.0275, -0.0525, -0.0477],
          [-0.0141, -0.0211, -0.0807,  0.0368, -0.0052],
          [-0.0770,  0.0901,  0.1030, -0.0928, -0.0396],
          [-0.1674, -0.0316,  0.0514, -0.0252,  0.0349]],

         [[ 0.0723, -0.0440, -0.0361,  0.1443, -0.1108],
          [ 0.1614, -0.0173,  0.0392,  0.0030, -0.0230],
          [ 0.0031,  0.1087, -0.0266,  0.0539, -0.1212],
          [-0.0009, -0.0095, -0.0177, -0.0017,  0.0381],
          [-0.0741, -0.0290,  0.1046,  0.0680, -0.1052]]],


        [[[-0.0806, -0.0905, -0.0799,  0.0242,  0.0875],
          [-0.1311, -0.0396,  0.1189, -0.01

!rm -rf ./data/* ./ray_results/layers/* ./ray_results/neurons/* 