forked from glorotxa/DeepANN
-
Notifications
You must be signed in to change notification settings - Fork 1
/
DARPA-best-faster.conf
77 lines (63 loc) · 3.17 KB
/
DARPA-best-faster.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# One lr per layer in the architecture.
lr = [0.01,0.001,0.0001]
act = ['tanh','tanh','rectifier']
depth = 3
n_hid = [1400,2500,4000]
# One noise type per layer in the architecture. 'binomial_NLP' is salt-and-pepper noise; 'binomial' simply overwrites entries with a binomial sample.
noise = ['binomial_NLP','binomial','binomial']
noise_lvl = [(0.7,0.0041),0.5,0.3]
weight_regularization_type = 'l2'
# NOTE(review): 4 coefficients here, but depth = 3 and every other per-layer
# list has 3 entries — confirm whether the extra entry is intentional
# (e.g. for a supervised/output layer) or a leftover.
weight_regularization_coeff = [0.0,0.0,0.0,0.0]
activation_regularization_type = 'l1'
activation_regularization_coeff = [0.0,0.0,0.01]
# Number of pretraining epochs, one-per-layer
#nepochs = [30,30,4]
nepochs = [10,10,30]
# Different validation runs
# - 100 training examples (x20 different samples of 100 training examples)
# - 1000 training examples (x10 different samples of 1000 training examples)
# - 10000 training examples (x1 different sample of 10000 training examples)
# (because of jobman, the keys have to be strings, not ints)
# NOTE: Probably you don't want to make trainsize larger than 10K,
# because it will be too large for CPU memory.
validation_runs_for_each_trainingsize = {"100": 20, "1000": 10, "10000": 1}
# For each layer, a list of the epochs at which you evaluate the
# reconstruction error and linear-SVM-supervised error.
# All the different results you have from here will be stored in a
# separate file per layer.
#epochstest = [[0,2,4,8,15,30],[0,2,4,8,15,30],[0,1,2,3,4]]
epochstest = [[5,10],[5,10],[0,2,4,8,16,30]]
BATCH_TEST = 100
BATCH_CREATION_LIBSVM = 500
NB_MAX_TRAINING_EXAMPLES_SVM = 10000
#NB_MAX_TRAINING_EXAMPLES_SVM = 1000 # FIXME: Change back to 10000 <========================================================================
# 1000 is just for fast running during development
#NB_MAX_TRAINING_EXAMPLES_SVM = 100 # FIXME: Change back to 10000 <========================================================================
# 100 is just for superfast running during development
SVM_INITIALC = 0.001
SVM_STEPFACTOR = 10.
SVM_MAXSTEPS = 10
# Hard-coded path to your liblinear installation (the active path below
# points at installed binaries — presumably the train/predict executables):
#SVMPATH = '/u/glorotxa/work/NLP/DARPAproject/netscale_sentiment_for_ET/lib/liblinear/'
SVMPATH = '/home/turian/dev/python/DARPA-preprocessor/preprocessor_baseline_UdeM/lib/install/bin/'
batchsize = 10
# The total number of files into which the training set is broken
nb_files = 15
#path_data = '/u/glorotxa/work/NLP/DARPAproject/'
path_data = '/home/turian/data/DARPAproject/'
# Train and test (validation) here should be disjoint subsets of the
# original full training set.
name_traindata = 'OpenTable_5000_train_instances'
name_trainlabel = 'OpenTable_5000_train_labels'
name_testdata = 'OpenTable_5000_test_instances'
name_testlabel = 'OpenTable_5000_test_labels'
# If there is a model file specified to build upon, the output of this
# model is the input for the model we are currently building.
model_to_build_upon = None
ninputs = 5000
# inputtype ('binary', 'tfidf', other options?) determines what the
# decoding activation function is for the first layer
# e.g. inputtype 'tfidf' ('tf*idf'?) uses activation function softplus
# to decode the tf*idf.
inputtype = 'binary'
seed = 123