# config.ini

# Project-wide settings: filesystem locations, entity labels, dataset
# splitting and tokenization options.
# NOTE(review): values in this file use ${KEY} references, which match the
# ExtendedInterpolation syntax of Python's configparser. Confirm that the
# loader enables it; otherwise the references stay literal text.
[Project]
# The full path to the directory containing everything, e.g. /home/ner.
# The other paths in this section are derived from it.
ROOT_DIR : /home/ner

# Location of the training data.
# DATA_FILEPATH is composed from DATA_DIR and DATA_FILENAME.
DATA_FILENAME : wikigold.conll_inline.txt
DATA_DIR : ${ROOT_DIR}/data
DATA_FILEPATH : ${DATA_DIR}/${DATA_FILENAME}

# Location of the trained models
MODELS_DIR : ${ROOT_DIR}/models

# Comma-separated entity names (written without spaces around the commas)
ENTITIES : PER,LOC,ORG,MISC

# Train, validation and evaluation ratios.
# The three values should sum to 1.
TRAIN_SPLIT : 0.6
VAL_SPLIT : 0.2
EVAL_SPLIT : 0.2

# Whether to randomize the order of the documents
# while splitting them up into training, validation
# and evaluation sets.
# Choose between True and False
RANDOMIZE_ORDER : False

# Whether to evaluate the model on the evaluation dataset
# after training.
# Choose between True and False
EVALUATE : True

# Whether to tokenize character by character
# or word by word inside of HTML tags.
# Choose between True and False
# (presumably True selects character by character - verify against the loader)
CHAR_BY_CHAR : True

# The minimum token count for a token to be included in the encodings.
# Useful when tokenizing word by word: with a minimum token count > 1
# the model will not have every word in its vocabulary, just the ones
# that occur often enough.
# NOTE(review): it is unclear whether a token occurring exactly this many
# times is included (>=) or excluded (>) - confirm against the tokenizer.
MIN_TOKEN_COUNT : 10

# Host and port settings, presumably the address the server listens on
# (TODO confirm against the code that reads this section).
[Server]
# NOTE(review): 0.0.0.0 conventionally means "all network interfaces";
# confirm that exposing the service beyond localhost is intended.
HOST : 0.0.0.0
# TCP port number
PORT : 8000

# Model architecture, training and logging settings.
[Neural Network]
# First layer can be an embedding layer or a dense layer.
# Choose between Embedding and Dense
FIRST_LAYER : Embedding
# Size of the first layer if Dense is chosen
DENSE_SIZE : 20
# Size of the first layer if Embedding is chosen
EMBED_SIZE : 30

# Size of the hidden, recurrent layer
HIDDEN_SIZE : 30
# Type of the bidirectional recurrent layers.
# Choose between LSTM, GRU and SimpleRNN
REC_LAYER_TYPE : GRU
# Number of recurrent layers in the recurrent layer stack
NO_OF_REC_LAYERS : 1
# Number of tokens to look back and forward to in the last step.
# Reduces the number of tags that contain
# wrongly classified spaces like this: <LOC>New</LOC> <LOC>York</LOC>
LAST_REC_SIZE : 3

# Size of the batch to process at once (number of documents)
BATCH_SIZE : 1
# Number of epochs to run
EPOCHS : 100

# Tensorboard logging.
# Choose between True and False
TB_LOGGING : True
# Log directory for Tensorboard data.
# ${Project:ROOT_DIR} refers to the ROOT_DIR key of the [Project] section.
TB_LOGDIR : ${Project:ROOT_DIR}/TensorBoard_logdir