# config.ini
# From a repository forked from lucawint/ner-rnn-html
[Project]
# Data locations, entity labels, dataset splitting and tokenization settings.
# Values of the form ${KEY} refer to other keys in this section
# (presumably resolved by Python configparser's ExtendedInterpolation;
# confirm against the code that loads this file).
# The full path to the directory containing everything, e.g. /home/ner
ROOT_DIR : /home/ner
# File name of the training data, located inside DATA_DIR
DATA_FILENAME : wikigold.conll_inline.txt
# Directory containing the training data
DATA_DIR : ${ROOT_DIR}/data
# Full path to the training data file, built from DATA_DIR and DATA_FILENAME
DATA_FILEPATH : ${DATA_DIR}/${DATA_FILENAME}
# Directory in which the trained models are stored
MODELS_DIR : ${ROOT_DIR}/models
# Comma-separated list of entity names
ENTITIES : PER,LOC,ORG,MISC
# Train, validation and evaluation ratios.
# The three values must sum to 1.
TRAIN_SPLIT : 0.6
VAL_SPLIT : 0.2
EVAL_SPLIT : 0.2
# Whether to randomize the order of the documents
# while splitting them up into training, validation
# and evaluation sets
RANDOMIZE_ORDER : False
# Whether to evaluate the model on the evaluation dataset
# after training
EVALUATE : True
# Whether to tokenize character by character (True)
# or word by word (False) inside of HTML tags.
CHAR_BY_CHAR : True
# The minimum number of occurrences a token needs in order to be
# included in the encodings.
# Useful when tokenizing word by word: with a minimum token count > 1,
# the model will not have every word in its vocabulary, only the ones
# that occur often enough.
# NOTE(review): whether the threshold is inclusive is not visible here;
# confirm against the encoding code.
MIN_TOKEN_COUNT : 10
[Server]
# Network settings for the server.
# NOTE(review): presumably the address and port the server listens on;
# confirm against the server code.
# Host address; 0.0.0.0 is the IPv4 wildcard address (all interfaces)
HOST : 0.0.0.0
# TCP port number
PORT : 8000
[Neural Network]
# Architecture and training settings of the model.
# First layer can be an embedding layer or a dense layer.
# Choose between Embedding and Dense
FIRST_LAYER : Embedding
# Size of the first layer if Dense is chosen
DENSE_SIZE : 20
# Size of the first layer if Embedding is chosen
EMBED_SIZE : 30
# Size of the hidden, recurrent layer
HIDDEN_SIZE : 30
# Type of the bidirectional recurrent layers.
# Choose between LSTM, GRU and SimpleRNN
REC_LAYER_TYPE : GRU
# Number of recurrent layers in the recurrent layer stack
NO_OF_REC_LAYERS : 1
# Number of tokens to look back and forward at in the last step.
# Reduces the number of entities that get split up by
# wrongly classified spaces, like this: <LOC>New</LOC> <LOC>York</LOC>
LAST_REC_SIZE : 3
# Size of the batch to process at once (number of documents)
BATCH_SIZE : 1
# Number of epochs to run
EPOCHS : 100
# Whether to enable Tensorboard logging
TB_LOGGING : True
# Log directory for Tensorboard data, located under ROOT_DIR
# from the [Project] section
TB_LOGDIR : ${Project:ROOT_DIR}/TensorBoard_logdir