-
Notifications
You must be signed in to change notification settings - Fork 28
Parameters
bugface edited this page Feb 15, 2022
·
10 revisions
This page explains the function of each parameter.
- model_type = 'bert' # which type of transformer models (e.g., bert, roberta, xlnet, longformer, deberta, albert)
- pretrained_model = 'bert-base-uncased' # pretrained models (e.g., bert-base-uncased will download from huggingface; you can put a path linked to your pretrained model)
- config_name = self.pretrained_model # model config file; default to the same as pretrained_model
- tokenizer_name = self.pretrained_model # tokenizer info; default to the same as pretrained_model
- do_lower_case = True # convert words to lower case
- data_dir = 'conll-2003' # where the data is stored (this dir should contain train.txt and dev.txt)
- data_has_offset_information = False # whether offset information is stored in the training and test data
- new_model_dir = 'new_ner_model' # fine-tuned model saved dir
- overwrite_model_dir = True # overwrite new_model_dir if a model dir with that name already exists; if this flag is not set and the dir exists, an error will be raised
- do_train = True # whether perform fine-tuning on training data
- do_predict = True # whether perform prediction based on fine-tuned model on test data
- predict_output_file = 'pred.txt' # if do_predict is set, the predictions will be written to this file
- max_seq_length = 512 # max len of tokens in each training data
- model_selection_scoring = 'strict-f_score-1' # the evaluation criterion used for model selection (strict-f_score-1, relax-f_score-1, strict-f_score-2, strict-f_score-0.5)
- train_batch_size = 4 # train batch size (the default of 4 is a good value to start with; the larger the value, the more GPU memory will be consumed)
- eval_batch_size = 16 # this value is not important, 16 or 32 is a good value
- learning_rate = 0.00001 # learning rate, we recommend use 1e-5
- seed = 13 # random seed; if the predictions are unreasonable (e.g., all predictions are B-), then you need to change the random seed and re-train the model
- num_train_epochs = 20 # the number of training epochs for fine-tuning
- gradient_accumulation_steps = 1 # if set to 2, gradients are accumulated over two training steps (batches) before the optimizer updates the model weights
- do_warmup = True # whether adopt a linear warm up strategy on learning rate
- warmup_ratio = 0.1 # percentage of steps used for warming up the learning rate
- max_num_checkpoints = 1 # how many checkpoints (fine-tuned models) you want to save, if 1, then only the last best model saved.
- weight_decay = 0.0 # apply weight decay on bias parameters
- adam_epsilon = 0.00000001 # for numerical stability (take default 1e-8 or you can try 1e-6)
- max_grad_norm = 1.0 # gradient clipping threshold; presumably the maximum gradient norm (gradients are rescaled so their norm does not exceed this value, e.g., 1.0 or 2.0)
- log_file = 'log.txt' # log file, you can find the best performance on dev
- log_lvl = 'i' # log level, i=INFO (we recommend use default i)
- fp16 = False # whether to use 16-bit floating point (fp16) training
- train_steps = 100 # number of training steps between two evaluations on the dev set; we recommend setting it to (total_num_sentences / batch_size / 4)
- early_stop = -1 # If set to 3, then if in 3 consecutive training epochs there is no performance improvement on the dev, the training will be stopped.
- progress_bar = True # whether to show the training progress bar (tqdm)
- save_model_core = True # whether save the bert model without classification layer
- use_crf = False # whether use CRF layer (slow the training speed but slightly improve the performance)
- focal_loss = True # setting this to True replaces cross entropy loss with focal loss; this is an experimental update and may not yield good results
- focal_loss_gamma = 2 # if you use focal loss, this flag sets the gamma value; the default is 2 and it may need to be tuned
- resume_from_model # a previously trained model from which to resume training; it must be an NER model, and the new dataset must use the same annotation scheme as the old one
- adversarial_training_method # supports the PGD and FGM methods for NLP adversarial training to enhance generalizability; the default is None, which does not apply adversarial training