-
Notifications
You must be signed in to change notification settings - Fork 1
/
stage_one_pretrain.sh
executable file
·40 lines (35 loc) · 1.3 KB
/
stage_one_pretrain.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/bash
# Stage 1 pretraining launcher for MixDA.
#
# Usage:
#   bash stage_one_pretrain.sh DIRNAME KNOWLEDGE_PATH PROJECT_NAME MAX_EPOCHS \
#        DEVICES BATCH_SIZE LR MOA_LR [REALM_RECORD] [NO_OLD_KNOWLEDGE]
# Example:
#   bash stage_one_pretrain.sh model/test datasets/amazon/train.json test-project 20 1 3 1e-4 2e-4 "" 0
set -euo pipefail

dirname=$1                # directory to save the pre-trained model
knowledge_path=$2         # path to domain knowledge
project_name=$3           # project name (also reused as the run name)
max_epochs=$4             # maximum number of training epochs
devices=$5                # number of GPUs used
batch_size=$6             # batch size
lr=$7                     # learning rate
moa_lr=$8                 # learning rate for MoA (forwarded as --moe_lr)
realm_record=${9:-}       # path to the REALM record used for information retrieval; leave empty to disable
no_old_knowledge=${10:-}  # set to 1 to disable old domain knowledge

# Collect optional flags in an array so values containing spaces survive intact.
other_params=()
if [[ "$no_old_knowledge" == 1 ]]; then
  other_params+=(--no_old_knowledge)
fi
if [[ -n "$realm_record" ]]; then
  other_params+=(--realm_record "$realm_record")
fi

# ${other_params[@]+…} guards against "unbound variable" on an empty array
# under `set -u` with bash < 4.4.
python -m scripts.run_stage_one \
  --max_epochs "$max_epochs" \
  --accelerator gpu --strategy ddp \
  --devices "$devices" \
  --batch_size "$batch_size" \
  --layers 7,11 \
  --knowledge_data_path "$knowledge_path" \
  --project_name "$project_name" \
  --run_name "$project_name" \
  --dirpath "$dirname" \
  --lr "$lr" \
  --moe_lr "$moa_lr" \
  --adapter_down_scale 16 \
  ${other_params[@]+"${other_params[@]}"}