Expose DeepSpeed's built-in gradient accumulation
afiaka87 committed Jun 7, 2021
1 parent 6937500 commit 7779bd1
Showing 1 changed file with 3 additions and 0 deletions.
train_dalle.py
@@ -70,6 +70,8 @@
 
 train_group.add_argument('--batch_size', default = 4, type = int, help = 'Batch size')
 
+train_group.add_argument('--ga_steps', default = 1, type = int, help = 'Number of steps to accumulate gradients across per each iteration. DeepSpeed only.')
+
 train_group.add_argument('--learning_rate', default = 3e-4, type = float, help = 'Learning rate')
 
 train_group.add_argument('--clip_grad_norm', default = 0.5, type = float, help = 'Clip gradient norm')
@@ -325,6 +327,7 @@ def group_weight(model):
 distr_backend.check_batch_size(BATCH_SIZE)
 deepspeed_config = {
     'train_batch_size': BATCH_SIZE,
+    'gradient_accumulation_steps': args.ga_steps,
     'gradient_clipping': GRAD_CLIP_NORM,
     'fp16': {
         'enabled': args.fp16,
