From 52b63c4ed013267ef51524d123a5cd9a1fefefec Mon Sep 17 00:00:00 2001 From: chongxiaoc <74630762+chongxiaoc@users.noreply.github.com> Date: Thu, 28 Oct 2021 08:26:03 -0700 Subject: [PATCH] fix the example of pytorch_lightning_mnist.py (#3245) - remove unused arg parameters - fix model test issue on GPU Signed-off-by: Chongxiao Cao Signed-off-by: weihanmines --- CHANGELOG.md | 2 ++ examples/pytorch/pytorch_lightning_mnist.py | 17 +++-------------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b4e57e26f..65fe710543 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Fixed +- fix the example of pytorch_lightning_mnist.py ([#3245](https://github.com/horovod/horovod/pull/3245)) + ## [v0.23.0] - 2021-10-06 ### Added diff --git a/examples/pytorch/pytorch_lightning_mnist.py b/examples/pytorch/pytorch_lightning_mnist.py index d60c4ae194..e6ffcccf1f 100644 --- a/examples/pytorch/pytorch_lightning_mnist.py +++ b/examples/pytorch/pytorch_lightning_mnist.py @@ -24,22 +24,10 @@ help='input batch size for testing (default: 1000)') parser.add_argument('--epochs', type=int, default=10, metavar='N', help='number of epochs to train (default: 10)') -parser.add_argument('--lr', type=float, default=0.01, metavar='LR', - help='learning rate (default: 0.01)') -parser.add_argument('--momentum', type=float, default=0.5, metavar='M', - help='SGD momentum (default: 0.5)') parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') parser.add_argument('--seed', type=int, default=42, metavar='S', help='random seed (default: 42)') -parser.add_argument('--log-interval', type=int, default=10, metavar='N', - help='how many batches to wait before logging training status') -parser.add_argument('--fp16-allreduce', action='store_true', default=False, - help='use fp16 compression during allreduce') -parser.add_argument('--use-adasum', action='store_true', default=False, - help='use adasum algorithm to do reduction') -parser.add_argument('--gradient-predivide-factor', type=float, default=1.0, - help='apply gradient predivide factor in optimizer (default: 1.0)') parser.add_argument('--data-dir', help='location of the training dataset in the local filesystem (will be downloaded if needed)') @@ -205,7 +193,7 @@ def on_train_end(self, trainer, model): callbacks = [MyDummyCallback(), ModelCheckpoint(dirpath=ckpt_path)] trainer = Trainer(accelerator='horovod', - gpus=(1 if torch.cuda.is_available() else 0), + gpus=(1 if args.cuda else 0), callbacks=callbacks, max_epochs=epochs, limit_train_batches=train_percent, @@ -214,6 +202,7 @@ def on_train_end(self, trainer, model): num_sanity_val_steps=0) trainer.fit(model) - + if args.cuda: + model = model.cuda() test()