drop list support for gluon trainer (apache#18877)
Co-authored-by: Ubuntu <ubuntu@ip-172-31-42-138.ec2.internal>
eric-haibin-lin and Ubuntu committed Aug 8, 2020
1 parent dde635f commit d5fdcbf
Showing 6 changed files with 42 additions and 32 deletions.
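The user-facing effect: mx.gluon.Trainer no longer accepts a list or tuple of Parameters; its constructor now asserts that params is a dict (or OrderedDict) keyed by parameter name. A minimal before/after sketch of the call-site change (the parameter name 'x' mirrors the tests below):

    import mxnet as mx

    x = mx.gluon.Parameter('x', shape=(10, 1))
    x.initialize(ctx=[mx.cpu(0)], init='zeros')

    # Before this commit, a list of Parameters was accepted:
    #   trainer = mx.gluon.Trainer([x], 'sgd', {'learning_rate': 0.1})
    # After this commit, a dict keyed by parameter name is required:
    trainer = mx.gluon.Trainer({'x': x}, 'sgd', {'learning_rate': 0.1})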
36 changes: 23 additions & 13 deletions python/mxnet/gluon/trainer.py
@@ -20,6 +20,7 @@
 """Parameter optimizer."""
 __all__ = ['Trainer']
 
+import sys
 from collections import OrderedDict
 
 from .. import optimizer as opt
@@ -77,25 +78,34 @@ class Trainer(object):
     """
     def __init__(self, params, optimizer, optimizer_params=None, kvstore='device',
                  compression_params=None, update_on_kvstore=None):
+        self._param2name = {}
+        self._param2idx = {}
+        py_version = sys.version_info
+        assert isinstance(params, (dict, OrderedDict)), \
+            'invalid params type: {}. Expected dict type'.format(type(params))
+        names = list(params.keys())
         param_list = []
-        if isinstance(params, (dict, OrderedDict)):
-            for key in sorted(list(params.keys())):
-                param_list.append(params[key])
-            params = param_list
-        if not isinstance(params, (list, tuple)):
-            raise ValueError(
-                "First argument must be a list or dict of Parameters, " \
-                "got %s."%(type(params)))
+        # only python 3.5 requires sorting
+        if py_version[0] == 3 and py_version[1] == 5:
+            names = sorted(names)
+        for name in names:
+            p = params[name]
+            if not isinstance(p, Parameter):
+                raise ValueError(
+                    "First argument must be a dict of Parameters, " \
+                    "got list of %s."%(type(p)))
+            param_list.append(p)
+            # Shared parameters have same uuid; only need to store one of the shared versions
+            if p._uuid in self._param2name:
+                continue
+            self._param2name[p._uuid] = name
+        params = param_list
 
         self._params = []
         # parameters to initialize on the kvstore
         self._contains_sparse_weight = False
         self._contains_sparse_grad = False
-        self._param2idx = {}
         for i, param in enumerate(params):
-            if not isinstance(param, Parameter):
-                raise ValueError(
-                    "First argument must be a list or dict of Parameters, " \
-                    "got list of %s."%(type(param)))
             if param._uuid in self._param2idx:
                 # Shared parameters have same uuid; only need to store one of the shared versions
                 continue
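Two details of the new constructor worth noting: names are sorted only on Python 3.5, where plain dicts do not preserve insertion order, and parameters sharing a _uuid (shared parameters) are registered once under the first name seen. In typical training scripts the dict comes from Block.collect_params(), which already maps names to Parameters; a minimal sketch under that assumption (the Dense layer is illustrative, not part of this diff):

    import mxnet as mx
    from mxnet import gluon

    net = gluon.nn.Dense(4)
    net.initialize()
    # collect_params() returns a dict of name -> Parameter, which satisfies
    # the new isinstance(params, (dict, OrderedDict)) assertion.
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})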
2 changes: 1 addition & 1 deletion tests/nightly/dist_async_kvstore.py
@@ -31,7 +31,7 @@ def check_trainer_kv_update(weight_stype, update_on_kv):
     x = mx.gluon.Parameter('x', shape=(10,1), lr_mult=1.0, stype=weight_stype)
     x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
     try:
-        trainer = mx.gluon.Trainer([x], 'sgd', {'learning_rate': 0.1},
+        trainer = mx.gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 0.1},
                                    kvstore=kv, update_on_kvstore=update_on_kv)
         trainer._init_kvstore()
         assert trainer._kv_initialized
2 changes: 1 addition & 1 deletion tests/nightly/dist_device_sync_kvstore.py
@@ -109,7 +109,7 @@ def check_trainer_kv_update(update_on_kv):
     x = mx.gluon.Parameter('x', shape=(10,1), lr_mult=1.0)
     x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
     try:
-        trainer = mx.gluon.Trainer([x], 'sgd', {'learning_rate': 0.1},
+        trainer = mx.gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 0.1},
                                    kvstore=kv, update_on_kvstore=update_on_kv)
         trainer._init_kvstore()
         assert trainer._kv_initialized
6 changes: 3 additions & 3 deletions tests/nightly/dist_sync_kvstore.py
@@ -381,7 +381,7 @@ def test_gluon_trainer_type():
     def check_trainer_kv_type(stype, grad_stype, update_on_kv, expected):
         x = mx.gluon.Parameter('x', shape=(10,1), lr_mult=1.0, stype=stype, grad_stype=grad_stype)
         x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
-        trainer = mx.gluon.Trainer([x], 'sgd', {'learning_rate': 0.1},
+        trainer = mx.gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 0.1},
                                    kvstore=kv, update_on_kvstore=update_on_kv)
         try:
             trainer._init_kvstore()
@@ -405,7 +405,7 @@ def check_trainer_step():
     shape = (10, 1)
     x = mx.gluon.Parameter('x', shape=shape)
     x.initialize(ctx=ctx, init='ones')
-    trainer = mx.gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'multi_precision': False}, kvstore=kv)
+    trainer = mx.gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 1.0, 'multi_precision': False}, kvstore=kv)
     with mx.autograd.record():
         w = x.data(ctx)
         y = (my_rank + 1) * w
@@ -423,7 +423,7 @@ def check_trainer_sparse_step():
     all_rows = mx.nd.arange(0, shape[0], ctx=ctx)
     x = mx.gluon.Parameter('x', shape=shape, stype='row_sparse', grad_stype='row_sparse')
     x.initialize(ctx=ctx, init='ones')
-    trainer = mx.gluon.Trainer([x], 'sgd', {'learning_rate': 1.0}, kvstore=kv)
+    trainer = mx.gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 1.0}, kvstore=kv)
     with mx.autograd.record():
         w = x.row_sparse_data(all_rows)
         y = (my_rank + 1) * w
4 changes: 2 additions & 2 deletions tests/python/unittest/test_gluon.py
@@ -70,7 +70,7 @@ def test_sparse_parameter():
     assert len(p.list_grad()) == 2
     # getting row_sparse data without trainer throws an exception
     assertRaises(RuntimeError, p.list_row_sparse_data, row_id)
-    trainer = mx.gluon.Trainer([p], 'sgd')
+    trainer = mx.gluon.Trainer({'p':p}, 'sgd')
     assert len(p.list_row_sparse_data(row_id)) == 2
     weight = p.row_sparse_data(row_id)
     assert weight.context == mx.cpu(1)
@@ -104,7 +104,7 @@ def test_parameter_row_sparse_data():
     dim0 = 4
     x = gluon.Parameter('x', shape=(dim0, 2), stype='row_sparse')
     x.initialize(init='xavier', ctx=[ctx0, ctx1])
-    trainer = gluon.Trainer([x], 'sgd')
+    trainer = gluon.Trainer({'x':x}, 'sgd')
     x_param = x._data[0].copy()
     assert x_param.stype == 'row_sparse'
     row_id_0 = mx.nd.array([0,1], ctx=ctx0)
24 changes: 12 additions & 12 deletions tests/python/unittest/test_gluon_trainer.py
@@ -36,21 +36,21 @@ def test_multi_trainer():
     x = gluon.Parameter('x', shape=(10,), stype='row_sparse')
     x.initialize()
     # test set trainer
-    trainer0 = gluon.Trainer([x], 'sgd')
+    trainer0 = gluon.Trainer({'x':x}, 'sgd')
     assert(x._trainer() is trainer0)
     # test unset trainer
     x._set_trainer(None)
     assert(x._trainer is None)
     x._set_trainer(trainer0)
     with pytest.raises(RuntimeError):
         # multiple trainers for a sparse Parameter is not allowed
-        trainer1 = gluon.Trainer([x], 'sgd')
+        trainer1 = gluon.Trainer({'x':x}, 'sgd')
 
 @with_seed()
 def test_trainer_with_sparse_grad_on_single_context():
     x = gluon.Parameter('x', shape=(10,), grad_stype='row_sparse')
     x.initialize(ctx=[mx.cpu(0)], init='zeros')
-    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'momentum': 0.5})
+    trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 1.0, 'momentum': 0.5})
     with mx.autograd.record():
         for w in x.list_data():
             y = w + 1
@@ -66,7 +66,7 @@ def test_trainer_with_teststore():
     x = gluon.Parameter('x', shape=(10,))
     x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
     kv = mx.kv.create('teststore')
-    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'momentum': 0.5}, kvstore=kv)
+    trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 1.0, 'momentum': 0.5}, kvstore=kv)
     with mx.autograd.record():
         for w in x.list_data():
             y = w + 1
@@ -77,14 +77,14 @@ def test_trainer_with_teststore():
     assert (x.data(mx.cpu(1)).asnumpy() == -2).all()
     # Expect exceptions if update_on_kvstore is set to True,
     # because TestStore does not support that
-    invalid_trainer = gluon.Trainer([x], 'sgd', kvstore=kv, update_on_kvstore=True)
+    invalid_trainer = gluon.Trainer({'x':x}, 'sgd', kvstore=kv, update_on_kvstore=True)
     pytest.raises(ValueError, invalid_trainer._init_kvstore)
 
 @with_seed()
 def test_trainer():
     x = gluon.Parameter('x', shape=(10,))
     x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
-    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'momentum': 0.5})
+    trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 1.0, 'momentum': 0.5})
     with mx.autograd.record():
         for w in x.list_data():
             y = w + 1
@@ -119,7 +119,7 @@ def test_trainer():
 
     x = gluon.Parameter('x', shape=(10,))
     x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
-    trainer2 = gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'momentum': 0.5},
+    trainer2 = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 1.0, 'momentum': 0.5},
                              update_on_kvstore=False)
     with mx.autograd.record():
         for i, w in enumerate(x.list_data()):
@@ -139,7 +139,7 @@ def test_trainer_save_load():
 
     x = gluon.Parameter('x', shape=(10,), lr_mult=1.0)
     x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
-    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 0.1})
+    trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 0.1})
     with mx.autograd.record():
         for w in x.list_data():
             y = w + 1
@@ -158,7 +158,7 @@ def test_trainer_sparse_save_load():
     x = gluon.Parameter('x', shape=(10, 1), lr_mult=1.0,
                         stype='row_sparse', grad_stype='row_sparse')
     x.initialize(ctx=[mx.cpu(0)], init='zeros')
-    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 0.1})
+    trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 0.1})
     all_rows = mx.nd.arange(0, 10, ctx=mx.cpu(0))
     with mx.autograd.record():
         for w in x.list_row_sparse_data(all_rows):
@@ -257,7 +257,7 @@ def test_trainer_sparse_kv():
     def check_trainer_sparse_kv(kv, stype, grad_stype, update_on_kv, expected):
         x = mx.gluon.Parameter('x', shape=(10,1), lr_mult=1.0, stype=stype, grad_stype=grad_stype)
         x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
-        trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 0.1},
+        trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 0.1},
                                 kvstore=kv, update_on_kvstore=update_on_kv)
         all_rows = mx.nd.arange(0, 10, ctx=mx.cpu(0))
         try:
@@ -297,7 +297,7 @@ def test_trainer_lr_sched():
     factor = 0.1
     lr = 1
     lr_sched = mx.lr_scheduler.FactorScheduler(freq, factor=factor, base_lr=lr)
-    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': lr, 'lr_scheduler': lr_sched})
+    trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': lr, 'lr_scheduler': lr_sched})
    for i in range(10):
         with mx.autograd.record():
             for w in x.list_data():
@@ -316,7 +316,7 @@ def test_trainer_lr_sched():
     factor = 0.1
     lr = 1
     lr_sched = mx.lr_scheduler.FactorScheduler(freq, factor=factor, base_lr=lr)
-    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': lr, 'lr_scheduler': lr_sched},
+    trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': lr, 'lr_scheduler': lr_sched},
                             update_on_kvstore=False)
     for i in range(10):
         with mx.autograd.record():
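For downstream code that still holds parameters in a plain list, the pattern used throughout the updated tests suggests keying each parameter by a name. A hypothetical migration one-liner, assuming each Parameter exposes a name attribute as in gluon.Parameter('x', ...):

    param_list = [x]  # existing list-based code (hypothetical)
    # Build the now-required dict from the parameters' own names:
    trainer = mx.gluon.Trainer({p.name: p for p in param_list}, 'sgd', {'learning_rate': 0.1})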
