Add validator for optimizers when parameters are shared
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/18497

Reviewed By: kennyhorror

Differential Revision: D14614738

fbshipit-source-id: beddd8349827dcc8ccae36f21e5d29627056afcd

Committed by: Facebook Github Bot
Commit: c48e1679f9
Parent: 2787f1d8ed
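The diff below adds a consistency check for parameters that are shared across name scopes: once a parameter blob has been registered with one optimizer, registering it again with a conflicting optimizer now fails fast instead of silently producing ambiguous training behavior. A minimal sketch of the situation the validator catches, modeled on the test added in this commit (`model` stands for a `LayerModelHelper` such as `self.model` in the test; this snippet is an illustration, not code from the commit):

    from caffe2.python import scope
    from caffe2.python.modeling.parameter_sharing import ParameterSharing
    from caffe2.python.optimizer import AdagradOptimizer, AdamOptimizer

    # 'model' is assumed to be a LayerModelHelper instance, as in the test below.
    with scope.NameScope('global_scope'):
        with ParameterSharing({'scope_1': 'scope_0'}):
            with scope.NameScope('scope_0'):
                # first use of the shared weight: registered with Adagrad
                fc1 = model.FC(model.input_feature_schema.float_features, 2,
                               weight_optim=AdagradOptimizer(alpha=0.004, epsilon=0.02))
            with scope.NameScope('scope_1'):
                # same underlying weight blob, but a different optimizer type;
                # with this commit, building this layer raises an assertion error
                fc2 = model.FC(model.input_feature_schema.float_features, 2,
                               weight_optim=AdamOptimizer())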
@@ -14,7 +14,7 @@ from caffe2.python.modeling.parameter_sharing import (
 )
 from caffe2.python.modeling.net_modifier import NetModifier

-from caffe2.python.optimizer import get_param_device
+from caffe2.python.optimizer import get_param_device, Optimizer
 from caffe2.python.regularizer import Regularizer, RegularizationBy
 from caffe2.python.layers import layers
 from caffe2.proto import caffe2_pb2
@@ -228,6 +228,66 @@ class LayerModelHelper(model_helper.ModelHelper):
                     scope.CurrentNameScope(), param_name, ref_shape, shape)
             )

+    def _validate_param_optim(self, param_name, optim):
+        # there are three possible values for optim:
+        # 1) None (which will use self._default_optimizer after this layer is instantiated)
+        # 2) self.NoOptim
+        # 3) an instance of Optimizer class such as AdagradOptimizer
+
+        # this implies this parameter is not shared with any other parameter so far
+        if param_name not in self.param_to_optim:
+            return
+
+        logger.info("{} shares the same parameter with another parameter. "
+                    "Validating if the same optimizer has been specified for them.".format(
+                        param_name,
+                    ))
+
+        ref_optim = self.param_to_optim[param_name]
+
+        if optim is None:
+            assert ref_optim == self._default_optimizer, (
+                "Optim for {} is None which will fall back to use default_optimizer. "
+                "However, the optimizer that has been specified for this shared parameter "
+                "is {} which is different from default_optimizer {}. "
+                "Please check the optimizers specified for parameters shared "
+                "with {} and the default_optimizer to ensure the consistency.".format(
+                    param_name, ref_optim, self._default_optimizer, param_name
+                )
+            )
+        elif optim == self.NoOptim:
+            assert ref_optim == self.NoOptim, (
+                "Optim for {} is NoOptim. However, the optimizer for the parameters "
+                "shared with {} is {} which is different from NoOptim. "
+                "Please check the optimizer specified for other parameters in the "
+                "shared group to ensure consistency.".format(
+                    param_name, param_name, ref_optim
+                )
+            )
+        elif isinstance(optim, Optimizer):
+            assert isinstance(ref_optim, Optimizer), (
+                "Optim for {} is an instance of Optimizer. However, the optimizer "
+                "for the parameters shared with {} is {} which is not an instance "
+                "of Optimizer. Please check the optimizer specified for other "
+                " parameters in the shared group to ensure consistency.".format(
+                    param_name, param_name, ref_optim, optim
+                )
+            )
+
+            assert type(optim) is type(ref_optim) and optim.attributes == ref_optim.attributes, (
+                "Optim for {} is an instance of Optimizer. However, the optimizer "
+                "for the parameters shared with {} is {}. "
+                "This optimizer either doesn't have the same type as the current optimizer: "
+                "{} vs {}, or its attributes such as learning rate are different from "
+                "that of current optimizer which is {} vs {}. "
+                "Please check the optimizer specified for other parameters in the "
+                "shared group to ensure consistency.".format(
+                    param_name, param_name, ref_optim, type(optim), type(ref_optim), optim.attributes, ref_optim.attributes
+                )
+            )
+        else:
+            raise ValueError("optim should be either None, NoOptim, or an instance of Optimizer, Got {} ".format(optim))
+
     def create_param(self, param_name, shape, initializer, optimizer=None,
                      ps_param=None, regularizer=None):
         if isinstance(param_name, core.BlobReference):
@@ -270,6 +330,8 @@ class LayerModelHelper(model_helper.ModelHelper):

         self._validate_param_shape(param_name, shape)

+        self._validate_param_optim(param_name, optimizer)
+
         self._param_to_shape[param_name] = shape

         return param
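A reading aid for the hunk above (not part of the commit): when `create_param` sees a parameter name that is already recorded in `self.param_to_optim`, the new `_validate_param_optim` hook requires the incoming `optim` to be consistent with what was recorded. The same decision tree, written as a standalone, hypothetical helper over plain objects so the rules are visible without the Caffe2 plumbing:

    # Hypothetical sketch of the consistency rules enforced above; `recorded` is the
    # optimizer already stored for the shared blob, `new` is the one being passed now.
    def check_shared_optim(recorded, new, default, no_optim):
        if new is None:
            # None falls back to the default optimizer, so the shared blob must use it too
            assert recorded == default, "shared param does not use the default optimizer"
        elif new == no_optim:
            assert recorded == no_optim, "shared param was not registered with NoOptim"
        else:
            # an optimizer instance: same concrete type and same init-time attributes
            assert type(new) is type(recorded) and new.attributes == recorded.attributes, \
                "shared param was registered with a differently configured optimizer"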
@@ -7,6 +7,7 @@ from caffe2.python import core, scope
 from caffe2.python.modeling.parameter_sharing import (
     ParameterSharing,
 )
+from caffe2.python.optimizer import AdagradOptimizer, AdamOptimizer
 from caffe2.python.layer_test_util import LayersTestCase
 import six

@@ -149,3 +150,84 @@ class ParameterSharingTest(LayersTestCase):
             sorted(op_outputs),
             ['global_scope/shared_fc/b', 'global_scope/shared_fc/w']
         )
+
+    def test_layer_shared_parameter_optim_validator(self):
+        """
+        This test is to cover the _validate_param_optim function in
+        layer_model_helper class.
+        """
+
+        output_dims = 2
+
+        adagrad_optim = AdagradOptimizer(
+            alpha=0.004,
+            epsilon=0.02,
+        )
+
+        self.model.default_optimizer = adagrad_optim
+
+        # the following covers the branch -- optim is None
+        with scope.NameScope('global_scope_0'):
+            with ParameterSharing({'scope_1': 'scope_0'}):
+                with scope.NameScope('scope_0'):
+                    fc1_output = self.model.FC(
+                        self.model.input_feature_schema.float_features,
+                        output_dims,
+                        weight_optim=self.model.NoOptim,
+                    )
+
+                with scope.NameScope('scope_1'), self.assertRaises(Exception):
+                    fc2_output = self.model.FC(
+                        self.model.input_feature_schema.float_features,
+                        output_dims
+                    )
+
+        # the following covers the branch -- optim is NoOptim
+        with scope.NameScope('global_scope_1'):
+            with ParameterSharing({'scope_1': 'scope_0'}):
+                with scope.NameScope('scope_0'):
+                    fc1_output = self.model.FC(
+                        self.model.input_feature_schema.float_features,
+                        output_dims,
+                        weight_optim=None,
+                    )
+
+                with scope.NameScope('scope_1'), self.assertRaises(Exception):
+                    fc2_output = self.model.FC(
+                        self.model.input_feature_schema.float_features,
+                        output_dims,
+                        weight_optim=self.model.NoOptim,
+                    )
+
+        # the following covers the branch -- optim is an instance of Optimizer
+        adagrad_optim_2 = AdagradOptimizer(
+            alpha=0.005,
+            epsilon=0.02,
+        )
+
+        adam_optim = AdamOptimizer()
+
+        self.model.default_optimizer = adagrad_optim_2
+
+        with scope.NameScope('global_scope_2'):
+            with ParameterSharing({'scope_1': 'scope_0', 'scope_2': 'scope_0'}):
+                with scope.NameScope('scope_0'):
+                    fc1_output = self.model.FC(
+                        self.model.input_feature_schema.float_features,
+                        output_dims,
+                        weight_optim=None,  # it will use adagrad_optim_2
+                    )
+
+                with scope.NameScope('scope_1'), self.assertRaises(Exception):
+                    fc2_output = self.model.FC(
+                        self.model.input_feature_schema.float_features,
+                        output_dims,
+                        weight_optim=adagrad_optim,
+                    )
+
+                with scope.NameScope('scope_2'), self.assertRaises(Exception):
+                    fc2_output = self.model.FC(
+                        self.model.input_feature_schema.float_features,
+                        output_dims,
+                        weight_optim=adam_optim,
+                    )
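A note on why each `assertRaises` in the test above fires (an interpretation of the diff, not text from the commit):

    # global_scope_0: scope_0 registers the shared weight with NoOptim; scope_1 then passes
    #                 optim=None, which must match default_optimizer (adagrad_optim) -> mismatch.
    # global_scope_1: scope_0 registers the weight with the default optimizer (optim=None);
    #                 scope_1 then passes NoOptim -> mismatch.
    # global_scope_2: scope_0 registers the weight with adagrad_optim_2 (via optim=None);
    #                 scope_1 passes adagrad_optim, same type but different alpha so .attributes
    #                 differ; scope_2 passes adam_optim, a different Optimizer type -> both fail.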
@@ -9,6 +9,7 @@ from collections import namedtuple, defaultdict
 from past.builtins import basestring

 import logging
+import copy

 import numpy as np

@@ -72,6 +73,13 @@ class Optimizer(object):
             classname, self._instance_num, base_str, node_name, gpu_id,
         )

+    @property
+    def attributes(self):
+        # return a dict that contains attributes related to init args only
+        attr = copy.deepcopy(self.__dict__)
+        del attr['_instance_num']
+        return attr
+
     def make_unique_blob_name(self, base_str):
         """
         Returns a blob name that will be unique to the current device
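The `attributes` property above lets two independently constructed optimizers be compared by configuration rather than by object identity, which is what the shared-parameter validator relies on. A minimal sketch of that comparison on freshly constructed optimizers (an illustration, not code from the commit):

    from caffe2.python.optimizer import AdagradOptimizer

    a = AdagradOptimizer(alpha=0.004, epsilon=0.02)
    b = AdagradOptimizer(alpha=0.004, epsilon=0.02)
    c = AdagradOptimizer(alpha=0.005, epsilon=0.02)

    # a and b are distinct objects, but their init-time configuration matches;
    # _instance_num differs per instance, which is why attributes removes it.
    assert type(a) is type(b) and a.attributes == b.attributes
    assert a.attributes != c.attributes  # alpha differs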