Add validator for optimizers when parameters are shared

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/18497

Reviewed By: kennyhorror

Differential Revision: D14614738

fbshipit-source-id: beddd8349827dcc8ccae36f21e5d29627056afcd
Author: Jiyan Yang
Date: 2019-04-17 21:07:42 -07:00
Committed by: Facebook GitHub Bot
Parent: 2787f1d8ed
Commit: c48e1679f9
3 changed files with 153 additions and 1 deletion


@@ -14,7 +14,7 @@ from caffe2.python.modeling.parameter_sharing import (
)
from caffe2.python.modeling.net_modifier import NetModifier
-from caffe2.python.optimizer import get_param_device
+from caffe2.python.optimizer import get_param_device, Optimizer
from caffe2.python.regularizer import Regularizer, RegularizationBy
from caffe2.python.layers import layers
from caffe2.proto import caffe2_pb2
@@ -228,6 +228,66 @@ class LayerModelHelper(model_helper.ModelHelper):
scope.CurrentNameScope(), param_name, ref_shape, shape)
)

    def _validate_param_optim(self, param_name, optim):
        # there are three possible values for optim:
        # 1) None (which will use self._default_optimizer after this layer is instantiated)
        # 2) self.NoOptim
        # 3) an instance of the Optimizer class, such as AdagradOptimizer
        if param_name not in self.param_to_optim:
            # this parameter is not shared with any other parameter so far
            return

        logger.info(
            "Parameter {} is shared with a previously created parameter. "
            "Validating that the same optimizer has been specified for both.".format(
                param_name,
            )
        )

        ref_optim = self.param_to_optim[param_name]

        if optim is None:
            assert ref_optim == self._default_optimizer, (
                "Optim for {} is None, which falls back to the default_optimizer. "
                "However, the optimizer that has been specified for this shared "
                "parameter is {}, which is different from the default_optimizer {}. "
                "Please check the optimizers specified for parameters shared "
                "with {} and the default_optimizer to ensure consistency.".format(
                    param_name, ref_optim, self._default_optimizer, param_name
                )
            )
        elif optim == self.NoOptim:
            assert ref_optim == self.NoOptim, (
                "Optim for {} is NoOptim. However, the optimizer for the parameters "
                "shared with {} is {}, which is different from NoOptim. "
                "Please check the optimizer specified for other parameters in the "
                "shared group to ensure consistency.".format(
                    param_name, param_name, ref_optim
                )
            )
        elif isinstance(optim, Optimizer):
            assert isinstance(ref_optim, Optimizer), (
                "Optim for {} is an instance of Optimizer. However, the optimizer "
                "for the parameters shared with {} is {}, which is not an instance "
                "of Optimizer. Please check the optimizer specified for other "
                "parameters in the shared group to ensure consistency.".format(
                    param_name, param_name, ref_optim
                )
            )
            assert type(optim) is type(ref_optim) and optim.attributes == ref_optim.attributes, (
                "Optim for {} is an instance of Optimizer. However, the optimizer "
                "for the parameters shared with {} is {}. "
                "This optimizer either doesn't have the same type as the current "
                "optimizer ({} vs {}), or its attributes, such as the learning rate, "
                "differ from those of the current optimizer ({} vs {}). "
                "Please check the optimizer specified for other parameters in the "
                "shared group to ensure consistency.".format(
                    param_name, param_name, ref_optim,
                    type(optim), type(ref_optim),
                    optim.attributes, ref_optim.attributes
                )
            )
        else:
            raise ValueError(
                "optim should be either None, NoOptim, or an instance of "
                "Optimizer; got {}".format(optim)
            )

def create_param(self, param_name, shape, initializer, optimizer=None,
ps_param=None, regularizer=None):
if isinstance(param_name, core.BlobReference):
@@ -270,6 +330,8 @@ class LayerModelHelper(model_helper.ModelHelper):
self._validate_param_shape(param_name, shape)
self._validate_param_optim(param_name, optimizer)
self._param_to_shape[param_name] = shape
return param
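
To make the new check concrete, here is a small illustrative sketch (not part of the diff; the optimizer values and variable names are made up) of the comparison the Optimizer branch of _validate_param_optim performs: two optimizers are treated as consistent only if they share the same concrete class and the same init-time attributes.

    from caffe2.python.optimizer import AdagradOptimizer

    # optimizer already recorded for the shared blob vs. the one passed in now
    ref_optim = AdagradOptimizer(alpha=0.004, epsilon=0.02)
    new_optim = AdagradOptimizer(alpha=0.005, epsilon=0.02)

    # mirrors the assertion above: same class and same init-time attributes
    # (see the new Optimizer.attributes property in the third file)
    consistent = (
        type(new_optim) is type(ref_optim)
        and new_optim.attributes == ref_optim.attributes
    )
    print(consistent)  # False here: alpha differs (0.005 vs 0.004)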


@@ -7,6 +7,7 @@ from caffe2.python import core, scope
from caffe2.python.modeling.parameter_sharing import (
ParameterSharing,
)
from caffe2.python.optimizer import AdagradOptimizer, AdamOptimizer
from caffe2.python.layer_test_util import LayersTestCase
import six
@@ -149,3 +150,84 @@ class ParameterSharingTest(LayersTestCase):
sorted(op_outputs),
['global_scope/shared_fc/b', 'global_scope/shared_fc/w']
)

    def test_layer_shared_parameter_optim_validator(self):
        """
        This test covers the _validate_param_optim function in the
        LayerModelHelper class.
        """
output_dims = 2
adagrad_optim = AdagradOptimizer(
alpha=0.004,
epsilon=0.02,
)
self.model.default_optimizer = adagrad_optim
# the following covers the branch -- optim is None
with scope.NameScope('global_scope_0'):
with ParameterSharing({'scope_1': 'scope_0'}):
with scope.NameScope('scope_0'):
fc1_output = self.model.FC(
self.model.input_feature_schema.float_features,
output_dims,
weight_optim=self.model.NoOptim,
)
with scope.NameScope('scope_1'), self.assertRaises(Exception):
fc2_output = self.model.FC(
self.model.input_feature_schema.float_features,
output_dims
)
# the following covers the branch -- optim is NoOptim
with scope.NameScope('global_scope_1'):
with ParameterSharing({'scope_1': 'scope_0'}):
with scope.NameScope('scope_0'):
fc1_output = self.model.FC(
self.model.input_feature_schema.float_features,
output_dims,
weight_optim=None,
)
with scope.NameScope('scope_1'), self.assertRaises(Exception):
fc2_output = self.model.FC(
self.model.input_feature_schema.float_features,
output_dims,
weight_optim=self.model.NoOptim,
)
# the following covers the branch -- optim is an instance of Optimizer
adagrad_optim_2 = AdagradOptimizer(
alpha=0.005,
epsilon=0.02,
)
adam_optim = AdamOptimizer()
self.model.default_optimizer = adagrad_optim_2
with scope.NameScope('global_scope_2'):
with ParameterSharing({'scope_1': 'scope_0', 'scope_2': 'scope_0'}):
with scope.NameScope('scope_0'):
fc1_output = self.model.FC(
self.model.input_feature_schema.float_features,
output_dims,
weight_optim=None, # it will use adagrad_optim_2
)
with scope.NameScope('scope_1'), self.assertRaises(Exception):
fc2_output = self.model.FC(
self.model.input_feature_schema.float_features,
output_dims,
weight_optim=adagrad_optim,
)
with scope.NameScope('scope_2'), self.assertRaises(Exception):
fc2_output = self.model.FC(
self.model.input_feature_schema.float_features,
output_dims,
weight_optim=adam_optim,
)
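
For contrast with the failing cases above, a hedged sketch of a case the validator should allow (the scope name 'global_scope_3' is illustrative, and it assumes two separately constructed AdagradOptimizers with identical arguments compare equal under the new attributes property): both scopes specify an equivalent optimizer, so the shared parameter passes validation and no exception is raised.

        with scope.NameScope('global_scope_3'):
            with ParameterSharing({'scope_1': 'scope_0'}):
                with scope.NameScope('scope_0'):
                    fc1_output = self.model.FC(
                        self.model.input_feature_schema.float_features,
                        output_dims,
                        weight_optim=AdagradOptimizer(alpha=0.004, epsilon=0.02),
                    )
                with scope.NameScope('scope_1'):
                    # same optimizer class and attributes as scope_0, so no exception
                    fc2_output = self.model.FC(
                        self.model.input_feature_schema.float_features,
                        output_dims,
                        weight_optim=AdagradOptimizer(alpha=0.004, epsilon=0.02),
                    )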


@@ -9,6 +9,7 @@ from collections import namedtuple, defaultdict
from past.builtins import basestring
import logging
import copy
import numpy as np
@@ -72,6 +73,13 @@ class Optimizer(object):
classname, self._instance_num, base_str, node_name, gpu_id,
)

    @property
    def attributes(self):
        # return a dict that contains attributes related to init args only
        attr = copy.deepcopy(self.__dict__)
        del attr['_instance_num']
        return attr

def make_unique_blob_name(self, base_str):
"""
Returns a blob name that will be unique to the current device
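
As a usage note for the new attributes property, a minimal sketch (assuming AdagradOptimizer records only its constructor arguments, plus the per-instance _instance_num counter, on the instance): optimizers built with the same arguments compare equal on attributes even though they are distinct objects, which is exactly what the shared-parameter validator relies on.

    from caffe2.python.optimizer import AdagradOptimizer

    a = AdagradOptimizer(alpha=0.004, epsilon=0.02)
    b = AdagradOptimizer(alpha=0.004, epsilon=0.02)
    c = AdagradOptimizer(alpha=0.005, epsilon=0.02)

    # attributes drops the per-instance counter, so equal init args compare equal
    assert a.attributes == b.attributes
    # a different learning rate shows up as an attributes mismatch
    assert a.attributes != c.attributes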