Rewrite Python built-in class `super()` calls. Only non-semantic changes should be applied.

- #94587
- #94588
- #94592

Also, methods whose body consists only of a `super()` call are removed:

```diff
 class MyModule(nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, ...):
         ...
```

Cases where the rewrite would change semantics are left unchanged, e.g.:

f152a79be9/caffe2/python/net_printer.py (L184-L190)
f152a79be9/test/test_jit_fuser_te.py (L2628-L2635)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/94587
Approved by: https://github.com/ezyang
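For context, the `super()` rewrite referred to above is presumably the replacement of the two-argument form by the zero-argument form. A minimal sketch (the class and method names here are illustrative only, not taken from the diff):

```python
import torch.nn as nn


class MyModule(nn.Module):
    def __init__(self):
        # Python 2 style, still valid but redundant:
        #     super(MyModule, self).__init__()
        # Zero-argument form produced by the rewrite; identical behavior
        # when called inside a class body:
        super().__init__()
```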
364 lines
12 KiB
Python
## @package fc_with_bootstrap
# Module caffe2.python.layers.fc_with_bootstrap


import math

import numpy as np

from caffe2.python import core, schema
from caffe2.python.helpers.arg_scope import get_current_scope
from caffe2.python.layers.layers import ModelLayer
from caffe2.python.layers.sampling_trainable_mixin import SamplingTrainableMixin

def get_fc_predictor_version(fc_version):
    assert fc_version in ["fp32"], (
        "Only support fp32 for the fully connected layer "
        "in the predictor net, the provided FC precision is {}".format(fc_version)
    )
    return fc_version
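# add_ops() below resolves the FC precision for the predictor net by looking up
# this function's name in the current arg_scope (via get_current_scope());
# "fp32" is the only value the assert above accepts today.
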
class FCWithBootstrap(SamplingTrainableMixin, ModelLayer):
    def __init__(
        self,
        model,
        input_record,
        output_dims,
        num_bootstrap,
        weight_init=None,
        bias_init=None,
        weight_optim=None,
        bias_optim=None,
        name="fc_with_bootstrap",
        weight_reg=None,
        bias_reg=None,
        clip_param=None,
        axis=1,
        **kwargs
    ):
        super().__init__(model, name, input_record, **kwargs)
        assert isinstance(
            input_record, schema.Scalar
        ), "Incorrect input type {}".format(input_record)
        assert (
            len(input_record.field_types()[0].shape) > 0
        ), "FC expects limited dimensions of the input tensor"
        assert axis >= 1, "axis {} should >= 1.".format(axis)
        self.axis = axis
        input_dims = np.prod(input_record.field_types()[0].shape[axis - 1 :])

        assert input_dims > 0, "FC expects input dimensions > 0, got {}".format(
            input_dims
        )

        self.clip_args = None

        # attributes for bootstrapping below
        self.num_bootstrap = num_bootstrap

        # input dim shape
        self.input_dims = input_dims

        # bootstrapped fully-connected layers to be used in eval time
        self.bootstrapped_FCs = []

        # scalar containing batch_size blob so that we don't need to recompute
        self.batch_size = None

        # we want this to be the last FC, so the output_dim should be 1, set to None
        self.output_dim_vec = None

        # lower bound when creating random indices
        self.lower_bound = None

        # upper bound when creating random indices
        self.upper_bound = None

        if clip_param is not None:
            assert len(clip_param) == 2, (
                "clip_param must be a tuple / list "
                "of length 2 and in the form of (clip_min, clip_max)"
            )
            clip_min, clip_max = clip_param
            assert (
                clip_min is not None or clip_max is not None
            ), "clip_min, and clip_max in clip_param cannot both be None"
            assert (
                clip_min is None or clip_max is None
            ) or clip_min < clip_max, (
                "clip_param = [clip_min, clip_max] must have clip_min < clip_max"
            )
            self.clip_args = {}
            if clip_min is not None:
                self.clip_args["min"] = clip_min
            if clip_max is not None:
                self.clip_args["max"] = clip_max

        scale = math.sqrt(1.0 / input_dims)
        weight_init = (
            weight_init
            if weight_init
            else ("UniformFill", {"min": -scale, "max": scale})
        )
        bias_init = (
            bias_init if bias_init else ("UniformFill", {"min": -scale, "max": scale})
        )

        """
        bootstrapped FCs:
            Ex: [
                bootstrapped_weights_blob_1, bootstrapped_bias_blob_1,
                ...,
                ...,
                bootstrapped_weights_blob_b, bootstrapped_bias_blob_b
            ]

        output_schema:
            Note: indices will always be on even indices.
            Ex: Struct(
                indices_0_blob,
                preds_0_blob,
                ...
                ...
                indices_b_blob,
                preds_b_blob
            )
        """
        bootstrapped_FCs = []
        output_schema = schema.Struct()
        for i in range(num_bootstrap):
            output_schema += schema.Struct(
                (
                    "bootstrap_iteration_{}/indices".format(i),
                    self.get_next_blob_reference(
                        "bootstrap_iteration_{}/indices".format(i)
                    ),
                ),
                (
                    "bootstrap_iteration_{}/preds".format(i),
                    self.get_next_blob_reference(
                        "bootstrap_iteration_{}/preds".format(i)
                    ),
                ),
            )
            self.bootstrapped_FCs.extend(
                [
                    self.create_param(
                        param_name="bootstrap_iteration_{}/w".format(i),
                        shape=[output_dims, input_dims],
                        initializer=weight_init,
                        optimizer=weight_optim,
                        regularizer=weight_reg,
                    ),
                    self.create_param(
                        param_name="bootstrap_iteration_{}/b".format(i),
                        shape=[output_dims],
                        initializer=bias_init,
                        optimizer=bias_optim,
                        regularizer=bias_reg,
                    ),
                ]
            )

        self.output_schema = output_schema

        if axis == 1:
            output_shape = (output_dims,)
        else:
            output_shape = list(input_record.field_types()[0].shape)[0 : axis - 1]
            output_shape = tuple(output_shape + [output_dims])
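    # Parameter layout note: each bootstrap iteration i contributes a
    # (weights, bias) pair, so self.bootstrapped_FCs stores
    # [w_0, b_0, w_1, b_1, ...] and iteration i's pair is recovered later with
    # the slice [i * 2 : (i * 2) + 2] in add_ops() / add_train_ops(). Likewise,
    # output_schema alternates indices/preds, which is why predictions live at
    # field index (i * 2) + 1.
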
    def _generate_bootstrapped_indices(self, net, copied_cur_layer, iteration):
        """
        Args:
            net: the caffe2 net to insert operator

            copied_cur_layer: blob of the bootstrapped features (make sure this
                blob has a stop_gradient on)

            iteration: the bootstrap iteration to generate for. Used to correctly
                populate the output_schema

        Return:
            A blob containing the generated indices of shape: (batch_size,)
        """
        with core.NameScope("bootstrap_iteration_{}".format(iteration)):
            if iteration == 0:
                # capture batch_size once for efficiency
                input_shape = net.Shape(copied_cur_layer, "input_shape")
                batch_size_index = net.Const(np.array([0]), "batch_size_index")
                batch_size = net.Gather([input_shape, batch_size_index], "batch_size")
                self.batch_size = batch_size

                lower_bound = net.Const(np.array([0]), "lower_bound", dtype=np.int32)
                offset = net.Const(np.array([1]), "offset", dtype=np.int32)
                int_batch_size = net.Cast(
                    [self.batch_size], "int_batch_size", to=core.DataType.INT32
                )
                upper_bound = net.Sub([int_batch_size, offset], "upper_bound")

                self.lower_bound = lower_bound
                self.upper_bound = upper_bound

            indices = net.UniformIntFill(
                [self.batch_size, self.lower_bound, self.upper_bound],
                self.output_schema[iteration * 2].field_blobs()[0],
                input_as_shape=1,
            )

            return indices
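    # Sampling note: UniformIntFill with input_as_shape=1 reads its output shape
    # from the batch_size blob and, given three inputs, uses lower_bound and
    # upper_bound as the min/max of the uniform draw, i.e. integers in
    # [0, batch_size - 1]. Gathering rows with these indices (see _bootstrap_ops)
    # resamples the batch with replacement, one bootstrap resample per iteration.
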
    def _bootstrap_ops(self, net, copied_cur_layer, indices, iteration):
        """
        This method contains all the bootstrapping logic used to bootstrap
        the features. Only used by the train_net.

        Args:
            net: the caffe2 net to insert bootstrapping operators

            copied_cur_layer: the blob representing the current features.
                Note, this layer should have a stop_gradient on it.

        Returns:
            bootstrapped_features: blob of bootstrapped version of cur_layer
                with same dimensions
        """

        # draw features based upon the bootstrapped indices
        bootstrapped_features = net.Gather(
            [copied_cur_layer, indices],
            net.NextScopedBlob("bootstrapped_features_{}".format(iteration)),
        )

        bootstrapped_features = schema.Scalar(
            (np.float32, self.input_dims), bootstrapped_features
        )

        return bootstrapped_features
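    # The gathered blob is wrapped back into a schema.Scalar typed as
    # (np.float32, input_dims) so that _insert_fc_ops can call
    # features.field_blobs() in the same way for both the raw input_record and
    # the bootstrapped features.
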
    def _insert_fc_ops(self, net, features, params, outputs, version):
        """
        Args:
            net: the caffe2 net to insert operator

            features: Scalar containing blob of the bootstrapped features or
                actual cur_layer features

            params: weight and bias for FC

            outputs: the output blobs

            version: support fp32 for now.
        """

        if version == "fp32":
            pred_blob = net.FC(
                features.field_blobs() + params, outputs, axis=self.axis, **self.kwargs
            )
            return pred_blob
        else:
            raise Exception("unsupported FC type version {}".format(version))
    def _add_ops(self, net, features, iteration, params, version):
        """
        Args:
            params: the weight and bias, passed by either add_ops or
                add_train_ops function

            features: feature blobs to predict on. Can be the actual cur_layer
                or the bootstrapped_feature blobs.

            version: currently fp32 support only
        """

        if self.clip_args is not None:
            clipped_params = [net.NextScopedBlob("clipped_%s" % str(p)) for p in params]
            for p, cp in zip(params, clipped_params):
                net.Clip([p], [cp], **self.clip_args)
            params = clipped_params

        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            self._insert_fc_ops(
                net=net,
                features=features,
                params=params,
                outputs=[self.output_schema.field_blobs()[(iteration * 2) + 1]],
                version=version,
            )
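    # Clipping note: self.clip_args stays None unless clip_param was passed to
    # __init__, so the Clip ops (and the clipped_* copies of the weight/bias
    # blobs) are only inserted when explicit clip bounds were requested.
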
    def add_ops(self, net):
        """
        Both the predict net and the eval net will call this function.

        For the bootstrapping approach, the goal is to pass the cur_layer feature
        inputs through all the bootstrapped FCs that are stored under
        self.bootstrapped_FCs. Return the preds in the same output_schema
        with dummy indices (because they are not needed).
        """

        version_info = get_current_scope().get(
            get_fc_predictor_version.__name__, {"fc_version": "fp32"}
        )
        predictor_fc_fp_version = version_info["fc_version"]

        for i in range(self.num_bootstrap):
            # these are dummy indices, not to be used anywhere
            indices = self._generate_bootstrapped_indices(
                net=net,
                copied_cur_layer=self.input_record.field_blobs()[0],
                iteration=i,
            )

            params = self.bootstrapped_FCs[i * 2 : (i * 2) + 2]

            self._add_ops(
                net=net,
                features=self.input_record,
                params=params,
                iteration=i,
                version=predictor_fc_fp_version,
            )
    def add_train_ops(self, net):
        # use the train_param_blobs to be consistent with the SamplingTrain unittest

        # obtain features
        for i in range(self.num_bootstrap):
            indices = self._generate_bootstrapped_indices(
                net=net,
                copied_cur_layer=self.input_record.field_blobs()[0],
                iteration=i,
            )
            bootstrapped_features = self._bootstrap_ops(
                net=net,
                copied_cur_layer=self.input_record.field_blobs()[0],
                indices=indices,
                iteration=i,
            )
            self._add_ops(
                net,
                features=bootstrapped_features,
                iteration=i,
                params=self.train_param_blobs[i * 2 : (i * 2) + 2],
                version="fp32",
            )
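    # Train vs. eval: add_train_ops resamples each batch (indices -> Gather) and
    # pushes the bootstrapped features through iteration i's (w, b) pair taken
    # from self.train_param_blobs, whereas add_ops above runs every bootstrapped
    # FC directly on the unmodified input_record and only emits dummy indices.
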
    def get_fp16_compatible_parameters(self):
        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            return [
                blob for idx, blob in enumerate(self.bootstrapped_FCs) if idx % 2 == 0
            ]
        else:
            raise Exception(
                "Currently only supports functionality for output_dim_vec == 1"
            )
    @property
    def param_blobs(self):
        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            return self.bootstrapped_FCs
        else:
            raise Exception("FCWithBootstrap layer only supports output_dim_vec==1")
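As a rough usage sketch: in the caffe2 layers framework, a registered ModelLayer subclass can typically be attached to a LayerModelHelper by name. Everything below (the schema names, the dimensions, the LayerModelHelper keyword arguments, and the attribute-style `model.FCWithBootstrap(...)` dispatch) is an assumption for illustration, not taken from this file:

```python
import numpy as np

from caffe2.python import schema
from caffe2.python.layer_model_helper import LayerModelHelper

# Hypothetical 32-dimensional dense input feature (names are illustrative).
input_schema = schema.Struct(
    ("float_features", schema.Scalar((np.float32, (32,)))),
)
model = LayerModelHelper(
    "fc_with_bootstrap_example",
    input_feature_schema=input_schema,
    trainer_extra_schema=schema.Struct(),
)

# One FCWithBootstrap layer: 10 bootstrap resamples, each with its own
# single-output FC head (output_dims=1).
preds = model.FCWithBootstrap(
    model.input_feature_schema.float_features,
    output_dims=1,
    num_bootstrap=10,
)
# If the dispatch works as assumed, preds is the layer's output_schema: a
# schema.Struct alternating bootstrap_iteration_i/indices and
# bootstrap_iteration_i/preds blobs, as documented in __init__ above.
```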