from caffe2.python import schema
from caffe2.python.layers.arc_cosine_feature_map import ArcCosineFeatureMap
import numpy as np


class SemiRandomFeatures(ArcCosineFeatureMap):
    """
    Implementation of the semi-random kernel feature map.

    Applies H(x_rand) * x_rand^s * x_learned, where
        H is the Heaviside step function,
        x_rand is the input after applying FC with randomized parameters,
        and x_learned is the input after applying FC with learnable parameters.

    If using a multilayer model with semi-random layers, then input and output
    records should have a 'full' and a 'random' Scalar. The random Scalar will
    be passed as input to process the random features.

    For more information, see the original paper:
        https://arxiv.org/pdf/1702.08882.pdf

    Inputs:
        output_dims -- dimensions of the output vector
        s -- if s == 0, will obtain linear semi-random features;
             else if s == 1, will obtain squared semi-random features;
             else, for s >= 2, will obtain higher order semi-random features
        scale_random -- amount to scale the standard deviation
                        (for random parameter initialization when weight_init or
                        bias_init hasn't been specified)
        scale_learned -- amount to scale the standard deviation
                         (for learned parameter initialization when weight_init or
                         bias_init hasn't been specified)

        weight_init_random -- initialization distribution for random weight parameter
                              (if None, will use Gaussian distribution)
        bias_init_random -- initialization distribution for random bias parameter
                            (if None, will use Uniform distribution)
        weight_init_learned -- initialization distribution for learned weight parameter
                               (if None, will use Gaussian distribution)
        bias_init_learned -- initialization distribution for learned bias parameter
                             (if None, will use Uniform distribution)
        weight_optim -- optimizer for weight params for learned features
        bias_optim -- optimizer for bias param for learned features

        set_weight_as_global_constant -- if True, initialized random parameters
                                         will be constant across all distributed
                                         instances of the layer
    """
    def __init__(
            self,
            model,
            input_record,
            output_dims,
            s=1,
            scale_random=1.0,
            scale_learned=1.0,
            weight_init_random=None,
            bias_init_random=None,
            weight_init_learned=None,
            bias_init_learned=None,
            weight_optim=None,
            bias_optim=None,
            set_weight_as_global_constant=False,
            name='semi_random_features',
            **kwargs):

        if isinstance(input_record, schema.Struct):
            schema.is_schema_subset(
                schema.Struct(
                    ('full', schema.Scalar()),
                    ('random', schema.Scalar()),
                ),
                input_record
            )
            self.input_record_full = input_record.full
            self.input_record_random = input_record.random

        elif isinstance(input_record, schema.Scalar):
            self.input_record_full = input_record
            self.input_record_random = input_record
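
        # The base class (ArcCosineFeatureMap) creates the random FC
        # parameters; weight_optim/bias_optim are deliberately None here so
        # the random parameters stay fixed, and only the learned parameters
        # (initialized further below) are trained.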
        super().__init__(
            model,
            self.input_record_full,
            output_dims,
            s=s,
            scale=scale_random,  # To initialize the random parameters
            weight_init=weight_init_random,
            bias_init=bias_init_random,
            weight_optim=None,
            bias_optim=None,
            set_weight_as_global_constant=set_weight_as_global_constant,
            initialize_output_schema=False,
            name=name,
            **kwargs)
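
        # Expose both outputs so that stacked semi-random layers can consume
        # the 'random' Scalar of the previous layer (see the class docstring).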
        self.output_schema = schema.Struct(
            ('full', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_full_output')
            ),),
            ('random', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_random_output')
            ),),
        )

        # To initialize the learnable parameters
        assert (scale_learned > 0.0), \
            "Expected scale (learned) > 0, got %s" % scale_learned
        self.stddev = scale_learned * np.sqrt(1.0 / self.input_dims)

        # Learned Parameters
        (self.learned_w, self.learned_b) = self._initialize_params(
            'learned_w',
            'learned_b',
            w_init=weight_init_learned,
            b_init=bias_init_learned,
            w_optim=weight_optim,
            b_optim=bias_optim
        )

    def add_ops(self, net):
        # Learned features: wx + b
        learned_features = net.FC(self.input_record_full.field_blobs() +
                                  [self.learned_w, self.learned_b],
                                  net.NextScopedBlob('learned_features'))
        # Random features: wx + b
        random_features = net.FC(self.input_record_random.field_blobs() +
                                 [self.random_w, self.random_b],
                                 net.NextScopedBlob('random_features'))
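        # Apply H(x_rand) * x_rand^s to the random features (the Heaviside
        # step and power term from the docstring); the result is written to
        # this layer's 'random' output.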
        processed_random_features = self._heaviside_with_power(
            net,
            random_features,
            self.output_schema.random.field_blobs(),
            self.s
        )
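        # 'full' output: elementwise product of the processed random features
        # and the learned features, i.e. H(x_rand) * x_rand^s * x_learned.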
        net.Mul([processed_random_features, learned_features],
                self.output_schema.full.field_blobs())
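

# A hypothetical usage sketch (an assumption, not code from this file):
# layers under caffe2.python.layers are typically instantiated through a
# LayerModelHelper, roughly as follows; `model`, `input_feature_schema`,
# `trainer_extra_schema`, and `input_record` are illustrative names.
#
#     from caffe2.python import layer_model_helper
#
#     model = layer_model_helper.LayerModelHelper(
#         "sparse_nn", input_feature_schema, trainer_extra_schema)
#     srf = model.SemiRandomFeatures(
#         input_record,   # a schema.Scalar, or a Struct with 'full'/'random'
#         output_dims=16,
#         s=1,
#     )
#     # srf.full and srf.random are the layer's two output Scalars.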