Rewrite Python built-in class `super()` calls. Only non-semantic changes should be applied.

- #94587
- #94588
- #94592

Also, methods whose body consists only of a `super()` call are removed:

```diff
 class MyModule(nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, ...):
         ...
```

Cases where the rewrite would change semantics are left unchanged, e.g.:

f152a79be9/caffe2/python/net_printer.py (L184-L190)
f152a79be9/test/test_jit_fuser_te.py (L2628-L2635)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/94587
Approved by: https://github.com/ezyang
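For context, the `super()` rewrite referred to above is presumably the replacement of the two-argument form by the zero-argument form. A minimal sketch (the class and method names here are illustrative only, not taken from the diff):

```python
import torch.nn as nn


class MyModule(nn.Module):
    def __init__(self):
        # Python 2 style, still valid but redundant:
        #     super(MyModule, self).__init__()
        # Zero-argument form produced by the rewrite; identical behavior
        # when called inside a class body:
        super().__init__()
```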
364 lines
12 KiB
Python
## @package fc_with_bootstrap
# Module caffe2.python.layers.fc_with_bootstrap


import math

import numpy as np

from caffe2.python import core, schema
from caffe2.python.helpers.arg_scope import get_current_scope
from caffe2.python.layers.layers import ModelLayer
from caffe2.python.layers.sampling_trainable_mixin import SamplingTrainableMixin

def get_fc_predictor_version(fc_version):
    assert fc_version in ["fp32"], (
        "Only support fp32 for the fully connected layer "
        "in the predictor net, the provided FC precision is {}".format(fc_version)
    )
    return fc_version
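# add_ops() below resolves the FC precision for the predictor net by looking up
# this function's name in the current arg_scope (via get_current_scope());
# "fp32" is the only value the assert above accepts today.
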
class FCWithBootstrap(SamplingTrainableMixin, ModelLayer):
    def __init__(
        self,
        model,
        input_record,
        output_dims,
        num_bootstrap,
        weight_init=None,
        bias_init=None,
        weight_optim=None,
        bias_optim=None,
        name="fc_with_bootstrap",
        weight_reg=None,
        bias_reg=None,
        clip_param=None,
        axis=1,
        **kwargs
    ):
        super().__init__(model, name, input_record, **kwargs)
        assert isinstance(
            input_record, schema.Scalar
        ), "Incorrect input type {}".format(input_record)
        assert (
            len(input_record.field_types()[0].shape) > 0
        ), "FC expects limited dimensions of the input tensor"
        assert axis >= 1, "axis {} should >= 1.".format(axis)
        self.axis = axis
        input_dims = np.prod(input_record.field_types()[0].shape[axis - 1 :])

        assert input_dims > 0, "FC expects input dimensions > 0, got {}".format(
            input_dims
        )

        self.clip_args = None

        # attributes for bootstrapping below
        self.num_bootstrap = num_bootstrap

        # input dim shape
        self.input_dims = input_dims

        # bootstrapped fully-connected layers to be used in eval time
        self.bootstrapped_FCs = []

        # scalar containing batch_size blob so that we don't need to recompute
        self.batch_size = None

        # we want this to be the last FC, so the output_dim should be 1, set to None
        self.output_dim_vec = None

        # lower bound when creating random indices
        self.lower_bound = None

        # upper bound when creating random indices
        self.upper_bound = None

        if clip_param is not None:
            assert len(clip_param) == 2, (
                "clip_param must be a tuple / list "
                "of length 2 and in the form of (clip_min, clip_max)"
            )
            clip_min, clip_max = clip_param
            assert (
                clip_min is not None or clip_max is not None
            ), "clip_min, and clip_max in clip_param cannot both be None"
            assert (
                clip_min is None or clip_max is None
            ) or clip_min < clip_max, (
                "clip_param = [clip_min, clip_max] must have clip_min < clip_max"
            )
            self.clip_args = {}
            if clip_min is not None:
                self.clip_args["min"] = clip_min
            if clip_max is not None:
                self.clip_args["max"] = clip_max

        scale = math.sqrt(1.0 / input_dims)
        weight_init = (
            weight_init
            if weight_init
            else ("UniformFill", {"min": -scale, "max": scale})
        )
        bias_init = (
            bias_init if bias_init else ("UniformFill", {"min": -scale, "max": scale})
        )

        """
        bootstrapped FCs:
            Ex: [
                bootstrapped_weights_blob_1, bootstrapped_bias_blob_1,
                ...,
                ...,
                bootstrapped_weights_blob_b, bootstrapped_bias_blob_b
            ]

        output_schema:
            Note: indices will always be on even indices.
            Ex: Struct(
                indices_0_blob,
                preds_0_blob,
                ...
                ...
                indices_b_blob,
                preds_b_blob
            )
        """
        bootstrapped_FCs = []
        output_schema = schema.Struct()
        for i in range(num_bootstrap):
            output_schema += schema.Struct(
                (
                    "bootstrap_iteration_{}/indices".format(i),
                    self.get_next_blob_reference(
                        "bootstrap_iteration_{}/indices".format(i)
                    ),
                ),
                (
                    "bootstrap_iteration_{}/preds".format(i),
                    self.get_next_blob_reference(
                        "bootstrap_iteration_{}/preds".format(i)
                    ),
                ),
            )
            self.bootstrapped_FCs.extend(
                [
                    self.create_param(
                        param_name="bootstrap_iteration_{}/w".format(i),
                        shape=[output_dims, input_dims],
                        initializer=weight_init,
                        optimizer=weight_optim,
                        regularizer=weight_reg,
                    ),
                    self.create_param(
                        param_name="bootstrap_iteration_{}/b".format(i),
                        shape=[output_dims],
                        initializer=bias_init,
                        optimizer=bias_optim,
                        regularizer=bias_reg,
                    ),
                ]
            )

        self.output_schema = output_schema

        if axis == 1:
            output_shape = (output_dims,)
        else:
            output_shape = list(input_record.field_types()[0].shape)[0 : axis - 1]
            output_shape = tuple(output_shape + [output_dims])
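    # Parameter layout note: each bootstrap iteration i contributes a
    # (weights, bias) pair, so self.bootstrapped_FCs stores
    # [w_0, b_0, w_1, b_1, ...] and iteration i's pair is recovered later with
    # the slice [i * 2 : (i * 2) + 2] in add_ops() / add_train_ops(). Likewise,
    # output_schema alternates indices/preds, which is why predictions live at
    # field index (i * 2) + 1.
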
    def _generate_bootstrapped_indices(self, net, copied_cur_layer, iteration):
        """
        Args:
            net: the caffe2 net to insert operator

            copied_cur_layer: blob of the bootstrapped features (make sure this
                blob has a stop_gradient on)

            iteration: the bootstrap iteration to generate for. Used to correctly
                populate the output_schema

        Return:
            A blob containing the generated indices of shape: (batch_size,)
        """
        with core.NameScope("bootstrap_iteration_{}".format(iteration)):
            if iteration == 0:
                # capture batch_size once for efficiency
                input_shape = net.Shape(copied_cur_layer, "input_shape")
                batch_size_index = net.Const(np.array([0]), "batch_size_index")
                batch_size = net.Gather([input_shape, batch_size_index], "batch_size")
                self.batch_size = batch_size

                lower_bound = net.Const(np.array([0]), "lower_bound", dtype=np.int32)
                offset = net.Const(np.array([1]), "offset", dtype=np.int32)
                int_batch_size = net.Cast(
                    [self.batch_size], "int_batch_size", to=core.DataType.INT32
                )
                upper_bound = net.Sub([int_batch_size, offset], "upper_bound")

                self.lower_bound = lower_bound
                self.upper_bound = upper_bound

            indices = net.UniformIntFill(
                [self.batch_size, self.lower_bound, self.upper_bound],
                self.output_schema[iteration * 2].field_blobs()[0],
                input_as_shape=1,
            )

            return indices
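    # Sampling note: UniformIntFill with input_as_shape=1 reads its output shape
    # from the batch_size blob and, given three inputs, uses lower_bound and
    # upper_bound as the min/max of the uniform draw, i.e. integers in
    # [0, batch_size - 1]. Gathering rows with these indices (see _bootstrap_ops)
    # resamples the batch with replacement, one bootstrap resample per iteration.
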
    def _bootstrap_ops(self, net, copied_cur_layer, indices, iteration):
        """
        This method contains all the bootstrapping logic used to bootstrap
        the features. Only used by the train_net.

        Args:
            net: the caffe2 net to insert bootstrapping operators

            copied_cur_layer: the blob representing the current features.
                Note, this layer should have a stop_gradient on it.

        Returns:
            bootstrapped_features: blob of bootstrapped version of cur_layer
                with same dimensions
        """

        # draw features based upon the bootstrapped indices
        bootstrapped_features = net.Gather(
            [copied_cur_layer, indices],
            net.NextScopedBlob("bootstrapped_features_{}".format(iteration)),
        )

        bootstrapped_features = schema.Scalar(
            (np.float32, self.input_dims), bootstrapped_features
        )

        return bootstrapped_features
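    # The gathered blob is wrapped back into a schema.Scalar typed as
    # (np.float32, input_dims) so that _insert_fc_ops can call
    # features.field_blobs() in the same way for both the raw input_record and
    # the bootstrapped features.
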
    def _insert_fc_ops(self, net, features, params, outputs, version):
        """
        Args:
            net: the caffe2 net to insert operator

            features: Scalar containing blob of the bootstrapped features or
                actual cur_layer features

            params: weight and bias for FC

            outputs: the output blobs

            version: support fp32 for now.
        """

        if version == "fp32":
            pred_blob = net.FC(
                features.field_blobs() + params, outputs, axis=self.axis, **self.kwargs
            )
            return pred_blob
        else:
            raise Exception("unsupported FC type version {}".format(version))
    def _add_ops(self, net, features, iteration, params, version):
        """
        Args:
            params: the weight and bias, passed by either add_ops or
                add_train_ops function

            features: feature blobs to predict on. Can be the actual cur_layer
                or the bootstrapped_feature blobs.

            version: currently fp32 support only
        """

        if self.clip_args is not None:
            clipped_params = [net.NextScopedBlob("clipped_%s" % str(p)) for p in params]
            for p, cp in zip(params, clipped_params):
                net.Clip([p], [cp], **self.clip_args)
            params = clipped_params

        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            self._insert_fc_ops(
                net=net,
                features=features,
                params=params,
                outputs=[self.output_schema.field_blobs()[(iteration * 2) + 1]],
                version=version,
            )
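    # Clipping note: self.clip_args stays None unless clip_param was passed to
    # __init__, so the Clip ops (and the clipped_* copies of the weight/bias
    # blobs) are only inserted when explicit clip bounds were requested.
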
    def add_ops(self, net):
        """
        Both the predict net and the eval net will call this function.

        For the bootstrapping approach, the goal is to pass the cur_layer feature
        inputs through all the bootstrapped FCs that are stored under
        self.bootstrapped_FCs. Return the preds in the same output_schema
        with dummy indices (because they are not needed).
        """

        version_info = get_current_scope().get(
            get_fc_predictor_version.__name__, {"fc_version": "fp32"}
        )
        predictor_fc_fp_version = version_info["fc_version"]

        for i in range(self.num_bootstrap):
            # these are dummy indices, not to be used anywhere
            indices = self._generate_bootstrapped_indices(
                net=net,
                copied_cur_layer=self.input_record.field_blobs()[0],
                iteration=i,
            )

            params = self.bootstrapped_FCs[i * 2 : (i * 2) + 2]

            self._add_ops(
                net=net,
                features=self.input_record,
                params=params,
                iteration=i,
                version=predictor_fc_fp_version,
            )
    def add_train_ops(self, net):
        # use the train_param_blobs to be consistent with the SamplingTrain unittest

        # obtain features
        for i in range(self.num_bootstrap):
            indices = self._generate_bootstrapped_indices(
                net=net,
                copied_cur_layer=self.input_record.field_blobs()[0],
                iteration=i,
            )
            bootstrapped_features = self._bootstrap_ops(
                net=net,
                copied_cur_layer=self.input_record.field_blobs()[0],
                indices=indices,
                iteration=i,
            )
            self._add_ops(
                net,
                features=bootstrapped_features,
                iteration=i,
                params=self.train_param_blobs[i * 2 : (i * 2) + 2],
                version="fp32",
            )
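    # Train vs. eval: add_train_ops resamples each batch (indices -> Gather) and
    # pushes the bootstrapped features through iteration i's (w, b) pair taken
    # from self.train_param_blobs, whereas add_ops above runs every bootstrapped
    # FC directly on the unmodified input_record and only emits dummy indices.
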
    def get_fp16_compatible_parameters(self):
        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            return [
                blob for idx, blob in enumerate(self.bootstrapped_FCs) if idx % 2 == 0
            ]
        else:
            raise Exception(
                "Currently only supports functionality for output_dim_vec == 1"
            )
    @property
    def param_blobs(self):
        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            return self.bootstrapped_FCs
        else:
            raise Exception("FCWithBootstrap layer only supports output_dim_vec==1")
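As a rough usage sketch: in the caffe2 layers framework, a registered ModelLayer subclass can typically be attached to a LayerModelHelper by name. Everything below (the schema names, the dimensions, the LayerModelHelper keyword arguments, and the attribute-style `model.FCWithBootstrap(...)` dispatch) is an assumption for illustration, not taken from this file:

```python
import numpy as np

from caffe2.python import schema
from caffe2.python.layer_model_helper import LayerModelHelper

# Hypothetical 32-dimensional dense input feature (names are illustrative).
input_schema = schema.Struct(
    ("float_features", schema.Scalar((np.float32, (32,)))),
)
model = LayerModelHelper(
    "fc_with_bootstrap_example",
    input_feature_schema=input_schema,
    trainer_extra_schema=schema.Struct(),
)

# One FCWithBootstrap layer: 10 bootstrap resamples, each with its own
# single-output FC head (output_dims=1).
preds = model.FCWithBootstrap(
    model.input_feature_schema.float_features,
    output_dims=1,
    num_bootstrap=10,
)
# If the dispatch works as assumed, preds is the layer's output_schema: a
# schema.Struct alternating bootstrap_iteration_i/indices and
# bootstrap_iteration_i/preds blobs, as documented in __init__ above.
```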