Mirror of https://github.com/pytorch/pytorch.git
Summary: The `2to3` tool has a `future` fixer that can be targeted specifically to remove these redundant `from __future__` imports; the `caffe2` directory has the most of them:

```
2to3 -f future -w caffe2
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/45033
Reviewed By: seemethere
Differential Revision: D23808648
Pulled By: bugra
fbshipit-source-id: 38971900f0fe43ab44a9168e57f2307580d36a38
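For context, a minimal before/after sketch of what the `future` fixer does (the file contents here are illustrative, not from this PR):

```python
# Before: Python 2/3 compatibility boilerplate common across caffe2 files.
from __future__ import absolute_import, division, print_function, unicode_literals

from caffe2.python import core

# After running `2to3 -f future -w <file>`, the `from __future__` line is
# deleted: all four behaviors are already the default in Python 3.
```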
```python
## @package train
# Module caffe2.python.helpers.train

from caffe2.python import core, scope
from caffe2.proto import caffe2_pb2


def _get_weights(model, namescope=None):
    # Collect the model's weight blobs, restricted to the given name scope
    # (defaulting to the currently active one).
    if namescope is None:
        namescope = scope.CurrentNameScope()

    if namescope == '':
        return model.weights[:]
    else:
        return [w for w in model.weights if w.GetNameScope() == namescope]


def iter(model, blob_out, **kwargs):
    # The iteration counter is a mutable int64 blob that must live on CPU,
    # so any caller-supplied device_option is discarded and the blob is
    # pinned to CPU explicitly.
    if 'device_option' in kwargs:
        del kwargs['device_option']
    model.param_init_net.ConstantFill(
        [],
        blob_out,
        shape=[1],
        value=0,
        dtype=core.DataType.INT64,
        device_option=core.DeviceOption(caffe2_pb2.CPU, 0),
        **kwargs
    )
    return model.net.Iter(blob_out, blob_out, **kwargs)


def accuracy(model, blob_in, blob_out, **kwargs):
    dev = kwargs['device_option'] if 'device_option' in kwargs \
        else scope.CurrentDeviceScope()
    is_cpu = dev is None or dev.device_type == caffe2_pb2.CPU

    # We support top_k > 1 only on CPU
    if not is_cpu and 'top_k' in kwargs and kwargs['top_k'] > 1:
        pred_host = model.net.CopyGPUToCPU(blob_in[0], blob_in[0] + "_host")
        label_host = model.net.CopyGPUToCPU(blob_in[1], blob_in[1] + "_host")

        # Drop any GPU device_option from kwargs so it does not clash with
        # the explicit CPU device_option below.
        kwargs.pop('device_option', None)

        # Now use the Host version of the accuracy op
        model.net.Accuracy(
            [pred_host, label_host],
            blob_out,
            device_option=core.DeviceOption(caffe2_pb2.CPU, 0),
            **kwargs
        )
    else:
        # Forward kwargs here too, so options such as top_k are not
        # silently dropped on the CPU path.
        model.net.Accuracy(blob_in, blob_out, **kwargs)


def add_weight_decay(model, weight_decay):
    """Adds a decay to weights in the model.

    This is a form of L2 regularization.

    Args:
        weight_decay: strength of the regularization
    """
    if weight_decay <= 0.0:
        return
    wd = model.param_init_net.ConstantFill(
        [], 'wd', shape=[1], value=weight_decay
    )
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    for param in _get_weights(model):
        # Equivalent to: grad += wd * param
        grad = model.param_to_grad[param]
        model.net.WeightedSum(
            [grad, ONE, param, wd],
            grad,
        )
```
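For orientation, here is a minimal usage sketch of these helpers, assuming a standard `ModelHelper`/`brew` training setup; the blob names, layer sizes, and decay constant are illustrative, not part of this module:

```python
from caffe2.python import brew, model_helper
from caffe2.python.helpers import train

# Hypothetical toy network: one fully-connected layer with softmax loss.
model = model_helper.ModelHelper(name="train_example")
pred = brew.fc(model, "data", "pred", dim_in=784, dim_out=10)
softmax, loss = model.net.SoftmaxWithLoss(
    [pred, "label"], ["softmax", "loss"]
)

# Gradients must exist before add_weight_decay: it reads model.param_to_grad.
model.AddGradientOperators([loss])

train.add_weight_decay(model, 1e-4)       # grad += 1e-4 * param, per weight
train.iter(model, "iteration")            # persistent int64 step counter on CPU
train.accuracy(model, ["softmax", "label"], "accuracy", top_k=1)
```

Note the trick inside `add_weight_decay`: `WeightedSum([grad, ONE, param, wd], grad)` computes `grad = 1.0 * grad + wd * param` in place, which is why the constant `ONE` blob is created alongside `wd`.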