## @package gradient_checker
# Module caffe2.python.gradient_checker


import os
import numpy as np

from caffe2.python import core, workspace, net_drawer
from caffe2.proto import caffe2_pb2


def getGradientForOp(op):
    return core.GradientRegistry.GetGradientForOp(
        op, [s + '_grad' for s in op.output])


def _get_grad_blob(grad_map, input_to_check):
    grad_blob = grad_map[input_to_check]

    if isinstance(grad_blob, core.BlobReference):
        return workspace.blobs[grad_blob]

    # If grad_blob is not a single blob, it should be a gradient slice.
    # To make it comparable with the estimiated gradient which is dense,
    # we need to first convert grad_blob to dense gradient.
    assert isinstance(grad_blob, core.GradientSlice)
    dense_grad = 'tmp_dense_grad'
    sparse_to_dense_op = core.CreateOperator(
        'SparseToDense',
        [grad_blob.indices, grad_blob.values, input_to_check],
        dense_grad,
    )
    workspace.RunOperatorOnce(sparse_to_dense_op)
    return workspace.blobs[dense_grad]


def _get_grad(net, outputs, outputs_with_grad, input_values, inputs_with_grads):
    grad_net = net.Clone(net.Name() + "_copy")
    grad_map = grad_net.AddGradientOperators(outputs_with_grad)

    for name, value in (input_values or {}).items():
        workspace.blobs[name] = value

    for input_to_check in inputs_with_grads:
        assert input_to_check in grad_map, (
            '{} has no gradient, cannot check net gradient.'.format(
                input_to_check))
        assert str(input_to_check) in workspace.blobs

    workspace.RunNetOnce(grad_net)
    forward_results = [(output, workspace.blobs[output]) for output in outputs]
    grads = {input_to_check: _get_grad_blob(grad_map, input_to_check)
             for input_to_check in inputs_with_grads}

    return forward_results, grads, grad_net


def _assert_close(value1, value2, threshold, err_msg=''):
    np.testing.assert_allclose(
        value1, value2,
        atol=threshold, rtol=threshold,
        err_msg=err_msg,
    )

    delta = np.abs(value1 - value2).flatten()
    return np.mean(delta), max(delta)


class NetGradientChecker(object):
    @staticmethod
    def CompareNets(nets, outputs, outputs_with_grad_ids,
                    inputs_with_grads, input_values=None,
                    threshold=0.0000001, print_net_images=False):
        def _get_output_with_grad_names(net_outputs):
            return [net_outputs[i] for i in outputs_with_grad_ids]

        if print_net_images:
            for i, net in enumerate(nets):
                png = net_drawer.GetPydotGraph(net).create_png()
                with open("caffe2_net_forward_" + str(i) + net.Name() + ".png",
                          'wb') \
                        as f:
                    f.write(png)

        results = [
            _get_grad(net, net_outputs,
                      _get_output_with_grad_names(net_outputs),
                      input_values, inputs_with_grads)
            for net, net_outputs in zip(nets, outputs)
        ]

        if print_net_images:
            _, _, backward_nets = zip(*results)
            for i, net in enumerate(backward_nets):
                png = net_drawer.GetPydotGraph(net).create_png()
                with open("caffe2_net_" + str(i) + net.Name() + ".png", 'wb') \
                        as f:
                    f.write(png)

        first_net_results, first_net_grads, _ = results[0]
        for net_results, net_grads, _ in results[1:]:
            assert len(net_results) == len(first_net_results)
            for idx, ((blob1, blob_value1), (blob2, blob_value2)) in enumerate(
                    zip(first_net_results, net_results)):
                _assert_close(
                    blob_value1, blob_value2, threshold,
                    err_msg="Different forward pass results for output id {}. "
                    "Corresponding output blobs: {} and {}".format(
                        idx, blob1, blob2))

            assert net_grads.keys() == first_net_grads.keys()
            for blob, blob_grad_value in net_grads.items():
                _assert_close(
                    first_net_grads[blob], blob_grad_value, threshold,
                    err_msg="Different gradients for input {}".format(blob))

    @staticmethod
    def Check(net, outputs_with_grad, input_values,
              input_to_check, step_size=0.0001,
              threshold=0.05, print_net=True):

        net_results, net_grads, full_net = _get_grad(
            net, [], outputs_with_grad, input_values, [input_to_check])
        analytic_grad = net_grads[input_to_check]

        def GetLoss(new_value):
            workspace.blobs[input_to_check] = new_value
            workspace.RunNetOnce(full_net)
            return sum([
                workspace.blobs[output]
                for output in outputs_with_grad
            ]).sum()

        def GetValue(dim, delta):
            input_value = input_values[input_to_check].copy()
            input_value.flat[dim] += delta
            return input_value

        grad_estimate = np.zeros_like(input_values[input_to_check])
        for dim in range(input_values[input_to_check].size):
            pos_loss = GetLoss(GetValue(dim, step_size))
            neg_loss = GetLoss(GetValue(dim, -step_size))
            grad_estimate.flat[dim] = (pos_loss - neg_loss) / step_size / 2

        err_msg = "Error in gradient check for net_copy {}".format(
            net.Name())
        if print_net:
            err_msg += ": {}".format(net.Proto())

        return _assert_close(analytic_grad, grad_estimate, threshold, err_msg)


class GradientChecker:
    """A gradient checker in Python.

    This is not the most efficient way to check gradients, as the Python
    interface will involve a lot of copies back and forth operations. Use at your
    own risk.
    """

    def __init__(
        self,
        stepsize,
        threshold,
        device_option=None,
        workspace_name="gradient_check",
        input_device_options=None,
    ):
        self._stepsize = stepsize
        self._threshold = threshold
        self._device_option = device_option or caffe2_pb2.DeviceOption()
        self._workspace_name = workspace_name
        if input_device_options is None:
            self._input_device_options = {}
        else:
            self._input_device_options = input_device_options

    def GetLossAndGrad(
        self, op, grad_ops, inputs, input_names, input_to_check, grad_name,
        outputs_with_grads
    ):
        for i in range(len(inputs)):
            workspace.FeedBlob(input_names[i], inputs[i],
                               self._input_device_options.get(
                input_names[i], self._device_option))
        x = inputs[input_to_check]
        # Run.
        workspace.RunOperatorOnce(op)
        loss = 0.
        # Get Loss and feed in the gradients, run gradient ops.
        for idx in outputs_with_grads:
            name = op.output[idx]
            arr = workspace.FetchBlob(name)
            loss += (arr**2).sum()
            workspace.FeedBlob(name + '_grad', arr, self._device_option)
        loss /= 2.
        # Run gradient ops
        workspace.RunOperatorsOnce(grad_ops)
        # Get gradients
        if isinstance(grad_name, core.GradientSlice):
            workspace.FeedBlob('zeros', np.zeros_like(x, dtype=np.float32))
            workspace.FeedBlob('ones', np.ones(1, dtype=np.float32))
            gv_cpu_op = core.CreateOperator(
                'EnsureCPUOutput', grad_name.values, grad_name.values + '_cpu',
                device_option=self._device_option
            )
            gi_cpu_op = core.CreateOperator(
                'EnsureCPUOutput', grad_name.indices, grad_name.indices + '_cpu',
                device_option=self._device_option
            )
            sparse_to_dense_op = core.CreateOperator(
                'ScatterWeightedSum',
                [
                    'zeros', 'ones', grad_name.indices + '_cpu',
                    grad_name.values + '_cpu', 'ones'
                ],
                'zeros',
            )
            workspace.RunOperatorOnce(gv_cpu_op)
            workspace.RunOperatorOnce(gi_cpu_op)
            workspace.RunOperatorOnce(sparse_to_dense_op)
            grad = workspace.FetchBlob('zeros')
        else:
            grad = workspace.FetchBlob(grad_name)
        return loss, grad

    def CheckSimple(
        self,
        op,
        inputs,
        input_to_check,
        outputs_with_grads,
        grad_ops=None,
        input_device_options=None,
        ensure_outputs_are_inferred=False,
    ):
        """Checks the operator in a very simple fashion by stacking a sum of
        squares on the top.

        Inputs:
          op: the operator to be checked.
          inputs: the input data in numpy arrays.
          input_to_check: an index specifying which input blob we should
              check.
          outputs_with_grads: indices specifying which output blobs will we
              need to check gradients with. For these outputs, we will collect a
              squared sum and also feed in their gradients.
          grad_operator: the gradient operator. If not given, we will get the
              gradient operator from the gradient registry.
          input_device_options: an optional mapping from input names to
              DeviceOptions (to override the default DeviceOption)
          ensure_outputs_are_inferred: if set will assert that the gradient output
              shapes matches the inferred shapes
        Outputs:
          boolean: True if it passes, False if it does not pass.
        """
        # Entering the checker workspace
        old_ws_name = workspace.CurrentWorkspace()
        if self._workspace_name != old_ws_name:
            workspace.SwitchWorkspace(self._workspace_name, True)

        op.device_option.CopyFrom(self._device_option)
        if grad_ops is None:
            # TODO(jiayq): use the gradient registration instead of the old
            # hack.
            grad_ops, g_input = getGradientForOp(op)


        _input_device_options = input_device_options or \
            core.InferOpBlobDevicesAsDict(op)[0]
        # First, feed in the input.
        for i, arr in enumerate(inputs):
            workspace.FeedBlob(
                op.input[i], arr,
                _input_device_options.get(
                    op.input[i], self._device_option))

        # Get the loss and gradient for the original.
        grad_name = g_input[input_to_check]
        loss, grad = self.GetLossAndGrad(
            op, grad_ops, inputs, op.input, input_to_check, grad_name,
            outputs_with_grads,
        )
        grad_estimate = np.zeros_like(inputs[input_to_check])
        if grad_estimate.shape != grad.shape:
            raise Exception(
                "Mismatched gradient shapes: estimated ({}), grad ({})".format(
                    grad_estimate.shape, grad.shape))

        if ensure_outputs_are_inferred:
            self._assertInferTensorChecks(op, grad_ops)

        full_grad_check = os.getenv('CAFFE2_FULL_GRAD_CHECK') == '1'

        dims_to_check = inputs[input_to_check].size
        for current_dim in range(dims_to_check):
            # Grad check is very expensive (as it involves running the op from
            # scratch for each of the input tensor elements). Thus, let's
            # run it by default only on a small subset of dimensions. Here we
            # apply very scientific approach: the first and the last 3 elements
            # of each tensor. Pass CAFFE2_FULL_GRAD_CHECK=1 env var to enable
            # the full check
            if not full_grad_check and current_dim >= 3 and \
                    current_dim + 3 < dims_to_check:
                grad_estimate.flat[current_dim] = grad.flat[current_dim]
                continue
            # Positive gradient
            inputs[input_to_check].flat[current_dim] += self._stepsize
            pos_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs, op.input, input_to_check, grad_name,
                outputs_with_grads
            )
            # Negative gradient
            inputs[input_to_check].flat[current_dim] -= self._stepsize * 2
            neg_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs, op.input, input_to_check, grad_name,
                outputs_with_grads
            )
            # Recover the value
            inputs[input_to_check].flat[current_dim] += self._stepsize
            grad_estimate.flat[current_dim] = (
                pos_loss - neg_loss) / self._stepsize / 2
        # Now, check correctness
        fail_mat = ~np.isclose(
            grad, grad_estimate, atol=self._threshold, rtol=self._threshold)
        if np.any(fail_mat):
            idx = np.flatnonzero(fail_mat)
            print('Failed. [idx, grad, grad_estimate] are:')
            print(np.vstack([idx, grad.flat[idx], grad_estimate.flat[idx]]).T)
            ret = False
        else:
            ret = True
        # After finishing, cleaning up things.
        if self._workspace_name != old_ws_name:
            # We reset the workspace to make sure everything intermediate is
            # cleaned up. Note that there is no need to delete a workspace -
            # when empty it takes a very limited amount of memory.
            workspace.ResetWorkspace()
            workspace.SwitchWorkspace(old_ws_name)
        return ret, grad, grad_estimate

    def _assertInferTensorChecks(self, op, grad_ops):
        tmp_net = caffe2_pb2.NetDef()
        tmp_net.op.extend([op])
        tmp_net.op.extend(grad_ops)
        inferred_shapes, inferred_types = workspace.InferShapesAndTypes(
            [tmp_net],
            nets_proto=True,
        )

        outputs = set()
        for grad_op in grad_ops:
            outputs.update(grad_op.output)

        for output in outputs:
            if output not in inferred_shapes:
                raise Exception(
                    "expected output {} to be inferred".format(output))
            blob = workspace.FetchBlob(output)
            correct_shape = list(blob.shape)
            inferred_shape = list(inferred_shapes[output])
            if correct_shape != inferred_shape:
                raise Exception(
                    "Mismatched inferred shape: want({}), got({})".format(
                        correct_shape, inferred_shape))

            if type(blob) is np.ndarray:
                if blob.dtype == np.dtype('float64'):
                    correct_type = caffe2_pb2.TensorProto.DOUBLE
                elif blob.dtype == np.dtype('float32'):
                    correct_type = caffe2_pb2.TensorProto.FLOAT
                elif blob.dtype == np.dtype('int32'):
                    correct_type = caffe2_pb2.TensorProto.INT32
                elif blob.dtype == np.dtype('int64'):
                    correct_type = caffe2_pb2.TensorProto.INT64
                else:
                    correct_type = "unknown {}".format(np.dtype)
            else:
                correct_type = str(type(blob))
            inferred_type = inferred_types[output]
            if correct_type != inferred_type:
                raise Exception(
                    "Mismatched inferred type: want({}), got({})".format(
                        correct_type, inferred_type))