Add LambdaRank DCG Loss Option (#23679)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/23679
Full Canary: https://fburl.com/fblearner/sa1pkpya
Add a LambdaRank DCG loss option, controlled by a new `use_idcg_normalization` operator argument (defaulting to true; see the usage sketch below):
* When use_idcg_normalization == true, the operator computes the regular LambdaRank loss with NDCG, i.e. the gradient and loss are normalized by idcg.
* When use_idcg_normalization == false, the gradient and loss functions are not normalized by idcg, yielding a plain DCG loss.
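
A minimal usage sketch with the Caffe2 Python API, mirroring the new tests below; the input shapes, dtypes, and values here are illustrative assumptions, not part of this diff:

```python
import numpy as np
from caffe2.python import core, workspace

# One session of n items (shapes/dtypes are assumptions for this sketch).
n = 8
workspace.blobs["y"] = np.random.rand(n).astype(np.float32)  # predicted scores
workspace.blobs["r"] = np.random.randint(0, 5, n).astype(np.float32)  # relevance labels
workspace.blobs["session_lengths"] = np.array([n], dtype=np.int32)

# use_idcg_normalization=False selects the new un-normalized DCG loss;
# leaving it at its default (True) keeps the old NDCG-normalized behavior.
op = core.CreateOperator(
    "LambdaRankNdcg",
    ["y", "r", "session_lengths"],
    ["loss", "dy"],
    use_ndcg_as_loss=True,
    use_idcg_normalization=False,
    use_exp_gain=True,
)
workspace.RunOperatorOnce(op)
loss, dy = workspace.blobs["loss"], workspace.blobs["dy"]
```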

Differential Revision: D16605459

fbshipit-source-id: a16f071e69516974e48d27bef4ca179019ca4ae7
Author: Jiexian Li
Date: 2019-08-02 11:38:10 -07:00
Committed by: Facebook Github Bot
Parent commit: fc6aec9491
This commit: 302adf1d20
3 changed files with 109 additions and 18 deletions

@@ -131,9 +131,6 @@ float LambdaRankNdcgOp<float, CPUContext>::LambdaRankNdcgSession(
       ideal_discount_data, discount_.numel());
   // ideal dcg = \sum gain_i * ideal_discount_i
   double idcg = (gain_vec * ideal_discount_vec).sum();
-  if (idcg < 1e-5) {
-    idcg = 1e-5;
-  }
 
   ComputeDiscounts(rank_idx_data, N);
   auto* discount_data = discount_.template mutable_data<float>();
@@ -156,17 +153,25 @@ float LambdaRankNdcgOp<float, CPUContext>::LambdaRankNdcgSession(
           CWISE_SIGM(
               -CWISE_SIGN(PAIRWISE_DIFF(r_vec, N)) * PAIRWISE_DIFF(y_vec, N)))
              .rowwise()
-             .sum() /
-        idcg;
+             .sum();
   if (use_ndcg_as_loss_) {
-    loss = 1 - dcg / idcg;
+    // DCG loss function
+    loss = (idcg - dcg);
   } else {
     loss = -(lambda_mat *
              CWISE_LOG_SIGM(
                  CWISE_SIGN(PAIRWISE_DIFF(r_vec, N)) * PAIRWISE_DIFF(y_vec, N),
                  100))
-                .sum() /
-        idcg;
+                .sum();
   }
+  // If use_idcg_normalization_ is true, the loss function is normalized by
+  // idcg (e.g. NDCG); otherwise the loss function is un-normalized (e.g. DCG).
+  // Note that normalization is mathematically correct only if idcg is
+  // guaranteed to be positive!
+  if (use_idcg_normalization_) {
+    dy_vec /= std::max(idcg, 1e-5);
+    loss /= std::max(idcg, 1e-5);
+  }
   return loss;
 }
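
For reference, the two loss conventions implemented above, in terms of the per-session dcg and idcg computed earlier; note the NDCG form only reduces to 1 - dcg/idcg when idcg exceeds the 1e-5 floor, as the code comment warns:

```latex
% use_idcg_normalization_ == true (NDCG-normalized LambdaRank loss):
\mathcal{L}_{\text{NDCG}}
  = \frac{\mathrm{idcg} - \mathrm{dcg}}{\max(\mathrm{idcg},\, 10^{-5})}
  = 1 - \frac{\mathrm{dcg}}{\mathrm{idcg}}
  \quad (\text{for } \mathrm{idcg} \ge 10^{-5})

% use_idcg_normalization_ == false (un-normalized DCG loss):
\mathcal{L}_{\text{DCG}} = \mathrm{idcg} - \mathrm{dcg}
```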

@@ -17,6 +17,9 @@ class LambdaRankNdcgOp final : public Operator<Context> {
       : Operator<Context>(std::forward<Args>(args)...),
         use_ndcg_as_loss_(
             this->template GetSingleArgument<bool>("use_ndcg_as_loss", false)),
+        use_idcg_normalization_(this->template GetSingleArgument<bool>(
+            "use_idcg_normalization",
+            true)),
         use_exp_gain_(
             this->template GetSingleArgument<bool>("use_exp_gain", true)) {}
   USE_OPERATOR_CONTEXT_FUNCTIONS;
@@ -35,6 +38,7 @@ class LambdaRankNdcgOp final : public Operator<Context> {
       const Tensor& r,
       Tensor** dy);
   bool use_ndcg_as_loss_;
+  bool use_idcg_normalization_;
   bool use_exp_gain_;
   Tensor gain_;
   Tensor discount_;

@@ -8,7 +8,9 @@ from hypothesis import given
 
 class TestListwiseL2rOps(hu.HypothesisTestCase):
-    def ref_lambda_rank_loss(self, y, r, use_ndcg_as_loss, use_exp_gain):
+    def ref_lambda_rank_loss(
+        self, y, r, use_ndcg_as_loss, use_idcg_normalization, use_exp_gain
+    ):
         n = len(y)
 
         def get_discounts(v):
@@ -35,14 +37,16 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
         d = get_discounts(r)
         idcg = sum([g[i] * d[i] for i in range(n)])
-        if idcg < 1e-5:
-            idcg = 1e-5
+        if use_idcg_normalization:
+            session_weight = max(idcg, 1e-5)
+        else:
+            session_weight = 1
 
         d = get_discounts(y)
 
         if use_ndcg_as_loss:
             dcg = sum(g[i] * d[i] for i in range(n))
-            loss = 1.0 - dcg / idcg
+            loss = (idcg - dcg) / session_weight
 
         for i in range(n):
             for j in range(n):
                 if i == j:
@@ -51,8 +55,9 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
                 rank_loss = -log_sigm(y[i] - y[j] if r[i] > r[j] else y[j] - y[i])
                 rank_dy = (0.0 if r[i] > r[j] else 1.0) - sigm(-y[i] + y[j])
                 if not use_ndcg_as_loss:
-                    loss += lambda_weight * rank_loss / idcg
-                dy[i] += lambda_weight * rank_dy / idcg
+                    loss += lambda_weight * rank_loss / session_weight
+                dy[i] += lambda_weight * rank_dy / session_weight
         return loss, dy
 
     @given(n=st.integers(1, 20), k=st.integers(2, 5), m=st.integers(3, 5))
@@ -64,24 +69,41 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
         ref_loss = np.empty(0)
         ref_ndcg_loss = np.empty(0)
         ref_ndcg_loss_no_exp = np.empty(0)
+        ref_dcg_loss = np.empty(0)
+        ref_dcg_loss_no_exp = np.empty(0)
         ref_dy = np.empty(0)
         ref_dy_no_exp = np.empty(0)
+        ref_dcg_dy = np.empty(0)
+        ref_dcg_dy_no_exp = np.empty(0)
         for i in range(m):
             r_loss, r_dy = self.ref_lambda_rank_loss(
-                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], False, False
+                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], False, True, False
             )
             r_ndcg_loss, _ = self.ref_lambda_rank_loss(
-                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, True
+                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, True, True
             )
             r_ndcg_loss_no_exp, r_dy_no_exp = self.ref_lambda_rank_loss(
-                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, False
+                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, True, False
             )
+            r_dcg_loss, r_dcg_dy = self.ref_lambda_rank_loss(
+                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, False, True
+            )
+            r_dcg_loss_no_exp, r_dcg_dy_no_exp = self.ref_lambda_rank_loss(
+                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, False, False
+            )
             ref_loss = np.append(ref_loss, r_loss)
             ref_dy = np.append(ref_dy, r_dy)
             ref_ndcg_loss = np.append(ref_ndcg_loss, r_ndcg_loss)
             ref_ndcg_loss_no_exp = np.append(ref_ndcg_loss_no_exp, r_ndcg_loss_no_exp)
             ref_dy_no_exp = np.append(ref_dy_no_exp, r_dy_no_exp)
+            ref_dcg_loss = np.append(ref_dcg_loss, r_dcg_loss)
+            ref_dcg_dy = np.append(ref_dcg_dy, r_dcg_dy)
+            ref_dcg_loss_no_exp = np.append(ref_dcg_loss_no_exp, r_dcg_loss_no_exp)
+            ref_dcg_dy_no_exp = np.append(ref_dcg_dy_no_exp, r_dcg_dy_no_exp)
 
         dloss = np.random.random(m).astype(np.float32)
 
         workspace.blobs["y"] = y
@@ -94,6 +116,7 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
             ["y", "r", "session_lengths"],
             ["loss", "dy"],
             use_ndcg_as_loss=False,
+            use_idcg_normalization=True,
             use_exp_gain=False,
         )
         workspace.RunOperatorOnce(op)
@@ -107,6 +130,7 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
             ["y", "r", "session_lengths"],
             ["loss", "dy"],
             use_ndcg_as_loss=True,
+            use_idcg_normalization=True,
             use_exp_gain=True,
         )
         workspace.RunOperatorOnce(op)
@@ -135,6 +159,7 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
             ["y", "r", "session_lengths"],
             ["loss", "dy"],
             use_ndcg_as_loss=True,
+            use_idcg_normalization=True,
             use_exp_gain=False,
         )
         workspace.RunOperatorOnce(op)
@@ -148,7 +173,6 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
             ["y", "session_lengths", "dy", "dloss"],
             ["dy_back"],
         )
-
         workspace.RunOperatorOnce(op)
         dy_back = workspace.blobs["dy_back"]
         for i in range(m):
@@ -158,3 +182,61 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
                 rtol=1e-5,
                 atol=1e-6,
             )
+
+        op = core.CreateOperator(
+            "LambdaRankNdcg",
+            ["y", "r", "session_lengths"],
+            ["loss", "dy"],
+            use_ndcg_as_loss=True,
+            use_idcg_normalization=False,
+            use_exp_gain=True,
+        )
+        workspace.RunOperatorOnce(op)
+        loss = workspace.blobs["loss"]
+        dy = workspace.blobs["dy"]
+        np.testing.assert_allclose(loss, ref_dcg_loss, rtol=1e-5, atol=1e-6)
+        np.testing.assert_allclose(dy, ref_dcg_dy, rtol=1e-5, atol=1e-6)
+
+        op = core.CreateOperator(
+            "LambdaRankNdcgGradient",
+            ["y", "session_lengths", "dy", "dloss"],
+            ["dy_back"],
+        )
+        workspace.RunOperatorOnce(op)
+        dy_back = workspace.blobs["dy_back"]
+        for i in range(m):
+            np.testing.assert_allclose(
+                dy_back[i * n : (i + 1) * n],
+                dloss[i] * ref_dcg_dy[i * n : (i + 1) * n],
+                rtol=1e-5,
+                atol=1e-6,
+            )
+
+        op = core.CreateOperator(
+            "LambdaRankNdcg",
+            ["y", "r", "session_lengths"],
+            ["loss", "dy"],
+            use_ndcg_as_loss=True,
+            use_idcg_normalization=False,
+            use_exp_gain=False,
+        )
+        workspace.RunOperatorOnce(op)
+        loss = workspace.blobs["loss"]
+        dy = workspace.blobs["dy"]
+        np.testing.assert_allclose(loss, ref_dcg_loss_no_exp, rtol=1e-5, atol=1e-6)
+        np.testing.assert_allclose(dy, ref_dcg_dy_no_exp, rtol=1e-5, atol=1e-6)
+
+        op = core.CreateOperator(
+            "LambdaRankNdcgGradient",
+            ["y", "session_lengths", "dy", "dloss"],
+            ["dy_back"],
+        )
+        workspace.RunOperatorOnce(op)
+        dy_back = workspace.blobs["dy_back"]
+        for i in range(m):
+            np.testing.assert_allclose(
+                dy_back[i * n : (i + 1) * n],
+                dloss[i] * ref_dcg_dy_no_exp[i * n : (i + 1) * n],
+                rtol=1e-5,
+                atol=1e-6,
+            )
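
The `LambdaRankNdcgGradient` assertions above check that the backward op scales each session's per-item gradient `dy` by that session's upstream gradient `dloss[i]`. A small NumPy sketch of that reference relationship (the values and sizes are made up, not taken from the test):

```python
import numpy as np

n, m = 4, 2  # items per session, number of sessions (illustrative)
dy = np.random.rand(m * n).astype(np.float32)  # forward op's per-item gradients
dloss = np.random.rand(m).astype(np.float32)   # upstream gradient, one per session

# Expected dy_back, session by session:
#   dy_back[i * n : (i + 1) * n] == dloss[i] * dy[i * n : (i + 1) * n]
dy_back = np.concatenate([dloss[i] * dy[i * n : (i + 1) * n] for i in range(m)])
```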