Add LambdaRank DCG Loss Option (#23679)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/23679
Full Canary: https://fburl.com/fblearner/sa1pkpya
Add a LambdaRank DCG loss option, controlled by a new `use_idcg_normalization` operator argument (defaulting to true; see the usage sketch below):
* When use_idcg_normalization == true, the operator computes the regular LambdaRank loss with NDCG, i.e. the gradient and loss are normalized by idcg.
* When use_idcg_normalization == false, the gradient and loss functions are not normalized by idcg, yielding a plain DCG loss.
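
A minimal usage sketch with the Caffe2 Python API, mirroring the new tests below; the input shapes, dtypes, and values here are illustrative assumptions, not part of this diff:

```python
import numpy as np
from caffe2.python import core, workspace

# One session of n items (shapes/dtypes are assumptions for this sketch).
n = 8
workspace.blobs["y"] = np.random.rand(n).astype(np.float32)  # predicted scores
workspace.blobs["r"] = np.random.randint(0, 5, n).astype(np.float32)  # relevance labels
workspace.blobs["session_lengths"] = np.array([n], dtype=np.int32)

# use_idcg_normalization=False selects the new un-normalized DCG loss;
# leaving it at its default (True) keeps the old NDCG-normalized behavior.
op = core.CreateOperator(
    "LambdaRankNdcg",
    ["y", "r", "session_lengths"],
    ["loss", "dy"],
    use_ndcg_as_loss=True,
    use_idcg_normalization=False,
    use_exp_gain=True,
)
workspace.RunOperatorOnce(op)
loss, dy = workspace.blobs["loss"], workspace.blobs["dy"]
```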

Differential Revision: D16605459

fbshipit-source-id: a16f071e69516974e48d27bef4ca179019ca4ae7
Author: Jiexian Li
Date: 2019-08-02 11:38:10 -07:00
Committed by: Facebook Github Bot
Parent commit: fc6aec9491
This commit: 302adf1d20
3 changed files with 109 additions and 18 deletions

@@ -131,9 +131,6 @@ float LambdaRankNdcgOp<float, CPUContext>::LambdaRankNdcgSession(
       ideal_discount_data, discount_.numel());
   // ideal dcg = \sum gain_i * ideal_discount_i
   double idcg = (gain_vec * ideal_discount_vec).sum();
-  if (idcg < 1e-5) {
-    idcg = 1e-5;
-  }
 
   ComputeDiscounts(rank_idx_data, N);
   auto* discount_data = discount_.template mutable_data<float>();
@@ -156,17 +153,25 @@ float LambdaRankNdcgOp<float, CPUContext>::LambdaRankNdcgSession(
           CWISE_SIGM(
               -CWISE_SIGN(PAIRWISE_DIFF(r_vec, N)) * PAIRWISE_DIFF(y_vec, N)))
              .rowwise()
-             .sum() /
-        idcg;
+             .sum();
   if (use_ndcg_as_loss_) {
-    loss = 1 - dcg / idcg;
+    // DCG loss function
+    loss = (idcg - dcg);
   } else {
     loss = -(lambda_mat *
              CWISE_LOG_SIGM(
                  CWISE_SIGN(PAIRWISE_DIFF(r_vec, N)) * PAIRWISE_DIFF(y_vec, N),
                  100))
-                .sum() /
-        idcg;
+                .sum();
   }
+  // If use_idcg_normalization_ is true, the loss function is normalized by
+  // idcg (e.g. NDCG); otherwise the loss function is un-normalized (e.g. DCG).
+  // Note that normalization is mathematically correct only if idcg is
+  // guaranteed to be positive!
+  if (use_idcg_normalization_) {
+    dy_vec /= std::max(idcg, 1e-5);
+    loss /= std::max(idcg, 1e-5);
+  }
   return loss;
 }
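
For reference, the two loss conventions implemented above, in terms of the per-session dcg and idcg computed earlier; note the NDCG form only reduces to 1 - dcg/idcg when idcg exceeds the 1e-5 floor, as the code comment warns:

```latex
% use_idcg_normalization_ == true (NDCG-normalized LambdaRank loss):
\mathcal{L}_{\text{NDCG}}
  = \frac{\mathrm{idcg} - \mathrm{dcg}}{\max(\mathrm{idcg},\, 10^{-5})}
  = 1 - \frac{\mathrm{dcg}}{\mathrm{idcg}}
  \quad (\text{for } \mathrm{idcg} \ge 10^{-5})

% use_idcg_normalization_ == false (un-normalized DCG loss):
\mathcal{L}_{\text{DCG}} = \mathrm{idcg} - \mathrm{dcg}
```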

@@ -17,6 +17,9 @@ class LambdaRankNdcgOp final : public Operator<Context> {
       : Operator<Context>(std::forward<Args>(args)...),
         use_ndcg_as_loss_(
             this->template GetSingleArgument<bool>("use_ndcg_as_loss", false)),
+        use_idcg_normalization_(this->template GetSingleArgument<bool>(
+            "use_idcg_normalization",
+            true)),
         use_exp_gain_(
             this->template GetSingleArgument<bool>("use_exp_gain", true)) {}
   USE_OPERATOR_CONTEXT_FUNCTIONS;
@@ -35,6 +38,7 @@ class LambdaRankNdcgOp final : public Operator<Context> {
       const Tensor& r,
       Tensor** dy);
   bool use_ndcg_as_loss_;
+  bool use_idcg_normalization_;
   bool use_exp_gain_;
   Tensor gain_;
   Tensor discount_;

@@ -8,7 +8,9 @@ from hypothesis import given
 
 class TestListwiseL2rOps(hu.HypothesisTestCase):
-    def ref_lambda_rank_loss(self, y, r, use_ndcg_as_loss, use_exp_gain):
+    def ref_lambda_rank_loss(
+        self, y, r, use_ndcg_as_loss, use_idcg_normalization, use_exp_gain
+    ):
         n = len(y)
 
         def get_discounts(v):
@@ -35,14 +37,16 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
         d = get_discounts(r)
         idcg = sum([g[i] * d[i] for i in range(n)])
-        if idcg < 1e-5:
-            idcg = 1e-5
+        if use_idcg_normalization:
+            session_weight = max(idcg, 1e-5)
+        else:
+            session_weight = 1
 
         d = get_discounts(y)
 
         if use_ndcg_as_loss:
             dcg = sum(g[i] * d[i] for i in range(n))
-            loss = 1.0 - dcg / idcg
+            loss = (idcg - dcg) / session_weight
 
         for i in range(n):
             for j in range(n):
                 if i == j:
@@ -51,8 +55,9 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
                 rank_loss = -log_sigm(y[i] - y[j] if r[i] > r[j] else y[j] - y[i])
                 rank_dy = (0.0 if r[i] > r[j] else 1.0) - sigm(-y[i] + y[j])
                 if not use_ndcg_as_loss:
-                    loss += lambda_weight * rank_loss / idcg
-                dy[i] += lambda_weight * rank_dy / idcg
+                    loss += lambda_weight * rank_loss / session_weight
+                dy[i] += lambda_weight * rank_dy / session_weight
         return loss, dy
 
     @given(n=st.integers(1, 20), k=st.integers(2, 5), m=st.integers(3, 5))
@@ -64,24 +69,41 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
         ref_loss = np.empty(0)
         ref_ndcg_loss = np.empty(0)
         ref_ndcg_loss_no_exp = np.empty(0)
+        ref_dcg_loss = np.empty(0)
+        ref_dcg_loss_no_exp = np.empty(0)
         ref_dy = np.empty(0)
         ref_dy_no_exp = np.empty(0)
+        ref_dcg_dy = np.empty(0)
+        ref_dcg_dy_no_exp = np.empty(0)
         for i in range(m):
             r_loss, r_dy = self.ref_lambda_rank_loss(
-                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], False, False
+                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], False, True, False
             )
             r_ndcg_loss, _ = self.ref_lambda_rank_loss(
-                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, True
+                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, True, True
             )
             r_ndcg_loss_no_exp, r_dy_no_exp = self.ref_lambda_rank_loss(
-                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, False
+                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, True, False
             )
+            r_dcg_loss, r_dcg_dy = self.ref_lambda_rank_loss(
+                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, False, True
+            )
+            r_dcg_loss_no_exp, r_dcg_dy_no_exp = self.ref_lambda_rank_loss(
+                y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, False, False
+            )
             ref_loss = np.append(ref_loss, r_loss)
             ref_dy = np.append(ref_dy, r_dy)
             ref_ndcg_loss = np.append(ref_ndcg_loss, r_ndcg_loss)
             ref_ndcg_loss_no_exp = np.append(ref_ndcg_loss_no_exp, r_ndcg_loss_no_exp)
             ref_dy_no_exp = np.append(ref_dy_no_exp, r_dy_no_exp)
+            ref_dcg_loss = np.append(ref_dcg_loss, r_dcg_loss)
+            ref_dcg_dy = np.append(ref_dcg_dy, r_dcg_dy)
+            ref_dcg_loss_no_exp = np.append(ref_dcg_loss_no_exp, r_dcg_loss_no_exp)
+            ref_dcg_dy_no_exp = np.append(ref_dcg_dy_no_exp, r_dcg_dy_no_exp)
 
         dloss = np.random.random(m).astype(np.float32)
 
         workspace.blobs["y"] = y
@@ -94,6 +116,7 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
             ["y", "r", "session_lengths"],
             ["loss", "dy"],
             use_ndcg_as_loss=False,
+            use_idcg_normalization=True,
             use_exp_gain=False,
         )
         workspace.RunOperatorOnce(op)
@@ -107,6 +130,7 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
             ["y", "r", "session_lengths"],
             ["loss", "dy"],
             use_ndcg_as_loss=True,
+            use_idcg_normalization=True,
             use_exp_gain=True,
         )
         workspace.RunOperatorOnce(op)
@@ -135,6 +159,7 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
             ["y", "r", "session_lengths"],
             ["loss", "dy"],
             use_ndcg_as_loss=True,
+            use_idcg_normalization=True,
             use_exp_gain=False,
         )
         workspace.RunOperatorOnce(op)
@@ -148,7 +173,6 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
             ["y", "session_lengths", "dy", "dloss"],
             ["dy_back"],
         )
-
         workspace.RunOperatorOnce(op)
         dy_back = workspace.blobs["dy_back"]
         for i in range(m):
@@ -158,3 +182,61 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
                 rtol=1e-5,
                 atol=1e-6,
             )
+
+        op = core.CreateOperator(
+            "LambdaRankNdcg",
+            ["y", "r", "session_lengths"],
+            ["loss", "dy"],
+            use_ndcg_as_loss=True,
+            use_idcg_normalization=False,
+            use_exp_gain=True,
+        )
+        workspace.RunOperatorOnce(op)
+        loss = workspace.blobs["loss"]
+        dy = workspace.blobs["dy"]
+        np.testing.assert_allclose(loss, ref_dcg_loss, rtol=1e-5, atol=1e-6)
+        np.testing.assert_allclose(dy, ref_dcg_dy, rtol=1e-5, atol=1e-6)
+
+        op = core.CreateOperator(
+            "LambdaRankNdcgGradient",
+            ["y", "session_lengths", "dy", "dloss"],
+            ["dy_back"],
+        )
+        workspace.RunOperatorOnce(op)
+        dy_back = workspace.blobs["dy_back"]
+        for i in range(m):
+            np.testing.assert_allclose(
+                dy_back[i * n : (i + 1) * n],
+                dloss[i] * ref_dcg_dy[i * n : (i + 1) * n],
+                rtol=1e-5,
+                atol=1e-6,
+            )
+
+        op = core.CreateOperator(
+            "LambdaRankNdcg",
+            ["y", "r", "session_lengths"],
+            ["loss", "dy"],
+            use_ndcg_as_loss=True,
+            use_idcg_normalization=False,
+            use_exp_gain=False,
+        )
+        workspace.RunOperatorOnce(op)
+        loss = workspace.blobs["loss"]
+        dy = workspace.blobs["dy"]
+        np.testing.assert_allclose(loss, ref_dcg_loss_no_exp, rtol=1e-5, atol=1e-6)
+        np.testing.assert_allclose(dy, ref_dcg_dy_no_exp, rtol=1e-5, atol=1e-6)
+
+        op = core.CreateOperator(
+            "LambdaRankNdcgGradient",
+            ["y", "session_lengths", "dy", "dloss"],
+            ["dy_back"],
+        )
+        workspace.RunOperatorOnce(op)
+        dy_back = workspace.blobs["dy_back"]
+        for i in range(m):
+            np.testing.assert_allclose(
+                dy_back[i * n : (i + 1) * n],
+                dloss[i] * ref_dcg_dy_no_exp[i * n : (i + 1) * n],
+                rtol=1e-5,
+                atol=1e-6,
+            )
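
The `LambdaRankNdcgGradient` assertions above check that the backward op scales each session's per-item gradient `dy` by that session's upstream gradient `dloss[i]`. A small NumPy sketch of that reference relationship (the values and sizes are made up, not taken from the test):

```python
import numpy as np

n, m = 4, 2  # items per session, number of sessions (illustrative)
dy = np.random.rand(m * n).astype(np.float32)  # forward op's per-item gradients
dloss = np.random.rand(m).astype(np.float32)   # upstream gradient, one per session

# Expected dy_back, session by session:
#   dy_back[i * n : (i + 1) * n] == dloss[i] * dy[i * n : (i + 1) * n]
dy_back = np.concatenate([dloss[i] * dy[i * n : (i + 1) * n] for i in range(m)])
```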