mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
add LambdaRank DCG Loss Option (#23679)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/23679
Full Canary: https://fburl.com/fblearner/sa1pkpya
Add LambdaRank DCG Loss Option:
* when use_idcg_normalization == true, the operator behaves as regular LambdaRank with NDCG loss (the gradient and loss are normalized by idcg);
* when use_idcg_normalization == false, the gradient and loss functions are not normalized by idcg (DCG loss).
Differential Revision: D16605459
fbshipit-source-id: a16f071e69516974e48d27bef4ca179019ca4ae7
This commit is contained in:
committed by
Facebook Github Bot
parent
fc6aec9491
commit
302adf1d20
@ -131,9 +131,6 @@ float LambdaRankNdcgOp<float, CPUContext>::LambdaRankNdcgSession(
|
||||
ideal_discount_data, discount_.numel());
|
||||
// ideal dcg = \sum gain_i * ideal_discount_i
|
||||
double idcg = (gain_vec * ideal_discount_vec).sum();
|
||||
if (idcg < 1e-5) {
|
||||
idcg = 1e-5;
|
||||
}
|
||||
|
||||
ComputeDiscounts(rank_idx_data, N);
|
||||
auto* discount_data = discount_.template mutable_data<float>();
|
||||
@ -156,17 +153,25 @@ float LambdaRankNdcgOp<float, CPUContext>::LambdaRankNdcgSession(
|
||||
CWISE_SIGM(
|
||||
-CWISE_SIGN(PAIRWISE_DIFF(r_vec, N)) * PAIRWISE_DIFF(y_vec, N)))
|
||||
.rowwise()
|
||||
.sum() /
|
||||
idcg;
|
||||
.sum();
|
||||
if (use_ndcg_as_loss_) {
|
||||
loss = 1 - dcg / idcg;
|
||||
// DCG loss function
|
||||
loss = (idcg - dcg);
|
||||
} else {
|
||||
loss = -(lambda_mat *
|
||||
CWISE_LOG_SIGM(
|
||||
CWISE_SIGN(PAIRWISE_DIFF(r_vec, N)) * PAIRWISE_DIFF(y_vec, N),
|
||||
100))
|
||||
.sum() /
|
||||
idcg;
|
||||
.sum();
|
||||
}
|
||||
|
||||
// if use_idcg_normalization_ is true, the loss and gradient are normalized by idcg
|
||||
// (e.g. NDCG), else un-normalized loss function (e.g. DCG)
|
||||
// Note that normalization is mathematically correct only if idcg is guaranteed to
|
||||
// be positive!
|
||||
if (use_idcg_normalization_) {
|
||||
dy_vec /= std::max(idcg, 1e-5);
|
||||
loss /= std::max(idcg, 1e-5);
|
||||
}
|
||||
return loss;
|
||||
}
|
||||
|
@ -17,6 +17,9 @@ class LambdaRankNdcgOp final : public Operator<Context> {
|
||||
: Operator<Context>(std::forward<Args>(args)...),
|
||||
use_ndcg_as_loss_(
|
||||
this->template GetSingleArgument<bool>("use_ndcg_as_loss", false)),
|
||||
use_idcg_normalization_(this->template GetSingleArgument<bool>(
|
||||
"use_idcg_normalization",
|
||||
true)),
|
||||
use_exp_gain_(
|
||||
this->template GetSingleArgument<bool>("use_exp_gain", true)) {}
|
||||
USE_OPERATOR_CONTEXT_FUNCTIONS;
|
||||
@ -35,6 +38,7 @@ class LambdaRankNdcgOp final : public Operator<Context> {
|
||||
const Tensor& r,
|
||||
Tensor** dy);
|
||||
bool use_ndcg_as_loss_;
|
||||
bool use_idcg_normalization_;
|
||||
bool use_exp_gain_;
|
||||
Tensor gain_;
|
||||
Tensor discount_;
|
||||
|
@ -8,7 +8,9 @@ from hypothesis import given
|
||||
|
||||
|
||||
class TestListwiseL2rOps(hu.HypothesisTestCase):
|
||||
def ref_lambda_rank_loss(self, y, r, use_ndcg_as_loss, use_exp_gain):
|
||||
def ref_lambda_rank_loss(
|
||||
self, y, r, use_ndcg_as_loss, use_idcg_normalization, use_exp_gain
|
||||
):
|
||||
n = len(y)
|
||||
|
||||
def get_discounts(v):
|
||||
@ -35,14 +37,16 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
|
||||
d = get_discounts(r)
|
||||
idcg = sum([g[i] * d[i] for i in range(n)])
|
||||
|
||||
if idcg < 1e-5:
|
||||
idcg = 1e-5
|
||||
if use_idcg_normalization:
|
||||
session_weight = max(idcg, 1e-5)
|
||||
else:
|
||||
session_weight = 1
|
||||
|
||||
d = get_discounts(y)
|
||||
|
||||
if use_ndcg_as_loss:
|
||||
dcg = sum(g[i] * d[i] for i in range(n))
|
||||
loss = 1.0 - dcg / idcg
|
||||
loss = (idcg - dcg) / session_weight
|
||||
for i in range(n):
|
||||
for j in range(n):
|
||||
if i == j:
|
||||
@ -51,8 +55,9 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
|
||||
rank_loss = -log_sigm(y[i] - y[j] if r[i] > r[j] else y[j] - y[i])
|
||||
rank_dy = (0.0 if r[i] > r[j] else 1.0) - sigm(-y[i] + y[j])
|
||||
if not use_ndcg_as_loss:
|
||||
loss += lambda_weight * rank_loss / idcg
|
||||
dy[i] += lambda_weight * rank_dy / idcg
|
||||
loss += lambda_weight * rank_loss / session_weight
|
||||
dy[i] += lambda_weight * rank_dy / session_weight
|
||||
|
||||
return loss, dy
|
||||
|
||||
@given(n=st.integers(1, 20), k=st.integers(2, 5), m=st.integers(3, 5))
|
||||
@ -64,24 +69,41 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
|
||||
ref_loss = np.empty(0)
|
||||
ref_ndcg_loss = np.empty(0)
|
||||
ref_ndcg_loss_no_exp = np.empty(0)
|
||||
ref_dcg_loss = np.empty(0)
|
||||
ref_dcg_loss_no_exp = np.empty(0)
|
||||
ref_dy = np.empty(0)
|
||||
ref_dy_no_exp = np.empty(0)
|
||||
ref_dcg_dy = np.empty(0)
|
||||
ref_dcg_dy_no_exp = np.empty(0)
|
||||
for i in range(m):
|
||||
r_loss, r_dy = self.ref_lambda_rank_loss(
|
||||
y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], False, False
|
||||
y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], False, True, False
|
||||
)
|
||||
r_ndcg_loss, _ = self.ref_lambda_rank_loss(
|
||||
y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, True
|
||||
y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, True, True
|
||||
)
|
||||
r_ndcg_loss_no_exp, r_dy_no_exp = self.ref_lambda_rank_loss(
|
||||
y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, False
|
||||
y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, True, False
|
||||
)
|
||||
r_dcg_loss, r_dcg_dy = self.ref_lambda_rank_loss(
|
||||
y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, False, True
|
||||
)
|
||||
r_dcg_loss_no_exp, r_dcg_dy_no_exp = self.ref_lambda_rank_loss(
|
||||
y[(i) * n : (i + 1) * n], r[(i) * n : (i + 1) * n], True, False, False
|
||||
)
|
||||
ref_loss = np.append(ref_loss, r_loss)
|
||||
ref_dy = np.append(ref_dy, r_dy)
|
||||
ref_ndcg_loss = np.append(ref_ndcg_loss, r_ndcg_loss)
|
||||
|
||||
ref_ndcg_loss_no_exp = np.append(ref_ndcg_loss_no_exp, r_ndcg_loss_no_exp)
|
||||
ref_dy_no_exp = np.append(ref_dy_no_exp, r_dy_no_exp)
|
||||
|
||||
ref_dcg_loss = np.append(ref_dcg_loss, r_dcg_loss)
|
||||
ref_dcg_dy = np.append(ref_dcg_dy, r_dcg_dy)
|
||||
|
||||
ref_dcg_loss_no_exp = np.append(ref_dcg_loss_no_exp, r_dcg_loss_no_exp)
|
||||
ref_dcg_dy_no_exp = np.append(ref_dcg_dy_no_exp, r_dcg_dy_no_exp)
|
||||
|
||||
dloss = np.random.random(m).astype(np.float32)
|
||||
|
||||
workspace.blobs["y"] = y
|
||||
@ -94,6 +116,7 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
|
||||
["y", "r", "session_lengths"],
|
||||
["loss", "dy"],
|
||||
use_ndcg_as_loss=False,
|
||||
use_idcg_normalization=True,
|
||||
use_exp_gain=False,
|
||||
)
|
||||
workspace.RunOperatorOnce(op)
|
||||
@ -107,6 +130,7 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
|
||||
["y", "r", "session_lengths"],
|
||||
["loss", "dy"],
|
||||
use_ndcg_as_loss=True,
|
||||
use_idcg_normalization=True,
|
||||
use_exp_gain=True,
|
||||
)
|
||||
workspace.RunOperatorOnce(op)
|
||||
@ -135,6 +159,7 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
|
||||
["y", "r", "session_lengths"],
|
||||
["loss", "dy"],
|
||||
use_ndcg_as_loss=True,
|
||||
use_idcg_normalization=True,
|
||||
use_exp_gain=False,
|
||||
)
|
||||
workspace.RunOperatorOnce(op)
|
||||
@ -148,7 +173,6 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
|
||||
["y", "session_lengths", "dy", "dloss"],
|
||||
["dy_back"],
|
||||
)
|
||||
|
||||
workspace.RunOperatorOnce(op)
|
||||
dy_back = workspace.blobs["dy_back"]
|
||||
for i in range(m):
|
||||
@ -158,3 +182,61 @@ class TestListwiseL2rOps(hu.HypothesisTestCase):
|
||||
rtol=1e-5,
|
||||
atol=1e-6,
|
||||
)
|
||||
|
||||
op = core.CreateOperator(
|
||||
"LambdaRankNdcg",
|
||||
["y", "r", "session_lengths"],
|
||||
["loss", "dy"],
|
||||
use_ndcg_as_loss=True,
|
||||
use_idcg_normalization=False,
|
||||
use_exp_gain=True,
|
||||
)
|
||||
workspace.RunOperatorOnce(op)
|
||||
loss = workspace.blobs["loss"]
|
||||
dy = workspace.blobs["dy"]
|
||||
np.testing.assert_allclose(loss, ref_dcg_loss, rtol=1e-5, atol=1e-6)
|
||||
np.testing.assert_allclose(dy, ref_dcg_dy, rtol=1e-5, atol=1e-6)
|
||||
|
||||
op = core.CreateOperator(
|
||||
"LambdaRankNdcgGradient",
|
||||
["y", "session_lengths", "dy", "dloss"],
|
||||
["dy_back"],
|
||||
)
|
||||
workspace.RunOperatorOnce(op)
|
||||
dy_back = workspace.blobs["dy_back"]
|
||||
for i in range(m):
|
||||
np.testing.assert_allclose(
|
||||
dy_back[i * n : (i + 1) * n],
|
||||
dloss[i] * ref_dcg_dy[i * n : (i + 1) * n],
|
||||
rtol=1e-5,
|
||||
atol=1e-6,
|
||||
)
|
||||
|
||||
op = core.CreateOperator(
|
||||
"LambdaRankNdcg",
|
||||
["y", "r", "session_lengths"],
|
||||
["loss", "dy"],
|
||||
use_ndcg_as_loss=True,
|
||||
use_idcg_normalization=False,
|
||||
use_exp_gain=False,
|
||||
)
|
||||
workspace.RunOperatorOnce(op)
|
||||
loss = workspace.blobs["loss"]
|
||||
dy = workspace.blobs["dy"]
|
||||
np.testing.assert_allclose(loss, ref_dcg_loss_no_exp, rtol=1e-5, atol=1e-6)
|
||||
np.testing.assert_allclose(dy, ref_dcg_dy_no_exp, rtol=1e-5, atol=1e-6)
|
||||
|
||||
op = core.CreateOperator(
|
||||
"LambdaRankNdcgGradient",
|
||||
["y", "session_lengths", "dy", "dloss"],
|
||||
["dy_back"],
|
||||
)
|
||||
workspace.RunOperatorOnce(op)
|
||||
dy_back = workspace.blobs["dy_back"]
|
||||
for i in range(m):
|
||||
np.testing.assert_allclose(
|
||||
dy_back[i * n : (i + 1) * n],
|
||||
dloss[i] * ref_dcg_dy_no_exp[i * n : (i + 1) * n],
|
||||
rtol=1e-5,
|
||||
atol=1e-6,
|
||||
)
|
||||
|
Reference in New Issue
Block a user