Compare commits

...

3 Commits

3 changed files with 3 additions and 4 deletions

View File

@@ -1,6 +1,6 @@
[metadata]
name = trl
-version = 0.18.0
+version = 0.18.1
description = Train transformer language models with reinforcement learning.
long_description = file: README.md
long_description_content_type = text/markdown
@@ -89,7 +89,6 @@ dev =
%(quantization)s
%(scikit)s
%(test)s
%(vllm)s
%(vlm)s
[options.entry_points]

View File

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-__version__ = "0.18.0"
+__version__ = "0.18.1"
from typing import TYPE_CHECKING

View File

@@ -1229,7 +1229,7 @@ class GRPOTrainer(Trainer):
# Identify sequences that terminated with EOS and log their lengths
agg_terminated_with_eos = self.accelerator.gather(is_eos.any(dim=1))
term_completion_lengths = agg_completion_lengths[agg_terminated_with_eos]
-clipped_completions_ratio = 1 - len(term_completion_lengths) / len(completion_lengths)
+clipped_completions_ratio = 1 - len(term_completion_lengths) / len(agg_completion_lengths)
self._metrics[mode]["completions/clipped_ratio"].append(clipped_completions_ratio)
if len(term_completion_lengths) == 0: # edge case where no terminated sequences are found
term_completion_lengths = torch.zeros(1, device=device)